b3c271576169b5ad8ed33758cefe608f55f61967
kent
  Wed Jul 31 18:11:08 2013 -0700
Seems to work.  Sadly it does change line length.
diff --git src/utils/faUniqify/faUniqify.c src/utils/faUniqify/faUniqify.c
new file mode 100644
index 0000000..caac6a8
--- /dev/null
+++ src/utils/faUniqify/faUniqify.c
@@ -0,0 +1,60 @@
+/* faUniqify - Remove redundant sequences from fasta.  Warn if different sequences have same name. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "fa.h"
+
+void usage()
+/* Abort with the usage message; called from main() when the argument count is wrong. */
+{
+static char *helpText =
+  "faUniqify - Remove redundant sequences from fasta.  Warn if different sequences have same name.\n"
+  "usage:\n"
+  "   faUniqify in.fa out.fa\n"
+  "options:\n"
+  "   -verbose=0 - suppress warning messages\n";
+errAbort("%s", helpText);
+}
+
+/* Option specs handed to optionInit() in main().  No program-specific options are
+ * declared here: the single entry has a NULL name (presumably the end-of-table marker). */
+static struct optionSpec options[] =
+    { {NULL, 0}, };
+
+void faUniqify(char *inFile, char *outFile)
+/* Copy inFile to outFile keeping only the first record for each sequence name.  Warn when a
+ * later record reuses a name with different sequence; such records are still dropped. */
+{
+struct hash *uniqHash = hashNew(0);   /* name -> private copy of first sequence seen under it */
+struct lineFile *lf = lineFileOpen(inFile, FALSE);
+FILE *f = mustOpen(outFile, "w");
+DNA *dna;
+int size;
+char *name;
+while (faMixedSpeedReadNext(lf, &dna, &size, &name))
+    {
+    char *oldDna = hashFindVal(uniqHash, name);
+    if (oldDna == NULL)
+        {
+        char *newDna = cloneMem(dna, size+1);  /* size+1: presumably includes the terminator */
+        hashAdd(uniqHash, name, newDna);       /* the hash keeps this copy until freed below */
+        faWriteNext(f, name, newDna, size);
+        }
+    else if (!sameString(oldDna, dna))   /* compare in place: no per-repeat clone to leak */
+        warn("Name %s reused for different sequences", name);
+    }
+carefulClose(&f);
+lineFileClose(&lf);
+freeHashAndVals(&uniqHash);
+}
+
+int main(int argc, char *argv[])
+/* Process command line: expects in.fa and out.fa once optionInit() has seen the arguments. */
+{
+enum { wantArgc = 3 };   /* program name + in.fa + out.fa */
+optionInit(&argc, argv, options);
+if (argc != wantArgc) usage();
+faUniqify(argv[1], argv[2]);
+return 0;
+}