src/hg/txGene/txGeneAddOldUnmapped/txGeneAddOldUnmapped.c 1.1
1.1 2009/10/03 01:28:43 kent
Utility to add in genes that couldn't be mapped from the previous assembly to the oldToNew table. Seems to work.
Index: src/hg/txGene/txGeneAddOldUnmapped/txGeneAddOldUnmapped.c
===================================================================
RCS file: src/hg/txGene/txGeneAddOldUnmapped/txGeneAddOldUnmapped.c
diff -N src/hg/txGene/txGeneAddOldUnmapped/txGeneAddOldUnmapped.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/txGene/txGeneAddOldUnmapped/txGeneAddOldUnmapped.c 3 Oct 2009 01:28:43 -0000 1.1
@@ -0,0 +1,67 @@
+/* txGeneAddOldUnmapped - Add information about genes that didn't map from old assembly to oldToNew table. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+
+static char const rcsid[] = "$Id$"; /* Revision-control identification string (RCS keyword placeholder). */
+
+void usage()
+/* Explain usage and exit.  No options are listed because the program defines none:
+ * the option table is empty and main() accepts only the three positional arguments. */
+{
+errAbort(
+ "txGeneAddOldUnmapped - Add information about genes that didn't map from old assembly to oldToNew table.\n"
+ "usage:\n"
+ " txGeneAddOldUnmapped oldToNewMapped.tab old.unmapped oldToNew.tab\n"
+ "Writes the rows of oldToNewMapped.tab plus one 'unmapped' row per gene in old.unmapped to oldToNew.tab.\n"
+ );
+}
+
+static struct optionSpec options[] = { /* Option table passed to optionInit() in main(). */
+ {NULL, 0}, /* Only entry; presumably the list terminator - confirm against options.h. */
+};
+
+void txGeneAddOldUnmapped(char *mappedFile, char *unmappedFile, char *outputFile)
+/* txGeneAddOldUnmapped - Add information about genes that didn't map from old assembly
+ * to oldToNew table.  Rows of mappedFile (read four fields at a time) are copied to
+ * outputFile as is; each row of the unmappedFile bed (read twelve fields at a time) then
+ * adds a row with its fourth field (the accession) and "unmapped".  Aborts if one is in both. */
+{
+struct lineFile *mappedLf = lineFileOpen(mappedFile, TRUE);
+struct lineFile *unmappedLf = lineFileOpen(unmappedFile, TRUE);
+FILE *f = mustOpen(outputFile, "w");
+struct hash *accHash = hashNew(0);
+char *mappedRow[4], *bedRow[12];
+
+/* Save mapped rows to output unchanged, hashing each accession (second field) so the
+ * unmapped input can be double checked against them. */
+while (lineFileRowTab(mappedLf, mappedRow))
+    {
+    hashAdd(accHash, mappedRow[1], NULL);
+    fprintf(f, "%s\t%s\t%s\t%s\n", mappedRow[0], mappedRow[1], mappedRow[2], mappedRow[3]);
+    }
+/* Unmapped genes get a row with empty first and third fields. */
+while (lineFileRowTab(unmappedLf, bedRow))
+    {
+    char *acc = bedRow[3];
+    if (hashLookup(accHash, acc))
+        errAbort("%s is in both %s and %s\n", acc, mappedFile, unmappedFile);
+    fprintf(f, "\t%s\t\t%s\n", acc, "unmapped");
+    }
+/* Close the output and release the inputs and hash so nothing is left open on return. */
+carefulClose(&f);
+lineFileClose(&mappedLf);
+lineFileClose(&unmappedLf);
+hashFree(&accHash);
+}
+
+int main(int argc, char *argv[])
+/* Parse options, insist on exactly three positional arguments, then do the work. */
+{
+const int wantedArgc = 4;    /* program name plus the three file names */
+optionInit(&argc, argv, options);
+if (argc != wantedArgc) usage();
+txGeneAddOldUnmapped(argv[1], argv[2], argv[3]);
+return 0;
+}