804466264c86aa3d9edc8edcfe1039a2f5392b20
braney
  Mon Jul 29 16:12:54 2019 -0700
a utility to print out a bed file with regions that have no alignment in
a maf file

diff --git src/hg/mouseStuff/mafNoAlign/mafNoAlign.c src/hg/mouseStuff/mafNoAlign/mafNoAlign.c
new file mode 100644
index 0000000..a5437c0
--- /dev/null
+++ src/hg/mouseStuff/mafNoAlign/mafNoAlign.c
@@ -0,0 +1,100 @@
+/* mafNoAlign - output a bed where there are no other sequences mapping to the reference sequence */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "maf.h"
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "mafNoAlign - output a bed where there are no other sequences mapping to the reference sequence\n"
+  "NOTE: code expects first sequence in blocks to be the reference and to be on the '+' strand\n"
+  "      and the sequence name to be in the form species.chromosome.\n"
+  "usage:\n"
+  "   mafNoAlign in.maf out.bed\n"
+  );
+}
+
+/* Command line option specifications */
+static struct optionSpec optionSpecs[] = {
+    {NULL, 0}
+};
+
+void mafNoAlign(char *in, char *out)
+/* Output a bed where there are no other sequences mapping to the reference sequence. */
+{
+struct mafFile *mf = mafOpen(in);
+FILE *f = mustOpen(out, "w");
+struct mafAli *ma = NULL;
+int prevChromSize = 0;
+char *prevChrom = NULL;
+int prevEnd = 0;
+
+while ((ma = mafNext(mf)) != NULL)  // for each block in the maf
+    {
+    struct mafComp *mc;
+
+    // ignore empty blocks
+    for (mc = ma->components->next;  mc != NULL;  mc = mc->next)
+        {
+        if (mc->size != 0)  // here's some sequence
+            break;
+        }
+    if (mc == NULL)  // empty block, ignore it
+        {
+        mafAliFree(&ma);
+        continue;
+        }
+
+    // get chrom of reference sequence
+    struct mafComp *mcRef = ma->components;
+    char *chrom = strchr(mcRef->src, '.');
+    if (chrom == NULL)
+        errAbort("maf has reference not in species.chrom format on line %d\n",mf->lf->lineIx);
+    chrom++;
+
+    // if this is a new chrom, output the piece at the end of the previous chrom (if any)
+    if ((prevChrom != NULL) && differentString(chrom, prevChrom))
+        {
+        if (prevEnd != prevChromSize)
+            fprintf(f, "%s\t%d\t%d\n", prevChrom, prevEnd, prevChromSize);
+        prevChrom = NULL;
+        }
+
+    // if this is a new chrom, save some data and restart our range
+    if (prevChrom == NULL)  
+        {
+        prevChrom = cloneString(chrom);
+        prevChromSize = mcRef->srcSize;
+        prevEnd = 0;
+        }
+
+    // if this block starts after the end of the previous block, it's a gap
+    int start = mcRef->start;
+    if (prevEnd != start)
+        fprintf(f, "%s\t%d\t%d\n", prevChrom, prevEnd, start);
+
+    // remember the end to this block
+    prevEnd = start + ma->components->size;
+
+    mafAliFree(&ma);
+    }
+
+if ((prevChrom != NULL) && (prevEnd != prevChromSize))
+    fprintf(f, "%s\t%d\t%d\n", prevChrom, prevEnd, prevChromSize);
+
+mafFileFree(&mf);
+fclose(f);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, optionSpecs);
+if (argc != 3)
+    usage();
+mafNoAlign(argv[1], argv[2]);
+return 0;
+}