0077bc3d653ba288e13b6ad75857ed5d6c4b51cd
markd
  Sun Apr 2 10:45:31 2023 -0700
decode bigMaf in chunks so as not to use an absurd amount of memory

diff --git src/hg/utils/bigMafToMaf/bigMafToMaf.c src/hg/utils/bigMafToMaf/bigMafToMaf.c
index a36ecc0..1926cab 100644
--- src/hg/utils/bigMafToMaf/bigMafToMaf.c
+++ src/hg/utils/bigMafToMaf/bigMafToMaf.c
@@ -1,66 +1,91 @@
 /* bigMafToMaf - convert bigMaf to maf file. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "bigBed.h"
 #include "options.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bigMafToMaf - convert bigMaf to maf file\n"
   "usage:\n"
   "   bigMafToMaf bigMaf.bb file.maf\n"
   "options:\n"
-  "   -xxx=XXX\n"
   );
 }
 
+/* chunking keeps memory down */
+static int chunkSizeBases = 1048576;
+
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
-void bigMafToMaf(char *bigBed, char *mafFile)
-/* bigMafToMaf - convert bigMaf to maf file. */
-{
-struct bbiFile *bbi = bigBedFileOpen(bigBed);
-FILE *f = mustOpen(mafFile, "w");
-fprintf(f, "##maf version=1\n");
-struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
-for (chrom = chromList; chrom != NULL; chrom = chrom->next)
+static void writeMafAli(char *ptr, FILE *f)
+/* output one block, change ';'  back to newline */
 {
-    int start = 0, end = chrom->size;
-    int itemsLeft = 0;
-    char *chromName = chrom->name;
-
-    struct lm *lm = lmInit(0);
-    struct bigBedInterval  *bbList = bigBedIntervalQuery(bbi, chromName,
-            start, end, itemsLeft, lm);
-    
-    for(; bbList; bbList = bbList->next)
-        {
-        char *ptr = bbList->rest;
-
 for(; *ptr; ptr++)
+    {
     if (*ptr == ';')
         fputc('\n', f);
     else
         fputc(*ptr, f);
+    }
 fputc('\n', f);
+
 }
 
+static void processChromChunk(struct bbiFile *bbi, struct bbiChromInfo *chrom,
+                              int start, int end, FILE *f)
+/* Output one chunk.  Only blocks where start is in the range will be written
+ * to avoid outputting a block multiple times.  */
+{
+struct lm *lm = lmInit(0);
+struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom->name,
+                                                    start, end, 0, lm);
+for(; bbList; bbList = bbList->next)
+    {
+    if ((start <= bbList->start) && (bbList->start < end))
+        writeMafAli(bbList->rest, f);
+    }
 lmCleanup(&lm);
 }
+
+static void processChrom(struct bbiFile *bbi, struct bbiChromInfo *chrom,
+                         FILE *f)
+/* output MAF blocks from one chrom */
+{
+int start = 0;
+while (start < chrom->size)
+    {
+    int end = min(start + chunkSizeBases, chrom->size);
+    processChromChunk(bbi, chrom, start, end, f);
+    start = end;
+    }
+}
+
+static void bigMafToMaf(char *bigBed, char *mafFile)
+/* bigMafToMaf - convert bigMaf to maf file. */
+{
+struct bbiFile *bbi = bigBedFileOpen(bigBed);
+FILE *f = mustOpen(mafFile, "w");
+fprintf(f, "##maf version=1\n");
+struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
+for (chrom = chromList; chrom != NULL; chrom = chrom->next)
+    {
+    processChrom(bbi, chrom, f);
+    }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 bigMafToMaf(argv[1], argv[2]);
 return 0;
 }