af3fb358fdeee7bf9080bdee719eb0cc1d4b4c72
hiram
  Sat Nov 22 15:32:57 2025 -0800
allow maskFile to be a bigBed (.bb) file refs #36672

diff --git src/hg/utils/twoBitMask/twoBitMask.c src/hg/utils/twoBitMask/twoBitMask.c
index eb201da6a81..c78b57a69b7 100644
--- src/hg/utils/twoBitMask/twoBitMask.c
+++ src/hg/utils/twoBitMask/twoBitMask.c
@@ -11,48 +11,48 @@
 #include "memalloc.h"
 #include "repMask.h"
 #include "twoBit.h"
 #include "bed.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "twoBitMask - apply masking to a .2bit file, creating a new .2bit file\n"
   "usage:\n"
   "   twoBitMask input.2bit maskFile output.2bit\n"
   "options:\n"
   "   -add   Don't remove pre-existing masking before applying maskFile.\n"
-  "   -type=.XXX   Type of maskFile is XXX (bed or out).\n"
-  "maskFile can be a RepeatMasker .out file or a .bed file.  It must not\n"
-  "contain rows for sequences which are not in input.2bit.\n"
+  "   -type=.XXX   Type of maskFile is XXX (bed or bb or out).\n"
+  "maskFile can be a RepeatMasker .out file, a bigBed .bb file, or a .bed file.\n"
+  "It must not contain rows for sequences which are not in input.2bit.\n"
 );
 }
 
 /* Options: */
 boolean add = FALSE;
 char *type = NULL;
 
 static struct optionSpec options[] = {
    {"add", OPTION_BOOLEAN},
    {"type", OPTION_STRING},
    {NULL, 0},
 };
 
 
-unsigned slurpInput(char *inName, struct hash *tbHash,
+static unsigned slurpInput(char *inName, struct hash *tbHash,
 			  struct hash *bitmapHash, struct twoBit **list)
 /* Read .2bit file inName into memory and return list of twoBit items.
  * Populate tbHash with twoBit items, and bitmapHash with bitmaps for
  * easy masking.  Both are hashed by twoBit sequence name. */
 {
 struct twoBit *twoBitList = NULL;
 struct twoBit *twoBit = NULL;
 struct twoBitFile *tbf = twoBitOpen(inName);
 int version = tbf->version;
 *list =  twoBitList = twoBitFromOpenFile(tbf);
 /* Free and clear the masking data (unless -add).  Hash twoBits by name. */
 for (twoBit = twoBitList;  twoBit != NULL;  twoBit = twoBit->next)
     {
     Bits *bits = bitAlloc(twoBit->size);
     if (add)
@@ -64,54 +64,54 @@
 	    bitSetRange(bits, twoBit->maskStarts[i], twoBit->maskSizes[i]);
 	    }
 	}
     /* Free the current representation of masking -- it will be replaced. */
     twoBit->maskBlockCount = 0;
     freez(&(twoBit->maskStarts));
     freez(&(twoBit->maskSizes));
     /* Hash twoBit and our new bitmap by sequence name. */
     hashAddUnique(tbHash, twoBit->name, twoBit);
     hashAddUnique(bitmapHash, twoBit->name, bits);
     }
 return version;
 }
 
 
-void addMasking(struct hash *twoBitHash, struct hash *bitmapHash, char *seqName,
+static void addMasking(struct hash *twoBitHash, struct hash *bitmapHash, char *seqName,
 		unsigned start, unsigned end)
 /* Set bits in range. */
 {
 if (end > start)
     {
     struct twoBit *tb = (struct twoBit *)hashMustFindVal(twoBitHash, seqName);
     if ((end > tb->size) || (start >= tb->size))
 	errAbort("bed range (%d - %d) is off the end of chromosome %s size %d",
 	    start, end, seqName, tb->size);
     Bits *bits = (Bits *)hashMustFindVal(bitmapHash, seqName);
     bitSetRange(bits, start, (end - start));
     }
 }
 
 
 struct unsignedRange
     {
     struct unsignedRange *next;
     unsigned start;
     unsigned size;
     };
 
-void bitmapToMaskArray(struct hash *bitmapHash, struct hash *tbHash)
+static void bitmapToMaskArray(struct hash *bitmapHash, struct hash *tbHash)
 /* Translate each bitmap in bitmapHash into an array of mask coordinates
  * in the corresponding twoBit in tbHash.  Assume tbHash's mask array is
  * empty at the start -- we allocate it here.  Free bitmap when done. */
 {
 struct hashCookie cookie = hashFirst(tbHash);
 struct hashEl *hel = NULL;
 
 while ((hel = hashNext(&cookie)) != NULL)
     {
     char *seqName = hel->name;
     struct twoBit *tb = (struct twoBit *)(hel->val);
     struct hashEl *bHel = hashLookup(bitmapHash, seqName);
     Bits *bits;
     unsigned start=0, end=0;
 
@@ -148,57 +148,77 @@
 	for (i = 0, range = rangeList;  range != NULL;
 	     i++, range = range->next)
 	    {
 	    tb->maskStarts[i] = range->start;
 	    tb->maskSizes[i] = range->size;
 	    }
 	    }
 	lmCleanup(&lm);
 	bitFree(&bits);
 	bHel->val = NULL;
 	}
     }
 }
 
 
-void maskWithBed(char *bedName, struct hash *tbHash, struct hash *bitmapHash)
+static void maskWithBigBed(char *bbName, struct hash *tbHash, struct hash *bitmapHash)
+/* Mask every interval in bigBed file bbName, then store the masking in tbHash's twoBits. */
+{
+struct bbiFile *bbi = bigBedFileOpen(bbName);
+struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi);
+for (chrom = chromList; chrom != NULL; chrom = chrom->next)
+    {
+    struct lm *lm = lmInit(0);	/* handed to the query; released once per chromosome */
+    struct bigBedInterval *el = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
+    for ( ; el != NULL; el = el->next)
+	{
+	addMasking(tbHash, bitmapHash, chrom->name, el->start, el->end);
+	}
+    lmCleanup(&lm);
+    }
+bbiChromInfoFreeList(&chromList);	/* chromList was allocated by bbiChromList */
+bigBedFileClose(&bbi);
+bitmapToMaskArray(bitmapHash, tbHash);
+}
+
+static void maskWithBed(char *bedName, struct hash *tbHash, struct hash *bitmapHash)
 /* Read coordinates from bedName and apply them to twoBits in tbHash. */
 {
 struct lineFile *lf = lineFileOpen(bedName, TRUE);
 int wordCount;
 char *words[13];
 boolean alreadyWarned = FALSE;
 while ((wordCount = lineFileChop(lf, words)) != 0)
     {
     struct bed bed;
     /* warn if bed has at least 12 fields -- no support for blocks */
     if (wordCount >= 12 && !alreadyWarned)
 	{
 	warn("Warning: BED file %s has >=%d fields which means it might "
 	     "contain block coordinates, but this program uses only the "
 	     "first three fields (the entire span -- no support for blocks).",
 	     bedName, wordCount);
 	alreadyWarned = TRUE;
 	}
     bedStaticLoad(words, &bed);
     addMasking(tbHash, bitmapHash, bed.chrom, bed.chromStart, bed.chromEnd);
     }
 bitmapToMaskArray(bitmapHash, tbHash);
 }
 
 
-void maskWithOut(char *outName, struct hash *tbHash, struct hash *bitmapHash)
+static void maskWithOut(char *outName, struct hash *tbHash, struct hash *bitmapHash)
 /* Read coordinates from outName and apply them to twoBits in tbHash. */
 {
 struct lineFile *lf = lineFileOpen(outName, TRUE);
 char *line;
 int lineSize;
 
 /* Make sure we have a .out header. */
 if (!lineFileNext(lf, &line, &lineSize))
     errAbort("Empty %s", lf->fileName);
 if (!startsWith("   SW  perc perc", line))
     {
     if (!startsWith("   SW   perc perc", line))
 	errAbort("%s doesn't seem to be a RepeatMasker .out file, first "
 	    "line seen:\n%s", lf->fileName, line);
     }
@@ -238,30 +258,32 @@
 FILE *f = NULL;
 
 if (! twoBitIsFile(inName))
     {
     if (twoBitIsSpec(inName))
 	errAbort("Sorry, this works only on whole .2bit files, not specs.");
     else
 	errAbort("Input %s does not look like a proper .2bit file.", inName);
     }
 
 unsigned version = slurpInput(inName, tbHash, bitmapHash, &twoBitList);
 
 /* Read mask data into bitmapHash, store it in twoBits: */
 if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed"))
     maskWithBed(maskName, tbHash, bitmapHash);
+else if ((type && endsWith(type, "bb")) || endsWith(maskName, ".bb"))
+    maskWithBigBed(maskName, tbHash, bitmapHash);
 else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out"))
     maskWithOut(maskName, tbHash, bitmapHash);
 else
-    errAbort("Sorry, maskFile must end in \".bed\" or \".out\".");
+    errAbort("Sorry, maskFile must end in \".bed\", \".bb\" or \".out\".");
 
 /* Create a new .2bit file, write it out from twoBits. */
 f = mustOpen(outName, "wb");
 twoBitWriteHeaderExt(twoBitList, f, version == 1);
 for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next)
     {
     twoBitWriteOne(twoBit, f);
     }
 carefulClose(&f);
 
 /* Don't bother freeing twoBitList and hashes here -- just exit. */