54406b80d5d435970989acff7b22dd6146c6b411
braney
  Sat Jan 22 15:24:59 2022 -0800
adding chrom alias support to big files

diff --git src/lib/bbiRead.c src/lib/bbiRead.c
index f517a34..2c7fadc 100644
--- src/lib/bbiRead.c
+++ src/lib/bbiRead.c
@@ -65,41 +65,42 @@
 if (isSwapped)
     {
     magic = byteSwap32(magic);
     if (magic != sig)
         return FALSE;
     }
 else
     {
     if (magic != sig)
         return FALSE;
     }
 
 return TRUE;
 }
 
-struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
-/* Open up big wig or big bed file. */
+struct bbiFile *bbiFileOpenAlias(char *fileName, bits32 sig, char *typeName, struct hash *aliasHash)
+/* Open up big wig or big bed file using a chrom alias hash if non-NULL */
 {
 /* This code needs to agree with code in two other places currently - bigBedFileCreate,
  * and bigWigFileCreate.  I'm thinking of refactoring to share at least between
  * bigBedFileCreate and bigWigFileCreate.  It'd be great so it could be structured
  * so that it could send the input in one chromosome at a time, and send in the zoom
  * stuff only after all the chromosomes are done.  This'd potentially reduce the memory
  * footprint by a factor of 2 or 4.  Still, for now it works. -JK */
 struct bbiFile *bbi;
 AllocVar(bbi);
+bbi->aliasHash = aliasHash;
 bbi->fileName = cloneString(fileName);
 struct udcFile *udc = bbi->udc = udcFileOpen(fileName, udcDefaultDir());
 
 /* Read magic number at head of file and use it to see if we are proper file type, and
  * see if we are byte-swapped. */
 bits32 magic;
 boolean isSwapped = FALSE;
 udcMustRead(udc, &magic, sizeof(magic));
 if (magic != sig)
     {
     magic = byteSwap32(magic);
     isSwapped = TRUE;
     if (magic != sig)
        errAbort("%s is not a %s file", fileName, typeName);
     }
@@ -138,72 +139,110 @@
 if (bbi->extensionOffset != 0)
     {
     udcSeek(udc, bbi->extensionOffset);
     bbi->extensionSize = udcReadBits16(udc, isSwapped);
     bbi->extraIndexCount = udcReadBits16(udc, isSwapped);
     bbi->extraIndexListOffset = udcReadBits64(udc, isSwapped);
     }
 
 /* Attach B+ tree of chromosome names and ids. */
 udcSeek(udc, bbi->chromTreeOffset);
 bbi->chromBpt =  bptFileAttach(fileName, udc);
 
 return bbi;
 }
 
+struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName)
+/* Open up big wig or big bed file with no chrom alias hash (passes NULL to bbiFileOpenAlias). */
+{
+return bbiFileOpenAlias(fileName, sig, typeName, NULL);
+}
+
 void bbiFileClose(struct bbiFile **pBwf)
 /* Close down a big wig/big bed file. */
 {
 struct bbiFile *bwf = *pBwf;
 if (bwf != NULL)
     {
     cirTreeFileDetach(&bwf->unzoomedCir);
     slFreeList(&bwf->levelList);
     slFreeList(&bwf->levelList);
     bptFileDetach(&bwf->chromBpt);
     udcFileClose(&bwf->udc);
     freeMem(bwf->fileName);
     freez(pBwf);
     }
 }
 
 static void chromIdSizeHandleSwapped(boolean isSwapped, struct bbiChromIdSize *idSize)
 /* Swap bytes in chromosome Id and Size as needed. */
 {
 if (isSwapped)
     {
     idSize->chromId = byteSwap32(idSize->chromId);
     idSize->chromSize = byteSwap32(idSize->chromSize);
     }
 }
 
 
-struct fileOffsetSize *bbiOverlappingBlocks(struct bbiFile *bbi, struct cirTreeFile *ctf,
-	char *chrom, bits32 start, bits32 end, bits32 *retChromId)
-/* Fetch list of file blocks that contain items overlapping chromosome range. */
+static struct bbiChromIdSize *getChromIdSize(struct bbiFile *bbi, char *chrom)
+/* Return newly allocated idSize for given chrom, using chrom alias if available.
+ * Returns NULL if chrom (or any alias of it) is not in file.  freeMem() result when done. */
+{
+struct bbiChromIdSize found, *idSize;
+// first look for the given chrom name; valSize must be the struct size, not a pointer size
+if (!bptFileFind(bbi->chromBpt, chrom, strlen(chrom), &found, sizeof(found)))
     {
-struct bbiChromIdSize idSize;
-if (!bptFileFind(bbi->chromBpt, chrom, strlen(chrom), &idSize, sizeof(idSize)))
+    if (bbi->aliasHash)
+        {
+        // didn't find chrom name, but have an alias hash.  Try the aliases
+        struct hashEl *hel = hashLookup(bbi->aliasHash, chrom);
+        for (; hel != NULL; hel = hashLookupNext(hel))
+            {
+            char *alias = hel->val;
+            if (bptFileFind(bbi->chromBpt, alias, strlen(alias), &found, sizeof(found)))
+                break;
+            }
+        if (hel == NULL)   // ran out of aliases without a hit: report "not found"
+            return NULL;
+        }
+    else
         {
         // if chrom is not found and the chrom starts with "chr", try without "chr"
-    if (!startsWith("chr", chrom) || !bptFileFind(bbi->chromBpt, &chrom[3], strlen(chrom) - 3, &idSize, sizeof(idSize)))
+        if (!startsWith("chr", chrom) || !bptFileFind(bbi->chromBpt, &chrom[3], strlen(chrom) - 3, &found, sizeof(found)))
             return NULL;
         }
-chromIdSizeHandleSwapped(bbi->isSwapped, &idSize);
+    }
+chromIdSizeHandleSwapped(bbi->isSwapped, &found);
+// allocate only after a successful lookup so the NULL returns above cannot leak
+AllocVar(idSize);
+*idSize = found;
+return idSize;
+}
+
+struct fileOffsetSize *bbiOverlappingBlocks(struct bbiFile *bbi, struct cirTreeFile *ctf,
+	char *chrom, bits32 start, bits32 end, bits32 *retChromId)
+/* Fetch list of file blocks that contain items overlapping chromosome range. */
+{
+struct bbiChromIdSize *idSize = getChromIdSize(bbi, chrom);
+if (idSize == NULL)
+    return NULL;
+bits32 chromId = idSize->chromId;
+freeMem(idSize);   // lookup result is heap allocated; only the id is needed from here on
 if (retChromId != NULL)
-    *retChromId = idSize.chromId;
-return cirTreeFindOverlappingBlocks(ctf, idSize.chromId, start, end);
+    *retChromId = chromId;
+return cirTreeFindOverlappingBlocks(ctf, chromId, start, end);
 }
 
 struct chromNameCallbackContext
 /* Some stuff that the bPlusTree traverser needs for context. */
     {
     struct bbiChromInfo *list;		/* The list we are building. */
     boolean isSwapped;			/* Need to byte-swap things? */
     };
 
 static void chromNameCallback(void *context, void *key, int keySize, void *val, int valSize)
 /* Callback that captures chromInfo from bPlusTree. */
 {
 struct chromNameCallbackContext *c = context;
 struct bbiChromInfo *info;
 struct bbiChromIdSize *idSize = val;
@@ -218,35 +257,36 @@
 
 struct bbiChromInfo *bbiChromList(struct bbiFile *bbi)
 /* Return list of chromosomes. */
 {
 struct chromNameCallbackContext context;
 context.list = NULL;
 context.isSwapped = bbi->isSwapped;
 bptFileTraverse(bbi->chromBpt, &context, chromNameCallback);
 slReverse(&context.list);
 return context.list;
 }
 
 bits32 bbiChromSize(struct bbiFile *bbi, char *chrom)
 /* Return chromosome size, or 0 if no such chromosome in file. */
 {
-struct bbiChromIdSize idSize;
-if (!bptFileFind(bbi->chromBpt, chrom, strlen(chrom), &idSize, sizeof(idSize)))
+struct bbiChromIdSize *idSize = getChromIdSize(bbi, chrom);
+if (idSize == NULL)
     return 0;
-chromIdSizeHandleSwapped(bbi->isSwapped, &idSize);
-return idSize.chromSize;
+bits32 chromSize = idSize->chromSize;
+freeMem(idSize);   // getChromIdSize returns heap memory; release it once the size is copied
+return chromSize;
 }
 
 void bbiChromInfoFree(struct bbiChromInfo **pInfo)
 /* Free up one chromInfo */
 {
 struct bbiChromInfo *info = *pInfo;
 if (info != NULL)
     {
     freeMem(info->name);
     freez(pInfo);
     }
 }
 
 void bbiChromInfoFreeList(struct bbiChromInfo **pList)
 /* Free a list of dynamically allocated bbiChromInfo's */
@@ -491,35 +531,36 @@
 		maxVal = sum->maxVal;
 	    if (minVal > sum->minVal)
 		minVal = sum->minVal;
 	    }
 	}
 
     if (countFactor > 0)
 	validCount = normalizeCount(el, countFactor, minVal, maxVal, sumData, sumSquares);
     }
 return validCount;
 }
 
 static int bbiChromId(struct bbiFile *bbi, char *chrom)
 /* Return chromosome Id */
 {
-struct bbiChromIdSize idSize;
-if (!bptFileFind(bbi->chromBpt, chrom, strlen(chrom), &idSize, sizeof(idSize)))
+struct bbiChromIdSize *idSize = getChromIdSize(bbi, chrom);
+if (idSize == NULL)
     return -1;
-chromIdSizeHandleSwapped(bbi->isSwapped, &idSize);
-return idSize.chromId;
+int chromId = idSize->chromId;
+freeMem(idSize);   // getChromIdSize returns heap memory; release it once the id is copied
+return chromId;
 }
 
 static boolean bbiSummaryArrayFromZoom(struct bbiZoomLevel *zoom, struct bbiFile *bbi, 
 	char *chrom, bits32 start, bits32 end,
 	int summarySize, struct bbiSummaryElement *summary)
 /* Look up region in index and get data at given zoom level.  Summarize this data
  * in the summary array. */
 {
 boolean result = FALSE;
 int chromId = bbiChromId(bbi, chrom);
 if (chromId < 0)
     return FALSE;
 struct bbiSummary *sum, *sumList = bbiSummariesInRegion(zoom, bbi, chromId, start, end);
 if (sumList != NULL)
     {