b622d147b7dbac52dbf3ba26928cd18e02d42bd8
braney
  Sat Feb 26 12:34:37 2022 -0800
add support for using a bigBed as the chromAlias file

diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c
index 79cc541..a0d1027 100644
--- src/hg/lib/chromAlias.c
+++ src/hg/lib/chromAlias.c
@@ -1,360 +1,481 @@
 /* chromAlias.c was originally generated by the autoSql program, which also 
  * generated chromAlias.h and chromAlias.sql.  This module links the database and
  * the RAM representation of objects. */
 
+#include <pthread.h>
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "chromAlias.h"
 #include "hdb.h"
 #include "trackHub.h"
 #include "fieldedTable.h"
+#include "bigBed.h"
+#include "bPlusTree.h"
 
 
 char *chromAliasCommaSepFieldNames = "alias,chrom,source";
 
 void chromAliasStaticLoad(char **row, struct chromAlias *ret)
 /* Load a row from chromAlias table into ret.  The contents of ret will
  * be replaced at the next call to this function. */
 {
 
 ret->alias = row[0];
 ret->chrom = row[1];
 ret->source = row[2];
 }
 
 struct chromAlias *chromAliasLoad(char **row)
 /* Load a chromAlias from row fetched with select * from chromAlias
  * from database.  Dispose of this with chromAliasFree(). */
 {
 struct chromAlias *ret;
 
 AllocVar(ret);
 ret->alias = cloneString(row[0]);
 ret->chrom = cloneString(row[1]);
 ret->source = cloneString(row[2]);
 return ret;
 }
 
 struct chromAlias *chromAliasLoadAll(char *fileName) 
 /* Load all chromAlias from a whitespace-separated file.
  * Dispose of this with chromAliasFreeList(). */
 {
 struct chromAlias *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[3];
 
 while (lineFileRow(lf, row))
     {
     el = chromAliasLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper) 
 /* Load all chromAlias from a chopper separated file.
  * Dispose of this with chromAliasFreeList(). */
 {
 struct chromAlias *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[3];
 
 while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
     {
     el = chromAliasLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret)
 /* Create a chromAlias out of a comma separated string. 
  * This will fill in ret if non-null, otherwise will
  * return a new chromAlias */
 {
 char *s = *pS;
 
 if (ret == NULL)
     AllocVar(ret);
 ret->alias = sqlStringComma(&s);
 ret->chrom = sqlStringComma(&s);
 ret->source = sqlStringComma(&s);
 *pS = s;
 return ret;
 }
 
 void chromAliasFree(struct chromAlias **pEl)
 /* Free a single dynamically allocated chromAlias such as created
  * with chromAliasLoad(). */
 {
 struct chromAlias *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->alias);
 freeMem(el->chrom);
 freeMem(el->source);
 freez(pEl);
 }
 
 void chromAliasFreeList(struct chromAlias **pList)
 /* Free a list of dynamically allocated chromAlias's */
 {
 struct chromAlias *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     chromAliasFree(&el);
     }
 *pList = NULL;
 }
 
 void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep) 
 /* Print out chromAlias.  Separate fields with sep. Follow last field with lastSep. */
 {
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->alias);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->chrom);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->source);
 if (sep == ',') fputc('"',f);
 fputc(lastSep,f);
 }
 
 void chromAliasJsonOutput(struct chromAlias *el, FILE *f) 
 /* Print out chromAlias in JSON format. */
 {
 fputc('{',f);
 fputc('"',f);
 fprintf(f,"alias");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->alias);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"chrom");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->chrom);
 fputc('"',f);
 fputc(',',f);
 fputc('"',f);
 fprintf(f,"source");
 fputc('"',f);
 fputc(':',f);
 fputc('"',f);
 fprintf(f, "%s", el->source);
 fputc('"',f);
 fputc('}',f);
 }
 
 /* -------------------------------- End autoSql Generated Code -------------------------------- */
 
 /* our "global" data */
+struct bptIndex    /* One extra index of the alias bigBed, as opened by getBpts(). */
+{
+struct bptIndex *next;    /* Next index in the list. */
+int fieldIx;              /* Field index filled in by bigBedOpenExtraIndex(). */
+struct bptFile *bpt;      /* Index handle returned by bigBedOpenExtraIndex(). */
+};
+
 static struct
 {
 boolean inited;
-char *database;
+boolean bptInited;           /* TRUE once getBpts() has built bptList. */
+struct bptIndex *bptList;    /* Extra indices of bbi, filled in by getBpts(). */
+struct bbiFile *bbi;         /* Alias bigBed opened by chromAliasSetupBb(), if any. */
 struct hash *chromToAliasHash;
 struct hash *aliasToChromHash;
-struct hash *forwardHash;
-struct hash *reverseHash;
-} chromHashes;
-
-static boolean checkDatabase(char *database)
-/* Make sure we don't see different databases. */
-{
-if (database == NULL) 
-    return TRUE;
-
-if (chromHashes.database != NULL)
-    {
-    if (!sameString(chromHashes.database, database))
-        {
-        errAbort("chromAliasSetup: only works for one db.  %s was passed in earlier, now %s.", chromHashes.database, database);
-        return FALSE;
-        }
-    return TRUE;
-    }
-
-chromHashes.database = cloneString(database);
-return TRUE;
-}
+} chromAliasGlobals;
 
 static void readOldAlias(struct lineFile *lf)
-/* Don't assume the table is fully populated, and dummy up a value for source. */
+/* Read old-style alias lines: the first word is the native name, every further non-empty word an alias. */
 {
 char *words[1024];	/* process lines, no more than 1,024 words on a line */
 char *line;
 int size;
 while (lineFileNext(lf, &line, &size))
     {
     int wordCount = chopByWhite(line, words, ArraySize(words));
     if (wordCount > 1)
         {
         int i = 1;
         for ( ; i < wordCount; ++i )
             {
             if (isNotEmpty(words[i]))
                 {
-                struct chromAlias *ali;
-                AllocVar(ali);
-                ali->alias = cloneString(words[i]);
-                ali->chrom = cloneString(words[0]);
-                ali->source = cloneString("asmHub");
-                hashAdd(chromHashes.forwardHash, ali->alias, ali);
-                hashAdd(chromHashes.reverseHash, ali->chrom, ali);
-                hashAdd(chromHashes.chromToAliasHash, ali->chrom, ali->alias);
-                hashAdd(chromHashes.aliasToChromHash, ali->alias, ali->chrom);
-                //hashAdd(aliasHash, words[0], ali);
+                char *alias = cloneString(words[i]);
+                char *chrom = cloneString(words[0]);
+                hashAdd(chromAliasGlobals.chromToAliasHash, chrom, alias);
+                hashAdd(chromAliasGlobals.aliasToChromHash, alias, chrom);
                 }
             }
         }
     }
 }
 
 static void readFieldedTable(struct lineFile *lf)
-/* Use the fieldedTable library to read in fully populated chromAlias.txt file. */
+/* Use the fieldedTable library to read a fully populated chromAlias.txt: column 0 is the native name, the rest are aliases. */
 {
 struct fieldedTable *aliasTable = fieldedTableAttach(lf, NULL, 0); 
-
 struct fieldedRow *row;
 for(row = aliasTable->rowList; row; row = row->next)
     {
     char *chrom = row->row[0];
 
     unsigned field;
     for(field=1; field< aliasTable->fieldCount; field++)
         {
-        struct chromAlias *new;
-        AllocVar(new);
-        new->chrom = chrom;
-        new->alias = row->row[field];
-        new->source = aliasTable->fields[field];
-
-        hashAdd(chromHashes.forwardHash, new->alias, new);
-        hashAdd(chromHashes.reverseHash, new->chrom, new);
-        hashAdd(chromHashes.chromToAliasHash, new->chrom, new->alias);
-        hashAdd(chromHashes.aliasToChromHash, new->alias, new->chrom);
+        char *alias = row->row[field];   // NOTE(review): empty cells are not skipped here, unlike readOldAlias()
+        hashAdd(chromAliasGlobals.chromToAliasHash, chrom, alias);
+        hashAdd(chromAliasGlobals.aliasToChromHash, alias, chrom);
+        }
     }
 }
+
+static char * gbdbBbExists(char *database)
+/* Return the path of a gbdb bigBed to use as the alias file for database, or NULL if there is none. */
+{
+// not supported at the moment, so always report that no such file exists
+return NULL;
+}
+
+static void chromAliasSetupBb(char *database, char *bbFile)
+/* Open bbFile as the chromAlias bigBed and remember it in chromAliasGlobals.bbi.  database is not used. */
+{
+chromAliasGlobals.bbi = bigBedFileOpen(bbFile);
 }
 
 static void chromAliasSetupHub(char *database)
-/* Look for a chromAlias text table and load the hashes with its contents. */
+/* Open the hub's chromAlias bigBed if it has one; otherwise load the hashes from its chromAlias text file, if any. */
 {
+char *aliasBbFile = trackHubAliasBbFile(database);
+if (aliasBbFile != NULL)
+    {
+    chromAliasSetupBb(database, aliasBbFile);
+    return;
+    }
 char *aliasFile = trackHubAliasFile(database);
 if (aliasFile == NULL)
     return;
 
 struct lineFile *lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE);
 
-chromHashes.forwardHash = hashNew(0);
-chromHashes.reverseHash = hashNew(0);
-chromHashes.chromToAliasHash = hashNew(0);
-chromHashes.aliasToChromHash = hashNew(0);
+chromAliasGlobals.chromToAliasHash = hashNew(0);
+chromAliasGlobals.aliasToChromHash = hashNew(0);
 
 char *line;
 if (!lineFileNext(lf, &line, NULL))
    errAbort("%s is empty", lf->fileName);
 lineFileReuse(lf);
 
 // for the moment always read the alias file in the "old" way
 //if (line[0] == '#')
 if (0)
     readFieldedTable(lf);
 else
     readOldAlias(lf);
 lineFileClose(&lf);
 }
 
 static void chromAliasSetupSql(char *database)
 /* Look for a chromAlias SQL table and load the hashes with its contents. */
 {
 if (!hTableExists(database, "chromAlias"))
     return;
 
 struct sqlConnection *conn = hAllocConn(database);
-chromHashes.forwardHash = hashNew(0);
-chromHashes.reverseHash = hashNew(0);
-chromHashes.chromToAliasHash = hashNew(0);
-chromHashes.aliasToChromHash = hashNew(0);
+chromAliasGlobals.chromToAliasHash = hashNew(0);
+chromAliasGlobals.aliasToChromHash = hashNew(0);
 
 char query[2048];
 sqlSafef(query, sizeof(query), "select * from chromAlias");
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    struct chromAlias *new = chromAliasLoad(row);
-    hashAdd(chromHashes.forwardHash, new->alias, new);
-    hashAdd(chromHashes.reverseHash, new->chrom, new);
-    hashAdd(chromHashes.chromToAliasHash, new->chrom, new->alias);
-    hashAdd(chromHashes.aliasToChromHash, new->alias, new->chrom);
+    char *alias = cloneString(row[0]), *chrom = cloneString(row[1]);  // same column order as chromAliasLoad(); source is not kept
+    hashAdd(chromAliasGlobals.chromToAliasHash, chrom, alias);
+    hashAdd(chromAliasGlobals.aliasToChromHash, alias, chrom);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
 
+static pthread_mutex_t ourMutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void getLock()
+/* Acquire the mutex that guards the chromAlias globals and lookup caches. */
+{
+pthread_mutex_lock( &ourMutex );
+}
+
+static void releaseLock()
+/* Release the mutex taken by getLock(). */
+{
+pthread_mutex_unlock( &ourMutex );
+}
+
 void chromAliasSetup(char *database)
-/* Read in the chromAlias file/table for this database. */
+/* Read in the chromAlias file/table for this database.  Only the first call does any work. */
 {
 if (database == NULL)
     return;
 
-if (!checkDatabase(database))
-    return;
-
-if (chromHashes.inited)
-    return;
-chromHashes.inited = TRUE;
-
-if (trackHubDatabase(database))
-    chromAliasSetupHub(database);
-else
-    chromAliasSetupSql(database);
+getLock();
+if (!chromAliasGlobals.inited)   // no early return here: the lock must be released on every path
+    {
+    chromAliasGlobals.inited = TRUE;
+    char *gbdbFile;
+    if (trackHubDatabase(database))
+        chromAliasSetupHub(database);
+    else if ((gbdbFile = gbdbBbExists(database)) != NULL)
+        chromAliasSetupBb(database, gbdbFile);
+    else
+        chromAliasSetupSql(database);
+    }
+releaseLock();
 }
 
-struct hash *chromAliasMakeLookupTable(char *database)
-/* Given a database name and a connection to that database, construct a lookup table
- * that takes chromosome alias names to a matching struct chromAlias.  Returns NULL
- * if the given database does not have a chromAlias table. */
+char *findNativeHashes(char *alias)
+/* Look the alias up in the aliasToChrom hash; return a clone of the native name, or NULL. */
 {
-return chromHashes.forwardHash;
+char *native = hashFindVal(chromAliasGlobals.aliasToChromHash, alias);
+
+/* A missing or empty entry means there is no native name for this alias. */
+return isNotEmpty(native) ? cloneString(native) : NULL;
 }
 
-struct hash *chromAliasMakeReverseLookupTable(char *database)
-/* Given a database name and a connection to that database, construct a lookup table
- * that takes the actual assembly chromosome names to struct chromAliases.  Because a
- * chromosome name may well have multiple aliases, repeated calls to hashLookupNext
- * may be required to see them all.  Returns NULL if the given database does not have
- * a chromAlias table. */
+static struct bptIndex *getBpts(struct bbiFile *bbi)
+/* Return the extra indices of this bigBed.  They are opened on the first call
+ * and the saved list is handed back on every later call. */
+{
+if (!chromAliasGlobals.bptInited)
+    {
+    /* First call: open every extra index the bigBed lists. */
+    struct bptIndex *opened = NULL;
+    struct slName *ixName;
+
+    for(ixName = bigBedListExtraIndexes(bbi); ixName != NULL; ixName = ixName->next)
         {
-return chromHashes.reverseHash;
+        struct bptIndex *el;
+        AllocVar(el);
+        el->bpt = bigBedOpenExtraIndex(bbi, ixName->name, &el->fieldIx);
+        slAddHead(&opened, el);
         }
+    chromAliasGlobals.bptList = opened;
+    chromAliasGlobals.bptInited = TRUE;
+    }
+
+return chromAliasGlobals.bptList;
+}
+
+char *findNativeBb(struct bbiFile *bbi, char *alias)
+/* Find the native seqName for a given alias given a bigBed.  Returns a cloned
+ * string, or NULL if none of the bigBed's extra indices knows the alias. */
+{
+struct bptIndex *bptIndex = getBpts(bbi);
+char *chrom = NULL;
 
-struct hash *chromAliasAliasToChromHash(char *database)
-/* Get the hash that maps chrom names to their aliases. */
+for(; bptIndex != NULL && chrom == NULL; bptIndex = bptIndex->next)
     {
-return chromHashes.aliasToChromHash;
+    struct lm *lm = lmInit(0);
+    struct bigBedInterval *bb = bigBedNameQuery(bbi, bptIndex->bpt, bptIndex->fieldIx, alias, lm);
+    if (bb != NULL)
+        {
+        char chromName[1024];
+        bptStringKeyAtPos(bbi->chromBpt, bb->chromId,  chromName, sizeof(chromName));
+        chrom = cloneString(chromName);
+        }
+    lmCleanup(&lm);   // release the per-query pool on every pass; chrom is our own copy
+    }
+return chrom;
+}
+
+char *chromAliasFindNative(char *alias)
+/* Find the native seqName for a given alias.  Returns a freshly cloned string
+ * that the caller owns, or NULL if the alias is not known. */
+{
+static struct hash *cachedNative;
+char *chrom;
+
+/* Creating, reading and filling the cache all happen under the lock. */
+getLock();
+if (cachedNative == NULL)
+    cachedNative = newHash(6);
+
+if ((chrom = hashFindVal(cachedNative, alias)) == NULL)
+    {
+    if (chromAliasGlobals.bbi)
+        chrom = findNativeBb(chromAliasGlobals.bbi, alias);
+    else if (chromAliasGlobals.aliasToChromHash)
+        chrom = findNativeHashes(alias);
+
+    if (chrom != NULL)   // a NULL value would look like a miss and pile up duplicates
+        hashAdd(cachedNative, alias, chrom);
+    }
+char *ret = cloneString(chrom);   // hit and miss alike hand out a copy, never the cached string
+releaseLock();
+return ret;
 }
 
-struct hash *chromAliasChromToAliasHash(char *database)
-/* Get the hash that maps chrom names to their aliases. */
+struct slName *findAliasesBb(struct bbiFile *bbi, char *seqName)
+/* Find the aliases for a given seqName using the alias bigBed; returns a new slName list (NULL if none). */
+{
+struct lm *lm = lmInit(0);
+struct bigBedInterval *bb, *bbList =  bigBedIntervalQuery(bbi, seqName, 0, 1, 0, lm);
+char *bedRow[bbi->fieldCount];
+char startBuf[16], endBuf[16];
+struct slName *list = NULL;
+for (bb = bbList; bb != NULL; bb = bb->next)
+    {
+    bigBedIntervalToRow(bb, seqName, startBuf, endBuf, bedRow, ArraySize(bedRow));
+    int ii;
+    for(ii=3; ii < bbi->fieldCount; ii++)   // bound must match bedRow, which is sized from this same bbi
 	{
-return chromHashes.chromToAliasHash;
+	struct slName *name = newSlName(bedRow[ii]);
+	slAddHead(&list, name);
+	}
+    }
+lmCleanup(&lm);   // NOTE(review): relies on newSlName() copying its string argument
+return list;
+}
+
+struct slName *findAliasesHashes(char *seqName)
+/* Collect every alias stored for seqName in the chromToAlias hash. */
+{
+struct slName *aliasList = NULL;
+struct hashEl *hel;
+
+/* A name may have several entries; each one is pushed onto the front of the list. */
+for (hel = hashLookup(chromAliasGlobals.chromToAliasHash, seqName); hel != NULL; hel = hashLookupNext(hel))
+    {
+    slAddHead(&aliasList, newSlName((char *)hel->val));
+    }
+
+return aliasList;
+}
+
+struct slName *chromAliasFindAliases(char *seqName)
+/* Find the aliases for a given seqName.  The returned list belongs to an
+ * internal cache, so the caller must not free or modify it. */
+{
+static struct hash *cachedAliases;
+struct slName *aliases;
+
+/* Creating, reading and filling the cache all happen under the lock. */
+getLock();
+if (cachedAliases == NULL)
+    cachedAliases = newHash(6);
+
+if ((aliases = hashFindVal(cachedAliases, seqName)) == NULL)
+    {
+    if (chromAliasGlobals.bbi)
+        aliases = findAliasesBb(chromAliasGlobals.bbi, seqName);
+    else if (chromAliasGlobals.chromToAliasHash)
+        aliases = findAliasesHashes(seqName);
+
+    if (aliases != NULL)   // a NULL value would look like a miss and pile up duplicates
+        hashAdd(cachedAliases, seqName, aliases);
+    }
+releaseLock();
+
+return aliases;
 }