eb4b6546550cba82f0031c043ee4f55bdbec8c30 hiram Tue Mar 21 13:43:21 2023 -0700 expose chromAliasSetupBb for use in hubApi refs #30544 diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c index 0f49981..87769d8 100644 --- src/hg/lib/chromAlias.c +++ src/hg/lib/chromAlias.c @@ -1,544 +1,544 @@ /* chromAlias.c was originally generated by the autoSql program, which also * generated chromAlias.h and chromAlias.sql. This module links the database and * the RAM representation of objects. */ #include <pthread.h> #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "cart.h" #include "chromAlias.h" #include "hdb.h" #include "trackHub.h" #include "fieldedTable.h" #include "bigBed.h" #include "bbiAlias.h" #include "bPlusTree.h" #include "errCatch.h" char *chromAliasCommaSepFieldNames = "alias,chrom,source"; void chromAliasStaticLoad(char **row, struct chromAlias *ret) /* Load a row from chromAlias table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->alias = row[0]; ret->chrom = row[1]; ret->source = row[2]; } struct chromAlias *chromAliasLoad(char **row) /* Load a chromAlias from row fetched with select * from chromAlias * from database. Dispose of this with chromAliasFree(). */ { struct chromAlias *ret; AllocVar(ret); ret->alias = cloneString(row[0]); ret->chrom = cloneString(row[1]); ret->source = cloneString(row[2]); return ret; } struct chromAlias *chromAliasLoadAll(char *fileName) /* Load all chromAlias from a whitespace-separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileRow(lf, row)) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper) /* Load all chromAlias from a chopper separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret) /* Create a chromAlias out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new chromAlias */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->alias = sqlStringComma(&s); ret->chrom = sqlStringComma(&s); ret->source = sqlStringComma(&s); *pS = s; return ret; } void chromAliasFree(struct chromAlias **pEl) /* Free a single dynamically allocated chromAlias such as created * with chromAliasLoad(). */ { struct chromAlias *el; if ((el = *pEl) == NULL) return; freeMem(el->alias); freeMem(el->chrom); freeMem(el->source); freez(pEl); } void chromAliasFreeList(struct chromAlias **pList) /* Free a list of dynamically allocated chromAlias's */ { struct chromAlias *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; chromAliasFree(&el); } *pList = NULL; } void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep) /* Print out chromAlias. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->alias); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->source); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void chromAliasJsonOutput(struct chromAlias *el, FILE *f) /* Print out chromAlias in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"alias"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->alias); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"chrom"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->chrom); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"source"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->source); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ /* our "global" data */ static struct { boolean inited; boolean bptInited; struct bptIndex *bptList; struct bbiFile *bbi; struct lm *lm; int fieldCount; /* Number of fields. */ char **fields; /* Names of fields. */ struct hash *chromToAliasHash; struct hash *aliasToChromHash; } chromAliasGlobals; static void readOldAlias(struct lineFile *lf) /* Don't assume the table is fully populated, and dummy up a value for source. */ { char *words[1024]; /* process lines, no more than 1,024 words on a line */ char *line; int size; while (lineFileNext(lf, &line, &size)) { int wordCount = chopByWhite(line, words, ArraySize(words)); if (wordCount > 1) { int i = 1; char *native = cloneString(words[0]); for ( ; i < wordCount; ++i ) { if (isNotEmpty(words[i])) { struct chromAlias *chromAlias; AllocVar(chromAlias); chromAlias->chrom = native; chromAlias->alias = cloneString(words[i]); chromAlias->source = "none"; hashAdd(chromAliasGlobals.chromToAliasHash, chromAlias->chrom, chromAlias); hashAdd(chromAliasGlobals.aliasToChromHash, chromAlias->alias, chromAlias); } } } } } static void readFieldedTable(struct lineFile *lf) /* Use the fieldedTable library to read in fully populated chromAlias.txt file. */ { struct fieldedTable *aliasTable = fieldedTableAttach(lf, NULL, 0); chromAliasGlobals.fieldCount = aliasTable->fieldCount; chromAliasGlobals.fields = aliasTable->fields; struct fieldedRow *row; for(row = aliasTable->rowList; row; row = row->next) { char *native = row->row[0]; unsigned field; for(field=0; field < aliasTable->fieldCount; field++) { char *alias = row->row[field]; char *source = aliasTable->fields[field]; struct chromAlias *chromAlias; AllocVar(chromAlias); chromAlias->chrom = native; chromAlias->alias = alias; chromAlias->source = source; hashAdd(chromAliasGlobals.chromToAliasHash, native, chromAlias); hashAdd(chromAliasGlobals.aliasToChromHash, alias, chromAlias); } } } static char * gbdbBbExists(char *database) /* use a gbdb bigBed as our alias file. */ { // not supported at the moment /* char buffer[4096]; safef(buffer, sizeof buffer, "/gbdb/%s/chromAlias.bb", database); if (fileExists(buffer)) return cloneString(buffer); */ return NULL; } -static void chromAliasSetupBb(char *database, char *bbFile) +void chromAliasSetupBb(char *database, char *bbFile) /* Look for a chromAlias bigBed file and open it. */ { chromAliasGlobals.bbi = bigBedFileOpen(bbFile); struct slName *fieldNames = bbFieldNames(chromAliasGlobals.bbi); chromAliasGlobals.fieldCount = slCount(fieldNames) - chromAliasGlobals.bbi->definedFieldCount; AllocArray(chromAliasGlobals.fields, chromAliasGlobals.fieldCount); int ii; for(ii=0; ii < chromAliasGlobals.bbi->definedFieldCount; ii++, fieldNames = fieldNames->next) ; for(ii=0; ii < chromAliasGlobals.fieldCount; ii++, fieldNames = fieldNames->next) chromAliasGlobals.fields[ii] = fieldNames->name; chromAliasGlobals.bptList = bbiAliasOpenExtra(chromAliasGlobals.bbi); chromAliasGlobals.lm = lmInit(0); } static void chromAliasSetupHub(char *database) /* Look for a chromAlias text table and load the hashes with its contents. */ { char *aliasBbFile = trackHubAliasBbFile(database); if (aliasBbFile != NULL) { chromAliasSetupBb(database, aliasBbFile); return; } char *aliasFile = trackHubAliasFile(database); if (aliasFile == NULL) return; struct lineFile *lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE); chromAliasGlobals.chromToAliasHash = hashNew(0); chromAliasGlobals.aliasToChromHash = hashNew(0); char *line; if (!lineFileNext(lf, &line, NULL)) errAbort("%s is empty", lf->fileName); lineFileReuse(lf); struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) readFieldedTable(lf); errCatchEnd(errCatch); if (errCatch->gotError) { lineFileClose(&lf); lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE); readOldAlias(lf); } errCatchFree(&errCatch); lineFileClose(&lf); } static void chromAliasSetupSql(char *database) /* Look for a chromAlias SQL table and load the hashes with its contents. */ { if (!hTableExists(database, "chromAlias")) return; struct sqlConnection *conn = hAllocConn(database); chromAliasGlobals.chromToAliasHash = hashNew(0); chromAliasGlobals.aliasToChromHash = hashNew(0); /* the 'source' field of this table can be a comma separated list of * naming authorities, not just one. Keep track so they can be counted. */ struct hash *sources = hashNew(0); int sourceCount = 0; struct slName *fieldNames = NULL; /* a list of strings, source authority name */ struct slName *name; /* one name to add to list */ char query[2048]; sqlSafef(query, sizeof(query), "select * from chromAlias"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct chromAlias *new = chromAliasLoad(row); char *words[1024]; /* 1024 naming authorities ? surely never more . . . */ int wordCount = chopByChar(new->source, ',', words, ArraySize(words)); for (int i = 0; i < wordCount; ++i) { int sourceN = hashIntValDefault(sources, words[i], -1); if (sourceN < 0) /* a new source */ { name = slNameNew(words[i]); slAddHead(&fieldNames, name); hashAddInt(sources, words[i], sourceCount++); } struct chromAlias *chromAlias; AllocVar(chromAlias); chromAlias->chrom = cloneString(new->chrom); chromAlias->alias = cloneString(new->alias); chromAlias->source = cloneString(words[i]); hashAdd(chromAliasGlobals.chromToAliasHash, new->chrom, chromAlias); hashAdd(chromAliasGlobals.aliasToChromHash, new->alias, chromAlias); } chromAliasFree(&new); } sqlFreeResult(&sr); hFreeConn(&conn); chromAliasGlobals.fieldCount = sourceCount; slReverse(&fieldNames); AllocArray(chromAliasGlobals.fields, chromAliasGlobals.fieldCount); name = fieldNames; for(int i=0; i < chromAliasGlobals.fieldCount; i++, name = name->next) chromAliasGlobals.fields[i] = name->name; } /* static void chromAliasSetupSql(char *database) */ static pthread_mutex_t ourMutex = PTHREAD_MUTEX_INITIALIZER; static void getLock() /* Create a mutex to make the code thread safe. */ { pthread_mutex_lock( &ourMutex ); } static void releaseLock() /* Release our mutex. */ { pthread_mutex_unlock( &ourMutex ); } void chromAliasSetup(char *database) /* Read in the chromAlias file/table for this database. */ { if (database == NULL) return; getLock(); if (chromAliasGlobals.inited) { releaseLock(); return; } chromAliasGlobals.inited = TRUE; char *gbdbFile; if (trackHubDatabase(database)) chromAliasSetupHub(database); else if ((gbdbFile = gbdbBbExists(database)) != NULL) chromAliasSetupBb(database, gbdbFile); else chromAliasSetupSql(database); releaseLock(); } char *findNativeHashes(char *alias) /* Find a native sequence given an alias using the hash tables. */ { struct chromAlias *chromAlias = (struct chromAlias *)hashFindVal(chromAliasGlobals.aliasToChromHash, alias); if (chromAlias != NULL) return cloneString(chromAlias->chrom); return NULL; } char *chromAliasFindNative(char *alias) /* Find the native seqName for a given alias. */ { static struct hash *cachedNative; char *chrom; if (cachedNative == NULL) cachedNative = newHash(6); if ((chrom = hashFindVal(cachedNative, alias)) != NULL) return chrom; getLock(); if ((chrom = hashFindVal(cachedNative, alias)) == NULL) { if (chromAliasGlobals.bbi) chrom = bbiAliasFindNative(chromAliasGlobals.bbi, chromAliasGlobals.bptList, chromAliasGlobals.lm, alias); else if (chromAliasGlobals.aliasToChromHash) chrom = findNativeHashes(alias); hashAdd(cachedNative, alias, cloneString(chrom)); } releaseLock(); return cloneString(chrom); } struct slName *findAliasesHashes(char *seqName) /* Find the aliases for a given seqName using the hashes. */ { struct slName *slList = NULL; struct hashEl *thisEl = hashLookup(chromAliasGlobals.chromToAliasHash, seqName); for (;thisEl != NULL; thisEl = hashLookupNext(thisEl)) { struct chromAlias *chromAlias = (struct chromAlias *)thisEl->val; struct slName *name = newSlName(chromAlias->alias); slAddHead(&slList, name); } return slList; } struct slName *chromAliasFindAliases(char *seqName) /* Find the aliases for a given seqName. */ { static struct hash *cachedAliases; struct slName *aliases; if (cachedAliases == NULL) cachedAliases = newHash(6); if ((aliases = hashFindVal(cachedAliases, seqName)) != NULL) return aliases; getLock(); if ((aliases = hashFindVal(cachedAliases, seqName)) == NULL) { if (chromAliasGlobals.bbi) aliases = bbiAliasFindAliases(chromAliasGlobals.bbi,chromAliasGlobals.lm, seqName); else if (chromAliasGlobals.chromToAliasHash) aliases = findAliasesHashes(seqName); hashAdd(cachedAliases, seqName, aliases); } releaseLock(); return aliases; } char *chromAliasFindSingleAlias(char *seqName, char *authority) /* Find the aliases for a given seqName from a given authority. */ { if (authority == NULL) return cloneString(seqName); struct slName *aliases = chromAliasFindAliases(seqName); if (aliases == NULL) return cloneString(seqName); unsigned fieldNum = 0; for(; fieldNum < chromAliasGlobals.fieldCount; fieldNum++) { if (sameString(authority, chromAliasGlobals.fields[fieldNum])) break; } if (fieldNum >= chromAliasGlobals.fieldCount) return cloneString(seqName); unsigned count = 0; for(; aliases && count < fieldNum; count++,aliases = aliases->next) ; if (!aliases) return cloneString(seqName); if (!isEmpty(aliases->name)) return cloneString(aliases->name); return cloneString(seqName); } char *chromAliasGetDisplayChrom(char *db, struct cart *cart, char *seqName) /* Return the sequence name to display based on the database and cart. */ { if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); return chromAliasFindSingleAlias(seqName, genome->chromAuthority); } return seqName; } char *chromAliasNCBI(char *db, char *chr, char *gcX) /* given the database and the chrom name, find the NCBI equivalent chr name */ { char *seqName = NULL; /* just in case this has not yet been done by the caller */ chromAliasSetup(db); if (startsWith("GCF", gcX)) seqName = chromAliasFindSingleAlias(chr, "refseq"); else seqName = chromAliasFindSingleAlias(chr, "genbank"); return seqName; }