7df99795b147931dfea8220ed5ab305d11fde6b4 braney Thu Mar 10 15:41:46 2022 -0800 move some chromAlias stuff around so bedToBigBed can use a chromAlias bigBed as a chromAlias file. diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c index a0d1027..f318542 100644 --- src/hg/lib/chromAlias.c +++ src/hg/lib/chromAlias.c @@ -1,481 +1,410 @@ /* chromAlias.c was originally generated by the autoSql program, which also * generated chromAlias.h and chromAlias.sql. This module links the database and * the RAM representation of objects. */ #include <pthread.h> #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "chromAlias.h" #include "hdb.h" #include "trackHub.h" #include "fieldedTable.h" #include "bigBed.h" +#include "bbiAlias.h" #include "bPlusTree.h" char *chromAliasCommaSepFieldNames = "alias,chrom,source"; void chromAliasStaticLoad(char **row, struct chromAlias *ret) /* Load a row from chromAlias table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->alias = row[0]; ret->chrom = row[1]; ret->source = row[2]; } struct chromAlias *chromAliasLoad(char **row) /* Load a chromAlias from row fetched with select * from chromAlias * from database. Dispose of this with chromAliasFree(). */ { struct chromAlias *ret; AllocVar(ret); ret->alias = cloneString(row[0]); ret->chrom = cloneString(row[1]); ret->source = cloneString(row[2]); return ret; } struct chromAlias *chromAliasLoadAll(char *fileName) /* Load all chromAlias from a whitespace-separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileRow(lf, row)) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper) /* Load all chromAlias from a chopper separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret) /* Create a chromAlias out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new chromAlias */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->alias = sqlStringComma(&s); ret->chrom = sqlStringComma(&s); ret->source = sqlStringComma(&s); *pS = s; return ret; } void chromAliasFree(struct chromAlias **pEl) /* Free a single dynamically allocated chromAlias such as created * with chromAliasLoad(). */ { struct chromAlias *el; if ((el = *pEl) == NULL) return; freeMem(el->alias); freeMem(el->chrom); freeMem(el->source); freez(pEl); } void chromAliasFreeList(struct chromAlias **pList) /* Free a list of dynamically allocated chromAlias's */ { struct chromAlias *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; chromAliasFree(&el); } *pList = NULL; } void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep) /* Print out chromAlias. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->alias); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->source); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void chromAliasJsonOutput(struct chromAlias *el, FILE *f) /* Print out chromAlias in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"alias"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->alias); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"chrom"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->chrom); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"source"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->source); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ /* our "global" data */ -struct bptIndex -{ -struct bptIndex *next; -int fieldIx; -struct bptFile *bpt; -}; - static struct { boolean inited; boolean bptInited; struct bptIndex *bptList; struct bbiFile *bbi; +struct lm *lm; struct hash *chromToAliasHash; struct hash *aliasToChromHash; } chromAliasGlobals; static void readOldAlias(struct lineFile *lf) /* Don't assume the table is fully populated, and dummy up a value for source. */ { char *words[1024]; /* process lines, no more than 1,024 words on a line */ char *line; int size; while (lineFileNext(lf, &line, &size)) { int wordCount = chopByWhite(line, words, ArraySize(words)); if (wordCount > 1) { int i = 1; for ( ; i < wordCount; ++i ) { if (isNotEmpty(words[i])) { char *alias = cloneString(words[i]); char *chrom = cloneString(words[0]); hashAdd(chromAliasGlobals.chromToAliasHash, chrom, alias); hashAdd(chromAliasGlobals.aliasToChromHash, alias, chrom); } } } } } static void readFieldedTable(struct lineFile *lf) /* Use the fieldedTable library to read in fully populated chromAlias.txt file. */ { struct fieldedTable *aliasTable = fieldedTableAttach(lf, NULL, 0); struct fieldedRow *row; for(row = aliasTable->rowList; row; row = row->next) { char *chrom = row->row[0]; unsigned field; for(field=1; field< aliasTable->fieldCount; field++) { char *alias = row->row[field]; hashAdd(chromAliasGlobals.chromToAliasHash, chrom, alias); hashAdd(chromAliasGlobals.aliasToChromHash, alias, chrom); } } } static char * gbdbBbExists(char *database) /* use a gbdb bigBed as our alias file. */ { // not supported at the moment return NULL; } static void chromAliasSetupBb(char *database, char *bbFile) /* Look for a chromAlias bigBed file and open it. */ { chromAliasGlobals.bbi = bigBedFileOpen(bbFile); +chromAliasGlobals.bptList = bbiAliasOpenExtra(chromAliasGlobals.bbi); +chromAliasGlobals.lm = lmInit(0); } static void chromAliasSetupHub(char *database) /* Look for a chromAlias text table and load the hashes with its contents. */ { char *aliasBbFile = trackHubAliasBbFile(database); if (aliasBbFile != NULL) { chromAliasSetupBb(database, aliasBbFile); return; } char *aliasFile = trackHubAliasFile(database); if (aliasFile == NULL) return; struct lineFile *lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE); chromAliasGlobals.chromToAliasHash = hashNew(0); chromAliasGlobals.aliasToChromHash = hashNew(0); char *line; if (!lineFileNext(lf, &line, NULL)) errAbort("%s is empty", lf->fileName); lineFileReuse(lf); // for the moment always read the alias file in the "old" way //if (line[0] == '#') if (0) readFieldedTable(lf); else readOldAlias(lf); lineFileClose(&lf); } static void chromAliasSetupSql(char *database) /* Look for a chromAlias SQL table and load the hashes with its contents. */ { if (!hTableExists(database, "chromAlias")) return; struct sqlConnection *conn = hAllocConn(database); chromAliasGlobals.chromToAliasHash = hashNew(0); chromAliasGlobals.aliasToChromHash = hashNew(0); char query[2048]; sqlSafef(query, sizeof(query), "select * from chromAlias"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct chromAlias *new = chromAliasLoad(row); hashAdd(chromAliasGlobals.chromToAliasHash, new->chrom, new->alias); hashAdd(chromAliasGlobals.aliasToChromHash, new->alias, new->chrom); } sqlFreeResult(&sr); hFreeConn(&conn); } static pthread_mutex_t ourMutex = PTHREAD_MUTEX_INITIALIZER; static void getLock() /* Create a mutex to make the code thread safe. */ { pthread_mutex_lock( &ourMutex ); } static void releaseLock() /* Release our mutex. */ { pthread_mutex_unlock( &ourMutex ); } void chromAliasSetup(char *database) /* Read in the chromAlias file/table for this database. */ { if (database == NULL) return; getLock(); if (chromAliasGlobals.inited) return; chromAliasGlobals.inited = TRUE; char *gbdbFile; if (trackHubDatabase(database)) chromAliasSetupHub(database); else if ((gbdbFile = gbdbBbExists(database)) != NULL) chromAliasSetupBb(database, gbdbFile); else chromAliasSetupSql(database); releaseLock(); } char *findNativeHashes(char *alias) /* Find a native sequence given an alias using the hash tables. */ { char *chrom = (char *)hashFindVal(chromAliasGlobals.aliasToChromHash, alias); if (isNotEmpty(chrom)) return cloneString(chrom); return NULL; } -static struct bptIndex *getBpts(struct bbiFile *bbi) -/* Open any extra indices that this bigBed has. */ -{ -if (chromAliasGlobals.bptInited) - return chromAliasGlobals.bptList; - -if (!chromAliasGlobals.bptInited) - { - struct bptIndex *bptList = NULL; - struct slName *indexList = bigBedListExtraIndexes(bbi); - for(; indexList; indexList = indexList->next) - { - struct bptIndex *bptIndex; - AllocVar(bptIndex); - bptIndex->bpt = bigBedOpenExtraIndex(bbi, indexList->name, &bptIndex->fieldIx); - slAddHead(&bptList, bptIndex); - } - chromAliasGlobals.bptList = bptList; - chromAliasGlobals.bptInited = TRUE; - } - -return chromAliasGlobals.bptList; -} - -char *findNativeBb(struct bbiFile *bbi, char *alias) -/* Find the native seqName for a given alias given a bigBed. */ -{ -struct bptIndex *bptIndex = getBpts(bbi); - -for(; bptIndex; bptIndex = bptIndex->next) - { - struct lm *lm = lmInit(0); - struct bigBedInterval *bb= bigBedNameQuery(bbi, bptIndex->bpt, bptIndex->fieldIx, alias, lm); - - if (bb != NULL) - { - char chromName[1024]; - bptStringKeyAtPos(bbi->chromBpt, bb->chromId, chromName, sizeof(chromName)); - - return cloneString(chromName); - } - } - -return NULL; -} - char *chromAliasFindNative(char *alias) /* Find the native seqName for a given alias. */ { static struct hash *cachedNative; char *chrom; if (cachedNative == NULL) cachedNative = newHash(6); if ((chrom = hashFindVal(cachedNative, alias)) != NULL) return chrom; getLock(); if ((chrom = hashFindVal(cachedNative, alias)) == NULL) { if (chromAliasGlobals.bbi) - chrom = findNativeBb(chromAliasGlobals.bbi, alias); + chrom = bbiAliasFindNative(chromAliasGlobals.bbi, chromAliasGlobals.bptList, chromAliasGlobals.lm, alias); else if (chromAliasGlobals.aliasToChromHash) chrom = findNativeHashes(alias); hashAdd(cachedNative, alias, chrom); } releaseLock(); return cloneString(chrom); } -struct slName *findAliasesBb(struct bbiFile *bbi, char *seqName) -/* Find the aliases for a given seqName using the alias bigBed. */ -{ -struct lm *lm = lmInit(0); -struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, seqName, 0, 1, 0, lm); -char *bedRow[bbi->fieldCount]; -char startBuf[16], endBuf[16]; -struct slName *list = NULL; -for (bb = bbList; bb != NULL; bb = bb->next) - { - bigBedIntervalToRow(bb, seqName, startBuf, endBuf, bedRow, ArraySize(bedRow)); - int ii; - for(ii=3; ii < chromAliasGlobals.bbi->fieldCount; ii++) - { - struct slName *name = newSlName(bedRow[ii]); - slAddHead(&list, name); - } - } - -return list; -} - struct slName *findAliasesHashes(char *seqName) /* Find the aliases for a given seqName using the hashes. */ { struct slName *slList = NULL; struct hashEl *thisEl = hashLookup(chromAliasGlobals.chromToAliasHash, seqName); for (;thisEl != NULL; thisEl = hashLookupNext(thisEl)) { struct slName *name = newSlName((char *)thisEl->val); slAddHead(&slList, name); } return slList; } struct slName *chromAliasFindAliases(char *seqName) /* Find the aliases for a given seqName. */ { static struct hash *cachedAliases; struct slName *aliases; if (cachedAliases == NULL) cachedAliases = newHash(6); if ((aliases = hashFindVal(cachedAliases, seqName)) != NULL) return aliases; getLock(); if ((aliases = hashFindVal(cachedAliases, seqName)) == NULL) { if (chromAliasGlobals.bbi) - aliases = findAliasesBb(chromAliasGlobals.bbi, seqName); + aliases = bbiAliasFindAliases(chromAliasGlobals.bbi,chromAliasGlobals.lm, seqName); else if (chromAliasGlobals.chromToAliasHash) aliases = findAliasesHashes(seqName); hashAdd(cachedAliases, seqName, aliases); } releaseLock(); return aliases; }