98874f1656202cd5781466a5d9b25f47da1f104d hiram Mon Feb 13 16:03:53 2023 -0800 a bit closer to correct NCBI links, not perfect, but better than it was no redmine diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c index 5cf7c0d..382c3c6 100644 --- src/hg/lib/chromAlias.c +++ src/hg/lib/chromAlias.c @@ -308,67 +308,101 @@ } errCatchFree(&errCatch); lineFileClose(&lf); } static void chromAliasSetupSql(char *database) /* Look for a chromAlias SQL table and load the hashes with its contents. */ /* As patched: the source column of each row is split on commas, one chromAlias record per (row, source name) pair is added to both hashes, and the distinct source names are collected into chromAliasGlobals.fields with chromAliasGlobals.fieldCount set to how many were seen. Returns without touching the globals when the table does not exist. */ { if (!hTableExists(database, "chromAlias")) return; struct sqlConnection *conn = hAllocConn(database); chromAliasGlobals.chromToAliasHash = hashNew(0); chromAliasGlobals.aliasToChromHash = hashNew(0); +/* the 'source' field of this table can be a comma separated list of + * naming authorities, not just one. Keep track so they can be counted. + */ +struct hash *sources = hashNew(0); /* maps a source name to the value of sourceCount at the time it was first seen. NOTE(review): no matching free of this hash is visible before the function ends - confirm whether that is intended */ +int sourceCount = 0; +struct slName *fieldNames = NULL; /* a list of strings, source authority name */ +struct slName *name; /* one name to add to list */ + char query[2048]; sqlSafef(query, sizeof(query), "select * from chromAlias"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct chromAlias *new = chromAliasLoad(row); - hashAdd(chromAliasGlobals.chromToAliasHash, new->chrom, new); - hashAdd(chromAliasGlobals.aliasToChromHash, new->alias, new); + char *words[1024]; /* 1024 naming authorities ? surely never more . . . 
*/ + int wordCount = chopByChar(new->source, ',', words, ArraySize(words)); /* NOTE(review): assumes chopByChar splits new->source in place and never reports more than ArraySize(words) entries - TODO confirm against the library */ + for (int i = 0; i < wordCount; ++i) + { + int sourceN = hashIntValDefault(sources, words[i], -1); + if (sourceN < 0) /* a new source */ + { + name = slNameNew(words[i]); + slAddHead(&fieldNames, name); + hashAddInt(sources, words[i], sourceCount++); + } + struct chromAlias *chromAlias; + AllocVar(chromAlias); /* a separate record with its own cloned chrom, alias and source strings is stored for every source name of the row */ + chromAlias->chrom = cloneString(new->chrom); + chromAlias->alias = cloneString(new->alias); + chromAlias->source = cloneString(words[i]); + hashAdd(chromAliasGlobals.chromToAliasHash, new->chrom, chromAlias); + hashAdd(chromAliasGlobals.aliasToChromHash, new->alias, chromAlias); + } + chromAliasFree(&new); /* the hash values are the copies made above, not this row. NOTE(review): the hash keys were passed as new->chrom and new->alias, so this presumes hashAdd keeps its own copy of the key - confirm */ } sqlFreeResult(&sr); hFreeConn(&conn); -} +chromAliasGlobals.fieldCount = sourceCount; +slReverse(&fieldNames); /* names were pushed with slAddHead; the list is reversed before being copied out, presumably so that fields[i] lines up with the index recorded in sources - confirm */ +AllocArray(chromAliasGlobals.fields, chromAliasGlobals.fieldCount); +name = fieldNames; +for(int i=0; i < chromAliasGlobals.fieldCount; i++, name = name->next) + chromAliasGlobals.fields[i] = name->name; /* fields[] stores the pointers held by the fieldNames list without cloning them, so that list has to stay allocated for as long as fields[] is used */ +} /* static void chromAliasSetupSql(char *database) */ static pthread_mutex_t ourMutex = PTHREAD_MUTEX_INITIALIZER; static void getLock() /* Create a mutex to make the code thread safe. */ /* NOTE(review): the body locks the statically initialised ourMutex; it does not create a mutex */ { pthread_mutex_lock( &ourMutex ); } static void releaseLock() /* Release our mutex. */ { pthread_mutex_unlock( &ourMutex ); } void chromAliasSetup(char *database) /* Read in the chromAlias file/table for this database. */ /* Does nothing for a NULL database. Otherwise takes the lock, returns early if chromAliasGlobals.inited is already set, and else loads from a track hub, a gbdb file, or the SQL table, in that order of preference. */ { if (database == NULL) return; getLock(); /* every return after this point has to call releaseLock() first */ -if (chromAliasGlobals.inited) +if (chromAliasGlobals.inited) { + releaseLock(); /* already set up: drop the lock before the early return, which the pre-patch lines did not do */ return; +} chromAliasGlobals.inited = TRUE; /* set while the lock is held and before any of the loaders run */ char *gbdbFile; if (trackHubDatabase(database)) chromAliasSetupHub(database); else if ((gbdbFile = gbdbBbExists(database)) != NULL) chromAliasSetupBb(database, gbdbFile); else chromAliasSetupSql(database); releaseLock(); } char *findNativeHashes(char *alias) /* Find a native sequence given an alias using the hash tables. 
*/ { @@ -442,54 +476,65 @@ aliases = findAliasesHashes(seqName); hashAdd(cachedAliases, seqName, aliases); } releaseLock(); return aliases; } char *chromAliasFindSingleAlias(char *seqName, char *authority) /* Find the aliases for a given seqName from a given authority. */ /* Every return path hands back a cloneString() copy: a copy of seqName when authority is NULL, when there are no aliases, when authority is not found in chromAliasGlobals.fields, or when the selected alias name is empty; otherwise a copy of the selected alias name. The alias is selected by walking the alias list as many steps as the index of authority in fields[], which presumes the list is ordered like fields[] - confirm. */ { if (authority == NULL) return cloneString(seqName); - struct slName *aliases = chromAliasFindAliases(seqName); + if (aliases == NULL) return cloneString(seqName); unsigned fieldNum = 0; for(; fieldNum < chromAliasGlobals.fieldCount; fieldNum++) { if (sameString(authority, chromAliasGlobals.fields[fieldNum])) break; } if (fieldNum >= chromAliasGlobals.fieldCount) return cloneString(seqName); - unsigned count = 0; for(; aliases && count < fieldNum; count++,aliases = aliases->next) ; /* NOTE(review): this loop can finish with aliases == NULL when the list has fieldNum or fewer entries, and the next statement reads aliases->name without a NULL check */ if (!isEmpty(aliases->name)) return cloneString(aliases->name); return cloneString(seqName); } char *chromAliasGetDisplayChrom(char *db, struct cart *cart, char *seqName) /* Return the sequence name to display based on the database and cart. */ /* For a track hub database the name comes from chromAliasFindSingleAlias() using the chromAuthority of the hub genome; otherwise seqName is returned unchanged. The cart argument is not referenced in this body. */ { if (trackHubDatabase(db)) { struct trackHubGenome *genome = trackHubGetGenome(db); return chromAliasFindSingleAlias(seqName, genome->chromAuthority); } /* NOTE(review): this path returns the seqName pointer passed in by the caller, while the hub path above returns a cloneString() copy, so the two paths differ in who owns the result */ return seqName; } +char *chromAliasNCBI(char *db, char *chr, char *gcX) +/* given the database and the chrom name, find the NCBI equivalent chr name */ /* gcX is only checked with startsWith("GCF", gcX): when that test succeeds the "refseq" authority is looked up, otherwise "genbank". The result is whatever chromAliasFindSingleAlias() returns. NOTE(review): gcX is handed to startsWith without a NULL check. */ +{ +char *seqName = NULL; +/* just in case this has not yet been done by the caller */ +chromAliasSetup(db); +if (startsWith("GCF", gcX)) + seqName = chromAliasFindSingleAlias(chr, "refseq"); +else + seqName = chromAliasFindSingleAlias(chr, "genbank"); +return seqName; +}