54406b80d5d435970989acff7b22dd6146c6b411 braney Sat Jan 22 15:24:59 2022 -0800 adding chrom alias support to big files diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c index 0ec22b8..c5c2a59 100644 --- src/hg/lib/chromAlias.c +++ src/hg/lib/chromAlias.c @@ -1,215 +1,252 @@ /* chromAlias.c was originally generated by the autoSql program, which also * generated chromAlias.h and chromAlias.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "chromAlias.h" #include "hdb.h" char *chromAliasCommaSepFieldNames = "alias,chrom,source"; void chromAliasStaticLoad(char **row, struct chromAlias *ret) /* Load a row from chromAlias table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->alias = row[0]; ret->chrom = row[1]; ret->source = row[2]; } struct chromAlias *chromAliasLoad(char **row) /* Load a chromAlias from row fetched with select * from chromAlias * from database. Dispose of this with chromAliasFree(). */ { struct chromAlias *ret; AllocVar(ret); ret->alias = cloneString(row[0]); ret->chrom = cloneString(row[1]); ret->source = cloneString(row[2]); return ret; } struct chromAlias *chromAliasLoadAll(char *fileName) /* Load all chromAlias from a whitespace-separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileRow(lf, row)) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper) /* Load all chromAlias from a chopper separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret) /* Create a chromAlias out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new chromAlias */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->alias = sqlStringComma(&s); ret->chrom = sqlStringComma(&s); ret->source = sqlStringComma(&s); *pS = s; return ret; } void chromAliasFree(struct chromAlias **pEl) /* Free a single dynamically allocated chromAlias such as created * with chromAliasLoad(). */ { struct chromAlias *el; if ((el = *pEl) == NULL) return; freeMem(el->alias); freeMem(el->chrom); freeMem(el->source); freez(pEl); } void chromAliasFreeList(struct chromAlias **pList) /* Free a list of dynamically allocated chromAlias's */ { struct chromAlias *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; chromAliasFree(&el); } *pList = NULL; } void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep) /* Print out chromAlias. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->alias); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->source); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void chromAliasJsonOutput(struct chromAlias *el, FILE *f) /* Print out chromAlias in JSON format. */ { fputc('{',f); fputc('"',f); fprintf(f,"alias"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->alias); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"chrom"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->chrom); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"source"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->source); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ -struct hash *chromAliasMakeLookupTable(char *database) -/* Given a database name and a connection to that database, construct a lookup table - * that takes chromosome alias names to a matching struct chromAlias. Returns NULL - * if the given database does not have a chromAlias table. */ +/* our "global" data */ +static struct +{ +char *database; +struct hash *nameHash; +struct hash *forwardHash; +struct hash *reverseHash; +} chromHashes; + +static boolean checkDatabase(char *database) +/* Make sure we don't see different databases. */ +{ +if (database == NULL) + return TRUE; + +if (chromHashes.database != NULL) + { + if (!sameString(chromHashes.database, database)) + { + errAbort("chromAliasSetup: only works for one db. %s was passed in earlier, now %s.", chromHashes.database, database); + return FALSE; + } + return TRUE; + } + +chromHashes.database = cloneString(database); +return TRUE; +} + +void chromAliasSetup(char *database) +/* Read in the chromAlias file/table for this database. */ { -struct hash *hash = NULL; +if (!checkDatabase(database)) + return; + if (!hTableExists(database, "chromAlias")) - return NULL; + return; struct sqlConnection *conn = hAllocConn(database); -hash = hashNew(0); +chromHashes.forwardHash = hashNew(0); +chromHashes.reverseHash = hashNew(0); +chromHashes.nameHash = hashNew(0); + char query[2048]; sqlSafef(query, sizeof(query), "select * from chromAlias"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct chromAlias *new = chromAliasLoad(row); - hashAdd(hash, new->alias, new); + hashAdd(chromHashes.forwardHash, new->alias, new); + hashAdd(chromHashes.reverseHash, new->chrom, new); + hashAdd(chromHashes.nameHash, new->chrom, new->alias); } sqlFreeResult(&sr); hFreeConn(&conn); -return hash; +} + +struct hash *chromAliasMakeLookupTable(char *database) +/* Given a database name and a connection to that database, construct a lookup table + * that takes chromosome alias names to a matching struct chromAlias. Returns NULL + * if the given database does not have a chromAlias table. */ +{ +if (!checkDatabase(database)) + return NULL; +return chromHashes.forwardHash; } struct hash *chromAliasMakeReverseLookupTable(char *database) /* Given a database name and a connection to that database, construct a lookup table * that takes the actual assembly chromosome names to struct chromAliases. Because a * chromosome name may well have multiple aliases, repeated calls to hashLookupNext * may be required to see them all. Returns NULL if the given database does not have * a chromAlias table. */ { -struct hash *hash = NULL; -if (!hTableExists(database, "chromAlias")) +if (!checkDatabase(database)) return NULL; +return chromHashes.reverseHash; +} -struct sqlConnection *conn = hAllocConn(database); -hash = hashNew(0); -char query[2048]; -sqlSafef(query, sizeof(query), "select * from chromAlias"); -struct sqlResult *sr = sqlGetResult(conn, query); -char **row; -while ((row = sqlNextRow(sr)) != NULL) +struct hash *chromAliasGetHash(char *database) +/* Get the hash that maps chrom names to their aliases. */ { - struct chromAlias *new = chromAliasLoad(row); - hashAdd(hash, new->chrom, new); - } -sqlFreeResult(&sr); -hFreeConn(&conn); -return hash; +if (!checkDatabase(database)) + return NULL; + +return chromHashes.nameHash; }