d12a52edb9b3841adcfe42fc25b361e59654ef11 jcasper Fri Feb 28 15:59:19 2020 -0800 Better accounting for chromosome aliases in .hic files, refs #25055 diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c index f400df2..0ec22b8 100644 --- src/hg/lib/chromAlias.c +++ src/hg/lib/chromAlias.c @@ -1,188 +1,215 @@ /* chromAlias.c was originally generated by the autoSql program, which also * generated chromAlias.h and chromAlias.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "chromAlias.h" #include "hdb.h" char *chromAliasCommaSepFieldNames = "alias,chrom,source"; void chromAliasStaticLoad(char **row, struct chromAlias *ret) /* Load a row from chromAlias table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->alias = row[0]; ret->chrom = row[1]; ret->source = row[2]; } struct chromAlias *chromAliasLoad(char **row) /* Load a chromAlias from row fetched with select * from chromAlias * from database. Dispose of this with chromAliasFree(). */ { struct chromAlias *ret; AllocVar(ret); ret->alias = cloneString(row[0]); ret->chrom = cloneString(row[1]); ret->source = cloneString(row[2]); return ret; } struct chromAlias *chromAliasLoadAll(char *fileName) /* Load all chromAlias from a whitespace-separated file. * Dispose of this with chromAliasFreeList(). */ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileRow(lf, row)) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper) /* Load all chromAlias from a chopper separated file. * Dispose of this with chromAliasFreeList(). 
*/ { struct chromAlias *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = chromAliasLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret) /* Create a chromAlias out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new chromAlias */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->alias = sqlStringComma(&s); ret->chrom = sqlStringComma(&s); ret->source = sqlStringComma(&s); *pS = s; return ret; } void chromAliasFree(struct chromAlias **pEl) /* Free a single dynamically allocated chromAlias such as created * with chromAliasLoad(). */ { struct chromAlias *el; if ((el = *pEl) == NULL) return; freeMem(el->alias); freeMem(el->chrom); freeMem(el->source); freez(pEl); } void chromAliasFreeList(struct chromAlias **pList) /* Free a list of dynamically allocated chromAlias's */ { struct chromAlias *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; chromAliasFree(&el); } *pList = NULL; } void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep) /* Print out chromAlias. Separate fields with sep. Follow last field with lastSep. */ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->alias); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->source); if (sep == ',') fputc('"',f); fputc(lastSep,f); } void chromAliasJsonOutput(struct chromAlias *el, FILE *f) /* Print out chromAlias in JSON format. NOTE(review): the alias, chrom and source * values are written verbatim between the quote characters; no JSON escaping is * done in this function. 
*/ { fputc('{',f); fputc('"',f); fprintf(f,"alias"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->alias); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"chrom"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->chrom); fputc('"',f); fputc(',',f); fputc('"',f); fprintf(f,"source"); fputc('"',f); fputc(':',f); fputc('"',f); fprintf(f, "%s", el->source); fputc('"',f); fputc('}',f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ struct hash *chromAliasMakeLookupTable(char *database) /* Given a database name, construct a lookup table that takes chromosome alias names * to a matching struct chromAlias. No connection is passed in: one is obtained with * hAllocConn() and released with hFreeConn() before returning. Each entry is built by * chromAliasLoad() and stored in the hash keyed by its alias field; nothing in this * function frees those entries. Returns NULL if the given database does not have a * chromAlias table. NOTE(review): the query is select * while chromAliasLoad() reads * row[0..2] as alias, chrom, source, so this presumably relies on the table's column * order - confirm. */ { struct hash *hash = NULL; if (!hTableExists(database, "chromAlias")) return NULL; struct sqlConnection *conn = hAllocConn(database); hash = hashNew(0); char query[2048]; sqlSafef(query, sizeof(query), "select * from chromAlias"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct chromAlias *new = chromAliasLoad(row); hashAdd(hash, new->alias, new); } sqlFreeResult(&sr); hFreeConn(&conn); return hash; } + +struct hash *chromAliasMakeReverseLookupTable(char *database) +/* Given a database name, construct a lookup table that takes the actual assembly + * chromosome names (the chrom field) to struct chromAliases. The connection is obtained + * and released inside this function. Because a chromosome name may well have multiple + * aliases, repeated calls to hashLookupNext may be required to see them all. Returns + * NULL if the given database does not have a chromAlias table. 
*/ +{ +struct hash *hash = NULL; +if (!hTableExists(database, "chromAlias")) + return NULL; + +struct sqlConnection *conn = hAllocConn(database); +hash = hashNew(0); +char query[2048]; +sqlSafef(query, sizeof(query), "select * from chromAlias"); /* NOTE(review): select * presumably relies on column order alias,chrom,source, which is how chromAliasLoad() reads row[0..2] - confirm */ +struct sqlResult *sr = sqlGetResult(conn, query); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + struct chromAlias *new = chromAliasLoad(row); + hashAdd(hash, new->chrom, new); /* keyed by chrom rather than alias - the only difference from chromAliasMakeLookupTable(); NOTE(review): the two builders could share one helper */ + } +sqlFreeResult(&sr); +hFreeConn(&conn); +return hash; +}