5f9ba1691bd3051146f2f9464c9d1375e42f6ff4
angie
Thu Mar 8 11:36:57 2018 -0800
Added support for ENS*T* transcript IDs in HGVS position search, using the latest Gencode.
Added support for parsing ENS*P* protein IDs, but can't yet map those to the genome because our Gencode tables don't yet include a mapping between transcript and protein IDs.
refs #21076

diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c
index 8815dbe..8cb5d24 100644
--- src/hg/lib/hdb.c
+++ src/hg/lib/hdb.c
@@ -5583,30 +5583,80 @@
     return NULL;
 struct sqlConnection *conn = hAllocConn(db);
 struct slName *snpNNNTables = hListSnpNNNTables(conn, suffix);
 // Return the first trackDb that we can find (sometimes there is a brand new lastest-version table
 // that does not yet have a trackDb)
 struct slName *table;
 for (table = snpNNNTables; table != NULL; table = table->next)
     {
     struct trackDb *tdb = tdbForTrack(db, table->name, pFullTrackList);
     if (tdb)
         return tdb;
     }
 return NULL;
 }
 
+static int getVVersion(const char *name)
+/* If name ends in V[0-9]+, return the number, else 0. */
+{
+int version = 0;
+char *p = strrchr(name, 'V');
+if (p)
+    {
+    char *versionStr = p + 1;
+    if (isAllDigits(versionStr))
+        version = atoi(versionStr);
+    }
+return version;
+}
+
+static int cmpVDesc(const void *va, const void *vb)
+/* Compare by version number, descending, e.g. tableV2 < tableV1. */
+{
+const struct slName *a = *((struct slName **)va);
+const struct slName *b = *((struct slName **)vb);
+int aVersion = getVVersion(a->name);
+int bVersion = getVVersion(b->name);
+int dif = bVersion - aVersion;
+if (dif == 0)
+    dif = strcmp(b->name, a->name);
+return dif;
+}
+
+static struct slName *hListGencodeTables(struct sqlConnection *conn, char *suffix)
+/* Return a list of 'wgEncodeGencode<suffix>V<version>' tables, if any, highest version first.
+ * If suffix is NULL, it defaults to Basic. */
+{
+char likeExpr[128];
+safef(likeExpr, sizeof(likeExpr), "wgEncodeGencode%sV%%", suffix ? suffix : "Basic");
+struct slName *gencodeTables = sqlListTablesLike(conn, likeExpr);
+slSort(&gencodeTables, cmpVDesc);
+return gencodeTables;
+}
+
+char *hFindLatestGencodeTableConn(struct sqlConnection *conn, char *suffix)
+/* Return the 'wgEncodeGencode<suffix>V<version>' table with the highest version number, if any.
+ * If suffix is NULL, it defaults to Basic. */
+{
+char *tableName = NULL;
+struct slName *gencodeTables = hListGencodeTables(conn, suffix);
+if (gencodeTables)
+    tableName = cloneString(gencodeTables->name);
+slNameFreeList(&gencodeTables);
+return tableName;
+}
+
 boolean hDbHasNcbiRefSeq(char *db)
 /* Return TRUE if db has NCBI's RefSeq alignments and annotations. */
 {
 // hTableExists() caches results so this shouldn't make for loads of new SQL queries if called
 // more than once.
 return (hTableExists(db, "ncbiRefSeq") && hTableExists(db, "ncbiRefSeqPsl") &&
         hTableExists(db, "ncbiRefSeqCds") && hTableExists(db, "ncbiRefSeqLink") &&
         hTableExists(db, "ncbiRefSeqPepTable") &&
         hTableExists(db, "seqNcbiRefSeq") && hTableExists(db, "extNcbiRefSeq"));
 }
 
 char *hRefSeqAccForChrom(char *db, char *chrom)
 /* Return the RefSeq NC_000... accession for chrom if we can find it, else just chrom.
  * db must never change. */
 {