5f9ba1691bd3051146f2f9464c9d1375e42f6ff4
angie
  Thu Mar 8 11:36:57 2018 -0800
Added support for ENS*T* transcript IDs in HGVS position search, using the latest Gencode.
Added support for parsing ENS*P* protein IDs, but can't yet map those to the genome because
our Gencode tables don't yet include a mapping between transcript and protein IDs.
refs #21076

diff --git src/hg/lib/hdb.c src/hg/lib/hdb.c
index 8815dbe..8cb5d24 100644
--- src/hg/lib/hdb.c
+++ src/hg/lib/hdb.c
@@ -5583,30 +5583,80 @@
     return NULL;
 struct sqlConnection *conn = hAllocConn(db);
 struct slName *snpNNNTables = hListSnpNNNTables(conn, suffix);
 // Return the first trackDb that we can find (sometimes there is a brand new lastest-version table
 // that does not yet have a trackDb)
 struct slName *table;
 for (table = snpNNNTables;  table != NULL;  table = table->next)
     {
     struct trackDb *tdb = tdbForTrack(db, table->name, pFullTrackList);
     if (tdb)
         return tdb;
     }
 return NULL;
 }
 
+static int getVVersion(const char *name)
+/* Return the number after the last 'V' in name if isAllDigits accepts that tail, else 0. */
+{
+int version = 0;  // result when name has no 'V' or the tail is rejected
+char *p = strrchr(name, 'V');  // last 'V', so any earlier 'V' in name is ignored
+if (p)
+    {
+    char *versionStr = p + 1;  // text following that 'V'
+    if (isAllDigits(versionStr))
+        version = atoi(versionStr);
+    }
+return version;
+}
+
+static int cmpVDesc(const void *va, const void *vb)
+/* Compare two struct slName ** by getVVersion of name, descending, e.g. tableV2 < tableV1. */
+{
+const struct slName *a = *((struct slName **)va);
+const struct slName *b = *((struct slName **)vb);
+int aVersion = getVVersion(a->name);
+int bVersion = getVVersion(b->name);
+int dif = bVersion - aVersion;  // b minus a (not a minus b) gives the descending order
+if (dif == 0)
+    dif = strcmp(b->name, a->name);  // same version: fall back to reversed strcmp of the names
+return dif;
+}
+
+static struct slName *hListGencodeTables(struct sqlConnection *conn, char *suffix)
+/* Return the tables that sqlListTablesLike finds for 'wgEncodeGencode<suffix>V%', if any, sorted
+ * with cmpVDesc so the highest version is first.  If suffix is NULL, it defaults to Basic. */
+{
+char likeExpr[128];  // holds the LIKE pattern; assuming printf-style safef, %% becomes a literal %
+safef(likeExpr, sizeof(likeExpr), "wgEncodeGencode%sV%%", suffix ? suffix : "Basic");
+struct slName *gencodeTables = sqlListTablesLike(conn, likeExpr);
+slSort(&gencodeTables, cmpVDesc);  // cmpVDesc orders by the number after the last 'V', descending
+return gencodeTables;
+}
+
+char *hFindLatestGencodeTableConn(struct sqlConnection *conn, char *suffix)
+/* Return a cloneString copy of the first (highest-version) name from hListGencodeTables(conn,
+ * suffix), or NULL if that list is empty.  If suffix is NULL, it defaults to Basic. */
+{
+char *tableName = NULL;  // stays NULL when no matching table is listed
+struct slName *gencodeTables = hListGencodeTables(conn, suffix);
+if (gencodeTables)
+    tableName = cloneString(gencodeTables->name);  // copy, because the list is freed just below
+slNameFreeList(&gencodeTables);
+return tableName;
+}
+
 boolean hDbHasNcbiRefSeq(char *db)
 /* Return TRUE if db has NCBI's RefSeq alignments and annotations. */
 {
 // hTableExists() caches results so this shouldn't make for loads of new SQL queries if called
 // more than once.
 return (hTableExists(db, "ncbiRefSeq") && hTableExists(db, "ncbiRefSeqPsl") &&
         hTableExists(db, "ncbiRefSeqCds") && hTableExists(db, "ncbiRefSeqLink") &&
         hTableExists(db, "ncbiRefSeqPepTable") &&
         hTableExists(db, "seqNcbiRefSeq") && hTableExists(db, "extNcbiRefSeq"));
 }
 
 char *hRefSeqAccForChrom(char *db, char *chrom)
 /* Return the RefSeq NC_000... accession for chrom if we can find it, else just chrom.
  * db must never change. */
 {