c2d33a4ecdb13f97ae828bc7099280f89f1e5dbd
angie
  Wed Oct 19 09:55:33 2016 -0700
hgHgvs was assuming that refGene always exists, oops.
hgHgvs was also assuming that db is a mysql database.  Thanks Braney!
refs #18240 note-5, note-9

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index ff10445..22729cf 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -1,27 +1,28 @@
 /* hgHgvs.c - Parse the Human Genome Variation Society (HGVS) nomenclature for variants. */
 /* See http://varnomen.hgvs.org/ and https://github.com/mutalyzer/mutalyzer/ */
 
 /* Copyright (C) 2016 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "hgHgvs.h"
 #include "chromInfo.h"
 #include "genbank.h"
 #include "hdb.h"
 #include "pslTransMap.h"
 #include "regexHelper.h"
+#include "trackHub.h"
 
 // Regular expressions for HGVS-recognized sequence accessions: LRG or versioned RefSeq:
 #define lrgTranscriptExp "(LRG_[0-9]+t[0-9+])"
 #define lrgRegionExp "(LRG_[0-9+])"
 
 // NC = RefSeq reference assembly chromosome
 // NG = RefSeq incomplete genomic region (e.g. gene locus)
 // NM = RefSeq curated mRNA
 // NP = RefSeq curated protein
 // NR = RefSeq curated (non-coding) RNA
 #define geneSymbolExp "([A-Za-z0-9./_-]+)"
 #define optionalGeneSymbolExp "(\\(" geneSymbolExp "\\))?"
 #define versionedAccPrefixExp(p) "(" p "_[0-9]+(\\.[0-9]+)?)" optionalGeneSymbolExp
 //                                 ........................       accession and optional dot version
 //                                              ...........       optional dot version
@@ -409,97 +410,112 @@
 
 static boolean dbHasNcbiRefSeq(char *db)
 /* Test whether NCBI's RefSeq alignments are available in db. */
 {
 // hTableExists() caches results so this shouldn't make for loads of new SQL queries if called
 // more than once.
 return (hTableExists(db, "ncbiRefSeq") && hTableExists(db, "ncbiRefSeqPsl") &&
         hTableExists(db, "ncbiRefSeqCds") && hTableExists(db, "ncbiRefSeqLink") &&
         hTableExists(db, "ncbiRefSeqPepTable") &&
         hTableExists(db, "seqNcbiRefSeq") && hTableExists(db, "extNcbiRefSeq"));
 }
 
 static char *npForGeneSymbol(char *db, char *geneSymbol)
 /* Given a gene symbol, look up and return its NP_ accession; if not found return NULL. */
 {
-struct sqlConnection *conn = hAllocConn(db);
 char query[2048];
 if (dbHasNcbiRefSeq(db))
     {
     sqlSafef(query, sizeof(query), "select protAcc from ncbiRefSeqLink where name = '%s' "
              "and protAcc != 'n/a' and protAcc != '' "
              "order by length(protAcc), protAcc",
              geneSymbol);
     }
-else
+else if (hTableExists(db, "refGene"))
     {
     // Join with refGene to make sure it's an NP for this species.
     sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r "
              "where l.name = '%s' and l.mrnaAcc = r.name "
              "and l.protAcc != '' order by length(l.protAcc), l.protAcc"
              , refLinkTable, geneSymbol);
     }
+else
+    return NULL;
+struct sqlConnection *conn = hAllocConn(db);
 char *npAcc = sqlQuickString(conn, query);
 hFreeConn(&conn);
 return npAcc;
 }
 
 static char *nmForGeneSymbol(char *db, char *geneSymbol)
 /* Given a gene symbol, look up and return its NM_ accession; if not found return NULL. */
 {
-struct sqlConnection *conn = hAllocConn(db);
+if (trackHubDatabase(db))
+    return NULL;
 char query[2048];
-char *geneTable = (dbHasNcbiRefSeq(db) ? "ncbiRefSeq" : "refGene");
+char *geneTable = NULL;
+if (dbHasNcbiRefSeq(db))
+    geneTable = "ncbiRefSeq";
+else if (hTableExists(db, "refGene"))
+    geneTable = "refGene";
+if (geneTable == NULL)
+    return NULL;
 sqlSafef(query, sizeof(query), "select name from %s where name2 = '%s' "
          "order by length(name), name", geneTable, geneSymbol);
+struct sqlConnection *conn = hAllocConn(db);
 char *nmAcc = sqlQuickString(conn, query);
 hFreeConn(&conn);
 return nmAcc;
 }
 
 static char *npForNm(char *db, char *nmAcc)
 /* Given an NM_ accession, look up and return its NP_ accession; if not found return NULL. */
 {
-struct sqlConnection *conn = hAllocConn(db);
+if (trackHubDatabase(db))
+    return NULL;
 char query[2048];
 if (dbHasNcbiRefSeq(db))
     {
     // ncbiRefSeq tables use versioned NM_ accs, but the user might have passed in a
     // versionless NM_, so adjust query accordingly:
     if (strchr(nmAcc, '.'))
         sqlSafef(query, sizeof(query), "select protAcc from ncbiRefSeqLink where id = '%s'", nmAcc);
     else
         sqlSafef(query, sizeof(query), "select protAcc from ncbiRefSeqLink where id like '%s.%%'",
                  nmAcc);
     }
-else
+else if (hTableExists(db, "refGene"))
     {
     // Trim .version if present since our genbank tables don't use versioned names.
     char *trimmed = cloneFirstWordByDelimiter(nmAcc, '.');
     sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r "
              "where r.name = '%s' and l.mrnaAcc = r.name "
              "and l.protAcc != '' order by length(l.protAcc), l.protAcc",
              refLinkTable, trimmed);
     }
+else return NULL;
+struct sqlConnection *conn = hAllocConn(db);
 char *npAcc = sqlQuickString(conn, query);
 hFreeConn(&conn);
 return npAcc;
 }
 
 static char *getProteinSeq(char *db, char *acc)
 /* Return amino acid sequence for acc, or NULL if not found. */
 {
+if (trackHubDatabase(db))
+    return NULL;
 char *seq = NULL;
 if (startsWith("LRG_", acc))
     {
     if (hTableExists(db, "lrgPep"))
         {
         char query[2048];
         sqlSafef(query, sizeof(query), "select seq from lrgPep where name = '%s'", acc);
         struct sqlConnection *conn = hAllocConn(db);
         seq = sqlQuickString(conn, query);
         hFreeConn(&conn);
         }
     }
 else
     {
     if (dbHasNcbiRefSeq(db))
@@ -514,31 +530,31 @@
     else
         {
         aaSeq *aaSeq = hGenBankGetPep(db, acc, gbSeqTable);
         if (aaSeq)
             seq = aaSeq->dna;
         }
     }
 
 return seq;
 }
 
 static char refBaseForNp(char *db, char *npAcc, int pos)
 // Get the amino acid base in NP_'s sequence at 1-based offset pos.
 {
 char *seq = getProteinSeq(db, npAcc);
-char base = seq[pos-1];
+char base = seq ? seq[pos-1] : '\0';
 freeMem(seq);
 return base;
 }
 
 struct hgvsVariant *hgvsParsePseudoHgvs(char *db, char *term)
 /* Attempt to parse things that are not strict HGVS, but that people might intend as HGVS: */
 // Note: this doesn't support non-coding gene symbol terms (which should have nt alleles)
 {
 struct hgvsVariant *hgvs = NULL;
 regmatch_t substrs[11];
 int geneSymbolIx = 1;
 boolean isSubst;
 if ((isSubst = regexMatchSubstr(term, pseudoHgvsNMPDotSubstExp,
                                      substrs, ArraySize(substrs))) ||
          regexMatchSubstr(term, pseudoHgvsNMPDotRangeExp, substrs, ArraySize(substrs)))
@@ -683,58 +699,62 @@
         *retFoundAcc = cloneFirstWordByDelimiter(hgvs->seqAcc, '.');
     if (retFoundVersion)
         {
         char *p = strchr(hgvs->seqAcc, '.');
         if (p)
             *retFoundVersion = atoi(p+1);
         }
     // Don't free chromInfo, it's cached!
     }
 return coordsOK;
 }
 
 static char *getCdnaSeq(char *db, char *acc)
 /* Return cdna sequence for acc, or NULL if not found. */
 {
+if (trackHubDatabase(db))
+    return NULL;
 char *seq = NULL;
 if (startsWith("LRG_", acc))
     {
     if (hTableExists(db, "lrgCdna"))
         {
         char query[2048];
         sqlSafef(query, sizeof(query), "select seq from lrgCdna where name = '%s'", acc);
         struct sqlConnection *conn = hAllocConn(db);
         seq = sqlQuickString(conn, query);
         hFreeConn(&conn);
         }
     }
 else
     {
     struct dnaSeq *cdnaSeq = NULL;
     if (dbHasNcbiRefSeq(db))
         cdnaSeq = hDnaSeqGet(db, acc, "seqNcbiRefSeq", "extNcbiRefSeq");
     else
         cdnaSeq = hGenBankGetMrna(db, acc, gbSeqTable);
     if (cdnaSeq)
         seq = cdnaSeq->dna;
     }
 return seq;
 }
 
 static boolean getCds(char *db, char *acc, struct genbankCds *retCds)
 /* Get the CDS info for genbank or LRG acc; return FALSE if not found or not applicable. */
 {
+if (trackHubDatabase(db))
+    return FALSE;
 boolean gotCds = FALSE;
 char query[1024];
 if (startsWith("LRG_", acc))
     sqlSafef(query, sizeof(query),
              "select cds from lrgCds where id = '%s'", acc);
 else if (dbHasNcbiRefSeq(db) &&
          // This is a hack to allow us to fall back on refSeqAli if ncbiRefSeqPsl is incomplete:
          strchr(acc, '.'))
     sqlSafef(query, sizeof(query),
              "select cds from ncbiRefSeqCds where id = '%s'", acc);
 else
     sqlSafef(query, sizeof(query),
              "SELECT c.name FROM %s as c, %s as g WHERE (g.acc = '%s') AND "
              "(g.cds != 0) AND (g.cds = c.id)", cdsTable, gbCdnaInfoTable, acc);
 struct sqlConnection *conn = hAllocConn(db);
@@ -795,46 +815,50 @@
         hgvsRefBase = refBaseFromProt(p+1);
         if (hgvsRefBase != '\0')
             {
             int end1 = atoi(skipToNumeric(p+1));
             char seqRefBase = toupper(accSeq[end1-1]);
             if (seqRefBase != hgvsRefBase)
                 *retDiffRefAllele = cloneStringFromChar(seqRefBase);
             }
         }
     }
 }
 
 static int getGbVersion(char *db, char *acc)
 /* Return the local version that we have for acc. */
 {
+if (trackHubDatabase(db))
+    return 0;
 char query[2048];
 sqlSafef(query, sizeof(query), "select version from %s where acc = '%s'", gbSeqTable, acc);
 struct sqlConnection *conn = hAllocConn(db);
 int version = sqlQuickNum(conn, query);
 hFreeConn(&conn);
 return version;
 }
 
 static char *normalizeVersion(char *db, char *acc, int *retFoundVersion)
 /* LRG accessions don't have versions, so just return the same acc and set *retFoundVersion to 0.
  * The user may give us a RefSeq accession with or without a .version.
  * If ncbiRefSeq tables are present, return acc with version, looking up version if necessary.
  * If we look it up and can't find it, this returns NULL.
  * If instead we're using genbank tables, return acc with no version.
  * That ensures that acc will be found in our local tables. */
 {
+if (trackHubDatabase(db))
+    return NULL;
 char *normalizedAcc = NULL;
 int foundVersion = 0;
 if (startsWith("LRG_", acc))
     {
     normalizedAcc = cloneString(acc);
     }
 else if (dbHasNcbiRefSeq(db))
     {
     // ncbiRefSeq tables need versioned accessions.
     if (strchr(acc, '.'))
         normalizedAcc = cloneString(acc);
     else
         {
         char query[2048];
         sqlSafef(query, sizeof(query), "select name from ncbiRefSeq where name like '%s.%%'", acc);
@@ -1073,31 +1097,31 @@
     // variantPsl contains the anchor if a non-cdsStart anchor is used because
     // the actual position might be outside the bounds of the transcript sequence (intron/UTR)
     struct psl *variantPsl = pslFromHgvsNuc(hgvs, acc, txAli->qSize, txAli->qEnd, cds,
                                             retUpstream, retDownstream);
     mappedToGenome = pslTransMap(pslTransMapNoOpts, variantPsl, txAli);
     pslFree(&variantPsl);
     pslFree(&txAli);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return mappedToGenome;
 }
 
 static char *pslTableForAcc(char *db, char *acc)
 /* Based on acc (and whether db has NCBI RefSeq alignments), pick a PSL table where
- * acc should be found.  Don't free the returned string. */
+ * acc should be found (table may or may not exist).  Don't free the returned string. */
 {
 char *pslTable = NULL;
 if (startsWith("LRG_", acc))
     pslTable = "lrgTranscriptAli";
 else if (startsWith("NM_", acc))
     {
     // Use NCBI's alignments if they are available
     if (dbHasNcbiRefSeq(db))
         pslTable = "ncbiRefSeqPsl";
     else
         pslTable = "refSeqAli";
     }
 return pslTable;
 }
 
@@ -1187,33 +1211,35 @@
 
 static struct bed3 *hgvsMapPDotToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
 /* Return a bed3 with the variant's span on the genome, or NULL if unable to map.
  * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
 {
 struct bed3 *region = NULL;
 char *acc = normalizeVersion(db, hgvs->seqAcc, NULL);
 if (acc && startsWith("NP_", acc))
     {
     // Translate the NP_*:p. to NM_*:c. and map NM_*:c. to the genome.
     struct sqlConnection *conn = hAllocConn(db);
     char query[2048];
     if (dbHasNcbiRefSeq(db))
         sqlSafef(query, sizeof(query), "select mrnaAcc from ncbiRefSeqLink where protAcc = '%s'",
                  acc);
-    else
+    else if (hTableExists(db, "refGene"))
         sqlSafef(query, sizeof(query), "select mrnaAcc from %s l, refGene r "
                  "where l.protAcc = '%s' and r.name = l.mrnaAcc", refLinkTable, acc);
+    else
+        return NULL;
     char *nmAcc = sqlQuickString(conn, query);
     hFreeConn(&conn);
     if (nmAcc)
         {
         struct hgvsVariant cDot;
         zeroBytes(&cDot, sizeof(cDot));
         cDot.type = hgvstCoding;
         cDot.seqAcc = nmAcc;
         cDot.start1 = ((hgvs->start1 - 1) * 3) + 1;
         cDot.end = ((hgvs->end - 1) * 3) + 3;
         cDot.change = hgvs->change;
         region = hgvsMapNucToGenome(db, &cDot, retPslTable);
         freeMem(nmAcc);
         }
     }