3e70e7601a08e28667531d5abe3fe8b04ee95ded
chmalee
  Fri Jul 16 11:23:58 2021 -0700
Return multiple transcripts for pseudo hgvs search, refs #15554

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index a3af8af..53b8086 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -650,53 +650,53 @@
 
 struct hgvsVariant *hgvsParseTerm(char *term)
 /* If term is a parseable form of HGVS, return the parsed representation, otherwise NULL.
  * This does not check validity of accessions, coordinates or alleles. */
 {
 struct hgvsVariant *hgvs = hgvsParseCNDotPos(term);
 if (hgvs == NULL)
     hgvs = hgvsParsePDotSubst(term);
 if (hgvs == NULL)
     hgvs = hgvsParsePDotRange(term);
 if (hgvs == NULL)
     hgvs = hgvsParseGDotPos(term);
 return hgvs;
 }
 
-static char *npForGeneSymbol(char *db, char *geneSymbol)
-/* Given a gene symbol, look up and return its NP_ accession; if not found return NULL. */
+static struct slName *npForGeneSymbol(char *db, char *geneSymbol)
+/* Given a gene symbol, look up and return a list of NP_ accessions; if not found return NULL. */
 {
 char query[2048];
 if (hDbHasNcbiRefSeq(db))
     {
     sqlSafef(query, sizeof(query), "select protAcc from ncbiRefSeqLink where name = '%s' "
              "and protAcc != 'n/a' and protAcc != '' "
              "order by length(protAcc), protAcc",
              geneSymbol);
     }
 else if (hTableExists(db, "refGene"))
     {
     // Join with refGene to make sure it's an NP for this species.
     sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r "
              "where l.name = '%s' and l.mrnaAcc = r.name "
              "and l.protAcc != '' order by length(l.protAcc), l.protAcc"
              , refLinkTable, geneSymbol);
     }
 else
     return NULL;
 struct sqlConnection *conn = hAllocConn(db);
-char *npAcc = sqlQuickString(conn, query);
+struct slName *npAcc = sqlQuickList(conn, query);
 hFreeConn(&conn);
 return npAcc;
 }
 
 static char *nmForGeneSymbol(char *db, char *geneSymbol)
 /* Given a gene symbol, look up and return its NM_ accession; if not found return NULL. */
 {
 if (trackHubDatabase(db))
     return NULL;
 char query[2048];
 char *geneTable = NULL;
 if (hDbHasNcbiRefSeq(db))
     geneTable = "ncbiRefSeq";
 else if (hTableExists(db, "refGene"))
     geneTable = "refGene";
@@ -872,31 +872,32 @@
         }
     }
 return seq;
 }
 
 static char refBaseForNp(char *db, char *npAcc, int pos)
 // Get the amino acid base in NP_'s sequence at 1-based offset pos.
 {
 char *seq = getProteinSeq(db, npAcc);
 char base = seq ? seq[pos-1] : '\0';
 freeMem(seq);
 return base;
 }
 
 struct hgvsVariant *hgvsParsePseudoHgvs(char *db, char *term)
-/* Attempt to parse things that are not strict HGVS, but that people might intend as HGVS: */
+/* Attempt to parse things that are not strict HGVS, but that people might intend as HGVS:
+ * Return a list of struct hgvsVariant that may be what was intended  */
 // Note: this doesn't support non-coding gene symbol terms (which should have nt alleles)
 {
 struct hgvsVariant *hgvs = NULL;
 regmatch_t substrs[11];
 int geneSymbolIx = 1;
 boolean isSubst;
 if ((isSubst = regexMatchSubstr(term, pseudoHgvsNMPDotSubstExp,
                                      substrs, ArraySize(substrs))) ||
          regexMatchSubstr(term, pseudoHgvsNMPDotRangeExp, substrs, ArraySize(substrs)))
     {
     // User gave an NM_ accession but a protein change -- swap in the right NP_.
     int nmAccIx = 1;
     int geneSymbolIx = 4;
     int len = substrs[nmAccIx].rm_eo - substrs[nmAccIx].rm_so;
     char nmAcc[len+1];
@@ -917,61 +918,71 @@
             }
         else
             npTerm = dyStringCreate("%s:p.%s", npAcc, description);
         hgvs = hgvsParseTerm(npTerm->string);
         dyStringFree(&npTerm);
         freeMem(npAcc);
         }
     }
 else if ((isSubst = regexMatchSubstr(term, pseudoHgvsGeneSymbolProtSubstExp,
                                      substrs, ArraySize(substrs))) ||
          regexMatchSubstr(term, pseudoHgvsGeneSymbolProtRangeExp, substrs, ArraySize(substrs)))
     {
     int len = substrs[geneSymbolIx].rm_eo - substrs[geneSymbolIx].rm_so;
     char geneSymbol[len+1];
     safencpy(geneSymbol, sizeof(geneSymbol), term, len);
-    char *npAcc = npForGeneSymbol(db, geneSymbol);
-    if (isNotEmpty(npAcc))
+    struct slName *npAccList = npForGeneSymbol(db, geneSymbol);
+    if (npAccList != NULL)
+        {
+        struct slName *npItem = NULL;
+        for (npItem = npAccList; npItem != NULL; npItem = npItem->next)
             {
+            char *npAcc = npItem->name;
             // Make it a real HGVS term with the NP and pass that on to the usual parser.
             int descStartIx = 2;
             char *description = term + substrs[descStartIx].rm_so;
             struct dyString *npTerm = dyStringCreate("%s(%s):p.%s",
                                                      npAcc, geneSymbol, description);
-        hgvs = hgvsParseTerm(npTerm->string);
+            slAddHead(&hgvs, hgvsParseTerm(npTerm->string));
             dyStringFree(&npTerm);
-        freeMem(npAcc);
+            //freeMem(npAcc);
+            }
         }
     }
 else if (regexMatchSubstr(term, pseudoHgvsGeneSymbolProtPosExp, substrs, ArraySize(substrs)))
     {
     int len = substrs[geneSymbolIx].rm_eo - substrs[geneSymbolIx].rm_so;
     char geneSymbol[len+1];
     safencpy(geneSymbol, sizeof(geneSymbol), term, len);
-    char *npAcc = npForGeneSymbol(db, geneSymbol);
-    if (isNotEmpty(npAcc))
+    struct slName *npAccList = npForGeneSymbol(db, geneSymbol);
+    if (npAccList != NULL)
+        {
+        struct slName *npItem = NULL;
+        for (npItem = npAccList; npItem != NULL; npItem = npItem->next)
             {
+            char *npAcc = npItem->name;
             // Only position was provided, no change.  Look up ref base and make a synonymous subst
             // so it's parseable HGVS.
             int posIx = 2;
             int pos = regexSubstringInt(term, substrs[posIx]);
             char refBase = refBaseForNp(db, npAcc, pos);
             struct dyString *npTerm = dyStringCreate("%s(%s):p.%c%d=",
                                                      npAcc, geneSymbol, refBase, pos);
-        hgvs = hgvsParseTerm(npTerm->string);
+            slAddHead(&hgvs, hgvsParseTerm(npTerm->string));
             dyStringFree(&npTerm);
-        freeMem(npAcc);
+            //freeMem(npAcc);
+            }
         }
     }
 else if (regexMatchSubstr(term, pseudoHgvsGeneSympolCDotPosExp, substrs, ArraySize(substrs)))
     {
     int len = substrs[geneSymbolIx].rm_eo - substrs[geneSymbolIx].rm_so;
     char geneSymbol[len+1];
     safencpy(geneSymbol, sizeof(geneSymbol), term, len);
     char *nmAcc = nmForGeneSymbol(db, geneSymbol);
     if (isNotEmpty(nmAcc))
         {
         // Make it a real HGVS term with the NM and pass that on to the usual parser.
         int descStartIx = regexSubstrMatched(substrs[2]) ? 2 : 3;
         char *description = term + substrs[descStartIx].rm_so;
         struct dyString *nmTerm = dyStringCreate("%s(%s):c.%s",
                                                  nmAcc, geneSymbol, description);