aaf72102b545c05c42f66b7a3fc22d65b1ecf4fe
angie
  Mon Aug 8 14:12:39 2016 -0700
Added recognition of a small subset of HGVS terms: coding (c.) SNVs relative to RefSeq NM_ or LRG transcript IDs,
and protein (p.) simple substitutions relative to NP_.
Also accepted (not HGVS but similar and popular): geneSymbol and abbreviated protein subst like "ALK G1494E".
hgFind will map terms to the current genome if possible, and will display warnings about unrecognized accessions,
out-of-bounds coordinates and mismatching reference alleles.
refs #15071, #15554

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
new file mode 100644
index 0000000..d1913fe
--- /dev/null
+++ src/hg/lib/hgHgvs.c
@@ -0,0 +1,531 @@
+/* hgHgvs.c - Parse the Human Genome Variation Society (HGVS) nomenclature for variants. */
+/* See http://varnomen.hgvs.org/ and https://github.com/mutalyzer/mutalyzer/ */
+
+/* Copyright (C) 2016 The Regents of the University of California
+ * See README in this or parent directory for licensing information. */
+#include "common.h"
+#include "hgHgvs.h"
+#include "genbank.h"
+#include "hdb.h"
+#include "pslTransMap.h"
+#include "regexHelper.h"
+
+// Regular expressions for HGVS-recognized sequence accessions: LRG or versioned RefSeq:
+#define lrgTranscriptExp "(LRG_[0-9]+t[0-9+])"
+#define lrgRegionExp "(LRG_[0-9+])"
+
+// NC = RefSeq reference assembly chromosome
+// NG = RefSeq incomplete genomic region (e.g. gene locus)
+// NM = RefSeq curated mRNA
+// NP = RefSeq curated protein
+// NR = RefSeq curated (non-coding) RNA
+#define geneSymbolExp "([A-Z0-9./_-]+)"
+#define optionalGeneSymbolExp "(\\(" geneSymbolExp "\\))?"
+#define versionedAccPrefixExp(p) "(" p "_[0-9]+(\\.[0-9]+)?)" optionalGeneSymbolExp
+//                                 ........................       accession and optional dot version
+//                                              ...........       optional dot version
+//                                                         ...... optional gene symbol in ()s
+//                                                          ....  optional gene symbol
+#define versionedRefSeqNCExp versionedAccPrefixExp("NC")
+#define versionedRefSeqNGExp versionedAccPrefixExp("NG")
+#define versionedRefSeqNMExp versionedAccPrefixExp("NM")
+#define versionedRefSeqNPExp versionedAccPrefixExp("NP")
+#define versionedRefSeqNRExp versionedAccPrefixExp("NR")
+
+// Nucleotide substitution regexes
+// (c. = CDS, g. = genomic, m. = mitochondrial, n.= non-coding RNA, r. = RNA)
+#define hgvsNtSubstExp "([0-9]+)([ACGT]+)>([ACGT]+)"
+//                       ......                     1-based position
+//                               .......            original sequence
+//                                         .......  replacement sequence
+#define hgvsCDotSubstExp "c\\." hgvsNtSubstExp
+#define hgvsGDotSubstExp "g\\." hgvsNtSubstExp
+#define hgvsMDotSubstExp "m\\." hgvsNtSubstExp
+#define hgvsNDotSubstExp "n\\." hgvsNtSubstExp
+#define hgvsRDotSubstExp "r\\." hgvsNtSubstExp
+
+// Protein substitution regex
+#define AA3 "Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val|Ter"
+#define hgvsAminoAcidExp "([ARNDCQEGHILKMFPSTWYVX*]|" AA3 ")"
+#define hgvsAminoAcidSubstExp hgvsAminoAcidExp "([0-9]+)" hgvsAminoAcidExp
+#define hgvsPDotSubstExp "p\\." hgvsAminoAcidSubstExp
+//                                 ...                                  // original sequence
+//                                              ......                  // 1-based position
+//                                                           ...        // replacement sequence
+
+
+// Complete HGVS term regexes combining sequence identifiers and change operations
+#define hgvsFullRegex(seq, op) "^" seq ":" op "$"
+
+#define hgvsLrgCDotSubstExp hgvsFullRegex(lrgTranscriptExp, hgvsCDotSubstExp)
+// substring numbering:
+//      0.....................................................  whole matching string
+//      1...................                                    LRG transcript
+//                                   2.....                     1-based position
+//                                           3......            original sequence
+//                                                     4......  replacement sequence
+
+#define hgvsRefSeqNMCDotSubstExp hgvsFullRegex(versionedRefSeqNMExp, hgvsCDotSubstExp)
+// substring numbering:
+//      0.....................................................  whole matching string
+//      1...............                                        accession and optional dot version
+//             2........                                        optional dot version
+//                         3.....                               optional gene symbol in ()s
+//                          4...                                optional gene symbol
+//                                   5.....                     1-based position
+//                                           6......            original sequence
+//                                                     7......  replacement sequence
+
+#define hgvsRefSeqNPPDotSubstExp hgvsFullRegex(versionedRefSeqNPExp, hgvsPDotSubstExp)
+// substring numbering:
+//      0.....................................................  whole matching string
+//      1................                                       accession and optional dot version
+//                 2.....                                       optional dot version
+//                         3.....                               optional gene symbol in ()s
+//                          4...                                optional gene symbol
+//                                   5.....                     original sequence
+//                                           6......            1-based position
+//                                                     7......  replacement sequence
+
+// Pseudo-HGVS in common usage
+#define pseudoHgvsGeneSymbolSubstExp "^" geneSymbolExp "[: ]" hgvsAminoAcidSubstExp
+//      0.....................................................  whole matching string
+//      1...................                                    gene symbol
+//                                   2.....                     original sequence
+//                                           3......            1-based position
+//                                                     4......  replacement sequence
+
+static struct hgvsVariant *hgvsParseCDotSubst(char *term)
+/* If term is parseable as an HGVS CDS substitution, return the parsed representation,
+ * otherwise NULL. */
+{
+struct hgvsVariant *hgvs = NULL;
+boolean matches = FALSE;
+int accIx = 1;
+int posIx = 0;
+int refIx = 0;
+int altIx = 0;
+int geneSymbolIx = -1;
+regmatch_t substrs[8];
+if (regexMatchSubstrNoCase(term, hgvsLrgCDotSubstExp, substrs, ArraySize(substrs)))
+    {
+    matches = TRUE;
+    posIx = 2;
+    refIx = 3;
+    altIx = 4;
+    }
+else if (regexMatchSubstrNoCase(term, hgvsRefSeqNMCDotSubstExp, substrs, ArraySize(substrs)))
+    {
+    matches = TRUE;
+    geneSymbolIx = 4;
+    posIx = 5;
+    refIx = 6;
+    altIx = 7;
+    }
+if (matches)
+    {
+    AllocVar(hgvs);
+    hgvs->type = hgvstCoding;
+    hgvs->changeSymbol = cloneString(">");
+    hgvs->seqAcc = regexSubstringClone(term, substrs[accIx]);
+    hgvs->start1 = regexSubstringInt(term, substrs[posIx]);
+    hgvs->refAllele = regexSubstringClone(term, substrs[refIx]);
+    hgvs->altAllele = regexSubstringClone(term, substrs[altIx]);
+    if (geneSymbolIx >= 0 && regexSubstrMatched(substrs[geneSymbolIx]))
+        hgvs->seqGeneSymbol = regexSubstringClone(term, substrs[geneSymbolIx]);
+    hgvs->end = hgvs->start1 - 1 + strlen(hgvs->refAllele);
+    }
+return hgvs;
+}
+
+static char *aaAbbrsToLetters(char *aaStr)
+/* If the input string is composed of AA abbreviations like "Ala", "Asp" etc., convert them to
+ * single letter AAs like "A", "D" etc.  Otherwise return a copy of aaStr. */
+{
+char *letters = NULL;
+if (regexMatch(aaStr, "^(" AA3 ")+$"))
+    {
+    int len = strlen(aaStr) / 3;
+    letters = needMem(len + 1);
+    int i;
+    for (i = 0;  i < len;  i++)
+        letters[i] = aaAbbrToLetter(&aaStr[i*3]);
+    }
+else
+    letters = cloneString(aaStr);
+return letters;
+}
+
+static struct hgvsVariant *hgvsParsePDotSubst(char *term)
+/* If term is parseable as an HGVS protein substitution, return the parsed representation,
+ * otherwise NULL. */
+{
+struct hgvsVariant *hgvs = NULL;
+regmatch_t substrs[8];
+if (regexMatchSubstrNoCase(term, hgvsRefSeqNPPDotSubstExp, substrs, ArraySize(substrs)))
+    {
+    int accIx = 1;
+    int geneSymbolIx = 4;
+    int refIx = 5;
+    int posIx = 6;
+    int altIx = 7;
+    AllocVar(hgvs);
+    hgvs->type = hgvstProtein;
+    hgvs->seqAcc = regexSubstringClone(term, substrs[accIx]);
+    if (regexSubstrMatched(substrs[geneSymbolIx]))
+        hgvs->seqGeneSymbol = regexSubstringClone(term, substrs[geneSymbolIx]);
+    hgvs->start1 = regexSubstringInt(term, substrs[posIx]);
+    // Convert 3-letter abbreviations to single-letter codes like "Ala" -> "A",
+    // so strlen(allele) is the number of amino acids.
+    char buf[2048];
+    regexSubstringCopy(term, substrs[refIx], buf, sizeof(buf));
+    hgvs->refAllele = aaAbbrsToLetters(buf);
+    regexSubstringCopy(term, substrs[altIx], buf, sizeof(buf));
+    hgvs->altAllele = aaAbbrsToLetters(buf);
+    hgvs->end = hgvs->start1 - 1 + strlen(hgvs->refAllele);
+    }
+return hgvs;
+}
+
+struct hgvsVariant *hgvsParseTerm(char *term)
+/* If term is a parseable form of HGVS, return the parsed representation, otherwise NULL.
+ * This does not check validity of accessions or alleles. */
+{
+struct hgvsVariant *hgvs = hgvsParseCDotSubst(term);
+if (hgvs == NULL)
+    hgvs = hgvsParsePDotSubst(term);
+return hgvs;
+}
+
+struct hgvsVariant *hgvsParsePseudoHgvs(char *db, char *term)
+/* Attempt to parse things that are not strict HGVS, but that people might intend as HGVS: */
+{
+struct hgvsVariant *hgvs = NULL;
+regmatch_t substrs[5];
+if (regexMatchSubstrNoCase(term, pseudoHgvsGeneSymbolSubstExp, substrs, ArraySize(substrs)))
+    {
+    int geneSymbolIx = 1;
+    int refIx = 2;
+    int posIx = 3;
+    int altIx = 4;
+    char *geneSymbol = regexSubstringClone(term, substrs[geneSymbolIx]);
+    // Try to find an NP_ for geneSymbol, using refGene to make sure it's an NP for this species.
+    struct sqlConnection *conn = hAllocConn(db);
+    char query[2048];
+    sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r "
+             "where l.name = '%s' and l.mrnaAcc = r.name "
+             "and l.protAcc != '' order by l.protAcc"
+             , refLinkTable, geneSymbol);
+    char *npAcc = sqlQuickString(conn, query);
+    hFreeConn(&conn);
+    if (isNotEmpty(npAcc))
+        {
+        AllocVar(hgvs);
+        hgvs->type = hgvstProtein;
+        hgvs->seqAcc = npAcc;
+        hgvs->seqGeneSymbol = geneSymbol;
+        hgvs->start1 = regexSubstringInt(term, substrs[posIx]);
+        // Convert 3-letter abbreviations to single-letter codes like "Ala" -> "A",
+        // so strlen(allele) is the number of amino acids.
+        char buf[2048];
+        regexSubstringCopy(term, substrs[refIx], buf, sizeof(buf));
+        hgvs->refAllele = aaAbbrsToLetters(buf);
+        regexSubstringCopy(term, substrs[altIx], buf, sizeof(buf));
+        hgvs->altAllele = aaAbbrsToLetters(buf);
+        int refLen = strlen(hgvs->refAllele);
+        hgvs->end = hgvs->start1 - 1 + refLen;
+        }
+    // Note: this doesn't support non-coding gene symbol substs (which should have nt alleles)
+    }
+return hgvs;
+}
+
+static char *getCdnaSeq(char *db, char *acc,  char **retFoundAcc)
+/* Return cdna sequence for acc, or NULL if not found.  If retFoundAcc is not NULL,
+ * set it to the accession for which we grabbed seq (might have a .version chopped off). */
+{
+char *seq = NULL;
+char *foundAcc = NULL;
+if (startsWith("LRG_", acc))
+    {
+    if (hTableExists(db, "lrgCdna"))
+        {
+        char query[2048];
+        sqlSafef(query, sizeof(query), "select seq from lrgCdna where name = '%s'", acc);
+        struct sqlConnection *conn = hAllocConn(db);
+        seq = sqlQuickString(conn, query);
+        hFreeConn(&conn);
+        if (seq)
+            foundAcc = cloneString(acc);
+        }
+    }
+else
+    {
+    char *trimmedAcc = cloneFirstWordByDelimiter(acc, '.');
+    struct dnaSeq *gbSeq = hGenBankGetMrna(db, trimmedAcc, gbSeqTable);
+    if (gbSeq)
+        {
+        seq = gbSeq->dna;
+        foundAcc = trimmedAcc;
+        }
+    }
+if (retFoundAcc)
+    *retFoundAcc = foundAcc;
+return seq;
+}
+
+static char *getProteinSeq(char *db, char *acc,  char **retFoundAcc)
+/* Return amino acid sequence for acc, or NULL if not found.  If retFoundAcc is not NULL,
+ * set it to the accession for which we grabbed seq (might have a .version chopped off). */
+{
+char *seq = NULL;
+char *foundAcc = NULL;
+if (startsWith("LRG_", acc))
+    {
+    if (hTableExists(db, "lrgPep"))
+        {
+        char query[2048];
+        sqlSafef(query, sizeof(query), "select seq from lrgPep where name = '%s'", acc);
+        struct sqlConnection *conn = hAllocConn(db);
+        seq = sqlQuickString(conn, query);
+        hFreeConn(&conn);
+        if (seq)
+            foundAcc = cloneString(acc);
+        }
+    }
+else
+    {
+    char *trimmedAcc = cloneFirstWordByDelimiter(acc, '.');
+    aaSeq *aaSeq = hGenBankGetPep(db, trimmedAcc, gbSeqTable);
+    if (aaSeq)
+        {
+        seq = aaSeq->dna;
+        foundAcc = trimmedAcc;
+        }
+    }
+if (retFoundAcc)
+    *retFoundAcc = foundAcc;
+return seq;
+}
+
+static int getCdsStart(char *db, char *acc)
+/* Return the start coordinate for CDS in genbank or LRG acc, or -1 if not found. */
+{
+int cdsStart = -1;
+char query[1024];
+if (startsWith("LRG_", acc))
+    sqlSafef(query, sizeof(query),
+             "select cds from lrgCds where id = '%s'", acc);
+else
+    sqlSafef(query, sizeof(query),
+             "SELECT c.name FROM %s as c, %s as g WHERE (g.acc = '%s') AND "
+             "(g.cds != 0) AND (g.cds = c.id)", cdsTable, gbCdnaInfoTable, acc);
+struct sqlConnection *conn = hAllocConn(db);
+char cdsBuf[2048];
+char *cdsStr = sqlQuickQuery(conn, query, cdsBuf, sizeof(cdsBuf));
+hFreeConn(&conn);
+if (isNotEmpty(cdsStr))
+    {
+    struct genbankCds cds;
+    if (genbankCdsParse(cdsStr, &cds) && cds.startComplete && cds.start != cds.end)
+        cdsStart = cds.start;
+    }
+return cdsStart;
+}
+
+static int getGbVersion(char *db, char *acc)
+/* Return the local version that we have for acc. */
+{
+char query[2048];
+sqlSafef(query, sizeof(query), "select version from %s where acc = '%s'", gbSeqTable, acc);
+struct sqlConnection *conn = hAllocConn(db);
+int version = sqlQuickNum(conn, query);
+hFreeConn(&conn);
+return version;
+}
+
+boolean hgvsValidate(char *db, struct hgvsVariant *hgvs, char **retFoundAcc, int *retFoundVersion,
+                     char **retDiffRefAllele)
+/* Return TRUE if hgvs coords are within the bounds of the sequence for hgvs->seqAcc.
+ * If retFoundAcc is not NULL, set it to our local accession (which may be missing the .version
+ * of hgvs->seqAcc) or NULL if we can't find any match.
+ * If retFoundVersion is not NULL and hgvs->seqAcc has a version number (e.g. NM_005957.4),
+ * set retFoundVersion to our version from latest GenBank, otherwise 0 (no version for LRG).
+ * If coords are OK and retDiffRefAllele is not NULL: if our sequence at the coords
+ * matches hgvs->refAllele then set it to NULL; if mismatch then set it to our sequence. */
+{
+boolean coordsOK = FALSE;
+char *acc = hgvs->seqAcc;
+char *foundAcc = NULL;
+char *accSeq = NULL;
+int cdsOffset = 0;
+int refLen = strlen(hgvs->refAllele);
+if (hgvs->type == hgvstProtein)
+    accSeq = getProteinSeq(db, acc, &foundAcc);
+else
+    accSeq = getCdnaSeq(db, acc, &foundAcc);
+if (retFoundVersion)
+    {
+    *retFoundVersion = 0;
+    if (foundAcc && ! startsWith("LRG_", foundAcc))
+        *retFoundVersion = getGbVersion(db, foundAcc);
+    }
+if (hgvs->type == hgvstCoding && foundAcc)
+    cdsOffset = getCdsStart(db, foundAcc);
+if (accSeq && cdsOffset >= 0 &&
+    (cdsOffset + hgvs->start1 - 1 + refLen) <= strlen(accSeq))
+    {
+    coordsOK = TRUE;
+    }
+if (coordsOK && retDiffRefAllele)
+    {
+    char *ourSeq = cloneStringZ(accSeq + cdsOffset + hgvs->start1 - 1, refLen);
+    toUpperN(ourSeq, refLen);
+    if (sameString(hgvs->refAllele, ourSeq))
+        *retDiffRefAllele = NULL;
+    else
+        *retDiffRefAllele = ourSeq;
+    }
+if (retFoundAcc)
+    *retFoundAcc = foundAcc;
+return coordsOK;
+}
+
+static struct psl *pslFromHgvs(struct hgvsVariant *hgvs, int accSize, int cdsStart)
+/* Allocate and return a PSL modeling the variant in transcript named acc. */
+{
+if (hgvs == NULL)
+    return NULL;
+struct psl *psl;
+AllocVar(psl);
+int refLen = strlen(hgvs->refAllele);
+int altLen = strlen(hgvs->altAllele);
+struct dyString *dy = dyStringCreate("%s%s%s",
+                                     hgvs->refAllele, hgvs->changeSymbol, hgvs->altAllele);
+psl->qName = dyStringCannibalize(&dy);
+psl->qSize = altLen;
+psl->qStart = 0;
+psl->qEnd = altLen;
+psl->tName = cloneFirstWordByDelimiter(hgvs->seqAcc, '.');
+psl->tSize = accSize;
+safecpy(psl->strand, sizeof(psl->strand), "+");
+psl->tStart = cdsStart + hgvs->start1 - 1;
+psl->tEnd = psl->tStart + refLen;
+psl->blockCount = 1;
+AllocArray(psl->blockSizes, psl->blockCount);
+AllocArray(psl->qStarts, psl->blockCount);
+AllocArray(psl->tStarts, psl->blockCount);
+psl->blockSizes[0] = refLen <= altLen ? refLen : altLen;
+psl->qStarts[0] = psl->qStart;
+psl->tStarts[0] = psl->tStart;
+return psl;
+}
+
+static struct psl *hgvsMapCDotToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
+/* Return a psl with target coords from db, mapping the variant to the genome.
+ * Return NULL if unable to map.
+ * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
+{
+struct psl *mappedToGenome = NULL;
+char *trackTable = NULL;
+char *pslTable = NULL;
+char *acc = cloneFirstWordByDelimiter(hgvs->seqAcc, '.');
+int cdsStart = -1;
+if (startsWith("NM_", acc))
+    {
+    // TODO: replace these with NCBI's alignments
+    trackTable = "refGene";
+    pslTable = "refSeqAli";
+    }
+else if (startsWith("LRG_", acc))
+    {
+    trackTable = "lrgTranscriptAli";
+    pslTable = "lrgTranscriptAli";
+    }
+cdsStart = getCdsStart(db, acc);
+if (trackTable && pslTable && cdsStart >= 0 &&
+    hTableExists(db, trackTable) && hTableExists(db, pslTable))
+    {
+    struct dyString *dyQuery = sqlDyStringCreate("select * from %s where qName = '%s'",
+                                                 pslTable, acc);
+    struct sqlConnection *conn = hAllocConn(db);
+    struct sqlResult *sr = sqlGetResult(conn, dyQuery->string);
+    int bin = hIsBinned(db, pslTable);
+    char **row;
+    if (sr && (row = sqlNextRow(sr)))
+        {
+        struct psl *txAli = pslLoad(row+bin);
+        struct psl *variantPsl = pslFromHgvs(hgvs, txAli->qSize, cdsStart);
+        mappedToGenome = pslTransMap(pslTransMapNoOpts, variantPsl, txAli);
+        }
+    sqlFreeResult(&sr);
+    hFreeConn(&conn);
+    }
+if (mappedToGenome && retPslTable)
+    *retPslTable = cloneString(pslTable);
+return mappedToGenome;
+}
+
+static char *aaToArbitraryCodons(char *aaStr)
+/* Translate amino acid string into codon string where we just take the first codon we
+ * find for each amino. */
+{
+int aaLen = strlen(aaStr);
+char *codonStr = needMem(aaLen*3 + 1);
+int i;
+for (i = 0;  i < aaLen;  i++)
+    aaToArbitraryCodon(aaStr[i], &codonStr[i*3]);
+return codonStr;
+}
+
+static struct psl *hgvsMapPDotToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
+/* Return a psl with target coords from db, mapping the variant to the genome.
+ * Return NULL if unable to map.
+ * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
+{
+struct psl *mappedToGenome = NULL;
+char *acc = cloneFirstWordByDelimiter(hgvs->seqAcc, '.');
+if (startsWith("NP_", acc))
+    {
+    // Translate the NP_*:p. to NM_*:c. and map NM_*:c. to the genome.
+    struct sqlConnection *conn = hAllocConn(db);
+    char query[2048];
+    sqlSafef(query, sizeof(query), "select mrnaAcc from %s l, refGene r "
+          "where l.protAcc = '%s' and r.name = l.mrnaAcc", refLinkTable, acc);
+    char *nmAcc = sqlQuickString(conn, query);
+    hFreeConn(&conn);
+    if (nmAcc)
+        {
+        struct hgvsVariant cDot;
+        zeroBytes(&cDot, sizeof(cDot));
+        cDot.type = hgvstCoding;
+        cDot.seqAcc = nmAcc;
+        cDot.changeSymbol = ">";
+        // These aren't necessarily right since AA doesn't have as much info as codon.
+        // We could get the refAllele right using the reference sequence -- but altAllele
+        // could still be ambiguous.  In the meantime we're not really using the alleles
+        // for anything besides length so far...
+        cDot.refAllele = aaToArbitraryCodons(hgvs->refAllele);
+        cDot.altAllele = aaToArbitraryCodons(hgvs->altAllele);
+        cDot.start1 = ((hgvs->start1 - 1) * 3) + 1;
+        cDot.end = ((hgvs->end - 1) * 3) + 3;
+        mappedToGenome = hgvsMapCDotToGenome(db, &cDot, retPslTable);
+        }
+    }
+return mappedToGenome;
+}
+
+struct psl *hgvsMapToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
+/* Return a psl with target coords from db, mapping the variant to the genome.
+ * Return NULL if unable to map.
+ * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
+{
+if (hgvs == NULL || !hgvsValidate(db, hgvs, NULL, NULL, NULL))
+    return NULL;
+struct psl *mappedToGenome = NULL;
+if (hgvs->type == hgvstCoding)
+    mappedToGenome = hgvsMapCDotToGenome(db, hgvs, retPslTable);
+else if (hgvs->type == hgvstProtein)
+    mappedToGenome = hgvsMapPDotToGenome(db, hgvs, retPslTable);
+return mappedToGenome;
+}