37adcffad6e119b07187ac7659654277a7347450
angie
  Fri Oct 27 13:39:28 2017 -0700
Add support for LRG genomic and protein HGVS search terms.

diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c
index ed54650..e3f24a6 100644
--- src/hg/lib/hgHgvs.c
+++ src/hg/lib/hgHgvs.c
@@ -1,46 +1,49 @@
 /* hgHgvs.c - Parse the Human Genome Variation Society (HGVS) nomenclature for variants. */
 /* See http://varnomen.hgvs.org/ and https://github.com/mutalyzer/mutalyzer/ */
 
 /* Copyright (C) 2016 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "hgHgvs.h"
+#include "bPlusTree.h"
 #include "chromInfo.h"
 #include "genbank.h"
 #include "hdb.h"
 #include "indelShift.h"
+#include "lrg.h"
 #include "pslTransMap.h"
 #include "regexHelper.h"
 #include "trackHub.h"
 
 void hgvsVariantFree(struct hgvsVariant **pHgvs)
 // Free *pHgvs and its contents, and set *pHgvs to NULL.
 {
 if (pHgvs && *pHgvs)
     {
     struct hgvsVariant *hgvs = *pHgvs;
     freez(&hgvs->seqAcc);
     freez(&hgvs->seqGeneSymbol);
     freez(&hgvs->changes);
     freez(pHgvs);
     }
 }
 
 // Regular expressions for HGVS-recognized sequence accessions: LRG or versioned RefSeq:
-#define lrgTranscriptExp "(LRG_[0-9]+t[0-9+])"
-#define lrgRegionExp "(LRG_[0-9+])"
+#define lrgTranscriptExp "(LRG_[0-9]+t[0-9]+)"
+#define lrgProteinExp "(LRG_[0-9]+p[0-9]+)"
+#define lrgRegionExp "(LRG_[0-9]+)"
 
 // NC = RefSeq reference assembly chromosome
 // NG = RefSeq incomplete genomic region (e.g. gene locus)
 // NM = RefSeq curated mRNA
 // NP = RefSeq curated protein
 // NR = RefSeq curated (non-coding) RNA
 #define geneSymbolExp "([A-Za-z0-9./_-]+)"
 #define optionalGeneSymbolExp "(\\(" geneSymbolExp "\\))?"
 #define versionedAccPrefixExp(p) "(" p "_[0-9]+(\\.[0-9]+)?)" optionalGeneSymbolExp
 //                                 ........................       accession and optional dot version
 //                                              ...........       optional dot version
 //                                                         ...... optional gene symbol in ()s
 //                                                          ....  optional gene symbol
 #define versionedRefSeqNCExp versionedAccPrefixExp("[NX]C")
 #define versionedRefSeqNGExp versionedAccPrefixExp("[NX]G")
@@ -91,30 +94,42 @@
 //                                                           ...        // replacement sequence
 
 // Protein range (or just single pos) regex
 #define hgvsAaRangeExp "(" hgvsAminoAcidExp ")" posIntExp "(_(" hgvsAminoAcidExp ")" posIntExp ")?(.*)"
 #define hgvsPDotRangeExp "p\\.\\(?" hgvsAaRangeExp "\\)?"
 //  original start AA           ...
 //  1-based start position                       ...
 //  optional range sep and AA+pos                          .....................................
 //  original end AA                                              ...
 //  1-based end position                                                          ...
 //  change description                                                                    ...
 
 // Complete HGVS term regexes combining sequence identifiers and change operations
 #define hgvsFullRegex(seq, op) "^" seq "[ :]+" op
 
+#define hgvsLrgGDotPosExp hgvsFullRegex(lrgRegionExp, hgvsGMDotPosExp)
+#define hgvsLrgGDotExp hgvsLrgGDotPosExp "(.*)"
+// substring numbering:
+//      0.....................................  whole matching string
+//      1.......................                LRG region
+//                              2.              g or m
+//                                3..           1-based start position
+//                                   4...       optional range separator and end position
+//                                     5..      1-based end position
+//                                        6...  change description
+
+
 #define hgvsRefSeqNCGDotPosExp hgvsFullRegex(versionedRefSeqNCExp, hgvsGMDotPosExp)
 #define hgvsRefSeqNCGDotExp hgvsRefSeqNCGDotPosExp "(.*)"
 // substring numbering:
 //      0.....................................  whole matching string
 //      1.................                      accession and optional dot version
 //               2........                      optional dot version
 //                       3......                (n/a) optional gene symbol in ()s
 //                        4....                 (n/a) optional gene symbol
 //                              5.              g or m
 //                                6..           1-based start position
 //                                   7...       optional range separator and end position
 //                                     8..      1-based end position
 //                                        9...  change description
 
 #define hgvsLrgCDotPosExp hgvsFullRegex(lrgTranscriptExp, hgvsCDotPosExp)
@@ -181,41 +196,60 @@
 //                       3.....                                           optional gene sym in ()s
 //                        4...                                            optional gene symbol
 //                              5...                                      n/a 4 n.: UTR anchor
 //                                  6...                                  mandatory first number
 //                                      7.......                          optional offset
 //                                      8...                              offset separator
 //                                          9...                          offset number
 //                                             10...............          optional range
 //                                             11...                      n/a 4 n.: UTR anchor
 //                                                12...                   first number
 //                                                     13.......          optional offset
 //                                                     14...              offset separator
 //                                                         15...          offset number
 //                                                             16........ sequence change
 
+#define hgvsLrgPDotSubstExp hgvsFullRegex(lrgProteinExp, hgvsPDotSubstExp)
+// substring numbering:
+//      0.....................................................  whole matching string
+//      1............................                           LRG protein
+//                                   2.....                     original sequence
+//                                           3......            1-based position
+//                                                     4......  replacement sequence
+
 #define hgvsRefSeqNPPDotSubstExp hgvsFullRegex(versionedRefSeqNPExp, hgvsPDotSubstExp)
 // substring numbering:
 //      0.....................................................  whole matching string
 //      1................                                       accession and optional dot version
 //                 2.....                                       optional dot version
 //                         3.....                               optional gene symbol in ()s
 //                          4...                                optional gene symbol
 //                                   5.....                     original sequence
 //                                           6......            1-based position
 //                                                     7......  replacement sequence
 
+#define hgvsLrgPDotRangeExp hgvsFullRegex(lrgProteinExp, hgvsPDotRangeExp)
+// substring numbering:
+//      0.....................................................  whole matching string
+//      1..........................                             accession and optional dot version
+//                                 2...                         original start AA
+//                                       3...                   1-based start position
+//                                          4.................  optional range sep and AA+pos
+//                                            5...              original end AA
+//                                                 6...         1-based end position
+//                                                     7......  change description
+
 #define hgvsRefSeqNPPDotRangeExp hgvsFullRegex(versionedRefSeqNPExp, hgvsPDotRangeExp)
 // substring numbering:
 //      0.....................................................  whole matching string
 //      1................                                       accession and optional dot version
 //                 2.....                                       optional dot version
 //                         3.....                               optional gene symbol in ()s
 //                          4...                                optional gene symbol
 //                                 5...                         original start AA
 //                                       6...                   1-based start position
 //                                          7.................  optional range sep and AA+pos
 //                                            8...              original end AA
 //                                                 9...         1-based end position
 //                                                     10.....  change description
 
 // Pseudo-HGVS in common usage
@@ -283,37 +317,50 @@
 //      1...................                                    gene symbol
 //                           2.....                             1-based position
 
 
 // Gene symbol, maybe punctuation, and a clear "c." position (and possibly change)
 #define pseudoHgvsGeneSympolCDotPosExp "^" geneSymbolExp "[: ]+" hgvsCDotPosExp
 //      0.....................................................  whole matching string
 //      1...................                                    gene symbol
 //                           2.....                             optional beginning of position exp
 //                                   3.....                     beginning of position exp
 
 static struct hgvsVariant *hgvsParseGDotPos(char *term)
 /* If term is parseable as an HGVS NC_...g. term, return the parsed representation, else NULL. */
 {
 struct hgvsVariant *hgvs = NULL;
+int accIx = 1;
+int startPosIx = 3;
+int endPosIx = 5;
+int changeIx = 6;
+boolean matches = FALSE;
 regmatch_t substrs[17];
-if (regexMatchSubstr(term, hgvsRefSeqNCGDotExp, substrs, ArraySize(substrs)))
+if (regexMatchSubstr(term, hgvsLrgGDotExp, substrs, ArraySize(substrs)))
+    matches = TRUE;
+else if (regexMatchSubstr(term, hgvsRefSeqNCGDotExp, substrs, ArraySize(substrs)))
+    {
+    matches = TRUE;
+    // The LRG accession regex has only one substring but the RefSeq acc regex has 4, so that
+    // affects all substring offsets after the accession.
+    int refSeqExtra = 3;
+    startPosIx += refSeqExtra;
+    endPosIx += refSeqExtra;
+    changeIx += refSeqExtra;
+    }
+if (matches)
     {
-    int accIx = 1;
-    int startPosIx = 6;
-    int endPosIx = 8;
-    int changeIx = 9;
     AllocVar(hgvs);
     // HGVS recommendation May 2017: replace m. with g. since mitochondrion is genomic too
     hgvs->type = hgvstGenomic;
     hgvs->seqAcc = regexSubstringClone(term, substrs[accIx]);
     hgvs->start1 = regexSubstringInt(term, substrs[startPosIx]);
     if (regexSubstrMatched(substrs[endPosIx]))
         hgvs->end = regexSubstringInt(term, substrs[endPosIx]);
     else
         hgvs->end = hgvs->start1;
     hgvs->changes = regexSubstringClone(term, substrs[changeIx]);
     }
 return hgvs;
 }
 
 static void extractComplexNum(char *term, regmatch_t *substrs, int substrOffset,
@@ -405,79 +452,108 @@
         {
         hgvs->end = hgvs->start1;
         hgvs->endIsUtr3 = hgvs->startIsUtr3;
         hgvs->endOffset = hgvs->startOffset;
         }
     hgvs->changes = regexSubstringClone(term, substrs[changeIx]);
     }
 return hgvs;
 }
 
 static struct hgvsVariant *hgvsParsePDotSubst(char *term)
 /* If term is parseable as an HGVS protein substitution, return the parsed representation,
  * otherwise NULL. */
 {
 struct hgvsVariant *hgvs = NULL;
+boolean matches = FALSE;
+int accIx = 1;
+int refIx = 2;
+int posIx = 3;
+int altIx = 4;
+int geneSymbolIx = -1;
 regmatch_t substrs[8];
-if (regexMatchSubstr(term, hgvsRefSeqNPPDotSubstExp, substrs, ArraySize(substrs)))
+if (regexMatchSubstr(term, hgvsLrgPDotSubstExp, substrs, ArraySize(substrs)))
+    matches = TRUE;
+else if (regexMatchSubstr(term, hgvsRefSeqNPPDotSubstExp, substrs, ArraySize(substrs)))
+    {
+    matches = TRUE;
+    // The LRG accession regex has only one substring but the RefSeq acc regex has 4, so that
+    // affects all substring offsets after the accession.
+    int refSeqExtra = 3;
+    refIx += refSeqExtra;
+    posIx += refSeqExtra;
+    altIx += refSeqExtra;
+    geneSymbolIx = 4;
+    }
+if (matches)
     {
-    int accIx = 1;
-    int geneSymbolIx = 4;
-    int refIx = 5;
-    int posIx = 6;
-    int altIx = 7;
     AllocVar(hgvs);
     hgvs->type = hgvstProtein;
     hgvs->seqAcc = regexSubstringClone(term, substrs[accIx]);
     int changeStart = substrs[refIx].rm_so;
     int changeEnd = substrs[altIx].rm_eo;
     int len = changeEnd - changeStart;
     char change[len+1];
     safencpy(change, sizeof(change), term+changeStart, len);
     hgvs->changes = cloneString(change);
-    if (regexSubstrMatched(substrs[geneSymbolIx]))
+    if (geneSymbolIx >= 0 && regexSubstrMatched(substrs[geneSymbolIx]))
         hgvs->seqGeneSymbol = regexSubstringClone(term, substrs[geneSymbolIx]);
     hgvs->start1 = regexSubstringInt(term, substrs[posIx]);
     hgvs->end = hgvs->start1;
     }
 return hgvs;
 }
 
 static struct hgvsVariant *hgvsParsePDotRange(char *term)
 /* If term is parseable as an HGVS protein range change, return the parsed representation,
  * otherwise NULL. */
 {
 struct hgvsVariant *hgvs = NULL;
+boolean matches = FALSE;
+int accIx = 1;
+int startRefIx = 2;
+int startPosIx = 3;
+int endPosIx = 6;
+int changeIx = 7;
+int geneSymbolIx = -1;
 regmatch_t substrs[11];
-if (regexMatchSubstr(term, hgvsRefSeqNPPDotRangeExp, substrs, ArraySize(substrs)))
+if (regexMatchSubstr(term, hgvsLrgPDotRangeExp, substrs, ArraySize(substrs)))
+    matches = TRUE;
+else if (regexMatchSubstr(term, hgvsRefSeqNPPDotRangeExp, substrs, ArraySize(substrs)))
+    {
+    matches = TRUE;
+    // The LRG accession regex has only one substring but the RefSeq acc regex has 4, so that
+    // affects all substring offsets after the accession.
+    int refSeqExtra = 3;
+    startRefIx += refSeqExtra;
+    startPosIx += refSeqExtra;
+    endPosIx += refSeqExtra;
+    changeIx += refSeqExtra;
+    geneSymbolIx = 4;
+    }
+if (matches)
     {
-    int accIx = 1;
-    int geneSymbolIx = 4;
-    int startRefIx = 5;
-    int startPosIx = 6;
-    int endPosIx = 9;
-    int changeIx = 10;
     AllocVar(hgvs);
     hgvs->type = hgvstProtein;
     hgvs->seqAcc = regexSubstringClone(term, substrs[accIx]);
     int changeStart = substrs[startRefIx].rm_so;
     int changeEnd = substrs[changeIx].rm_eo;
     int len = changeEnd - changeStart;
     char change[len+1];
     safencpy(change, sizeof(change), term+changeStart, len);
     hgvs->changes = cloneString(change);
-    if (regexSubstrMatched(substrs[geneSymbolIx]))
+    if (geneSymbolIx >= 0 && regexSubstrMatched(substrs[geneSymbolIx]))
         hgvs->seqGeneSymbol = regexSubstringClone(term, substrs[geneSymbolIx]);
     hgvs->start1 = regexSubstringInt(term, substrs[startPosIx]);
     if (regexSubstrMatched(substrs[endPosIx]))
         hgvs->end = regexSubstringInt(term, substrs[endPosIx]);
     else
         hgvs->end = hgvs->start1;
     }
 return hgvs;
 }
 
 struct hgvsVariant *hgvsParseTerm(char *term)
 /* If term is a parseable form of HGVS, return the parsed representation, otherwise NULL.
  * This does not check validity of accessions, coordinates or alleles. */
 {
 struct hgvsVariant *hgvs = hgvsParseCNDotPos(term);
@@ -558,45 +634,60 @@
     {
     // Trim .version if present since our genbank tables don't use versioned names.
     char *trimmed = cloneFirstWordByDelimiter(nmAcc, '.');
     sqlSafef(query, sizeof(query), "select l.protAcc from %s l, refGene r "
              "where r.name = '%s' and l.mrnaAcc = r.name "
              "and l.protAcc != '' order by length(l.protAcc), l.protAcc",
              refLinkTable, trimmed);
     }
 else return NULL;
 struct sqlConnection *conn = hAllocConn(db);
 char *npAcc = sqlQuickString(conn, query);
 hFreeConn(&conn);
 return npAcc;
 }
 
+static char *lrgProteinToTx(char *db, char *protAcc)
+/* Return the LRG_ transcript accession for protAcc.  Each LRG_NpM has a corresponding LRG_NtM. */
+{
+int accLen = strlen(protAcc);
+char txAcc[accLen+1];
+safecpy(txAcc, sizeof(txAcc), protAcc);
+char *p = strrchr(txAcc, 'p');
+if (p)
+    *p = 't';
+return cloneString(txAcc);
+}
+
 static char *getProteinSeq(char *db, char *acc)
 /* Return amino acid sequence for acc, or NULL if not found. */
 {
 if (trackHubDatabase(db))
     return NULL;
 char *seq = NULL;
 if (startsWith("LRG_", acc))
     {
     if (hTableExists(db, "lrgPep"))
         {
         char query[2048];
-        sqlSafef(query, sizeof(query), "select seq from lrgPep where name = '%s'", acc);
+        // lrgPep is indexed by transcript ID not protein ID
+        char *txAcc = lrgProteinToTx(db, acc);
+        sqlSafef(query, sizeof(query), "select seq from lrgPep where name = '%s'", txAcc);
         struct sqlConnection *conn = hAllocConn(db);
         seq = sqlQuickString(conn, query);
         hFreeConn(&conn);
+        freeMem(txAcc);
         }
     }
 else
     {
     if (hDbHasNcbiRefSeq(db))
         {
         char query[2048];
         sqlSafef(query, sizeof(query), "select seq from ncbiRefSeqPepTable "
                  "where name = '%s'", acc);
         struct sqlConnection *conn = hAllocConn(db);
         seq = sqlQuickString(conn, query);
         hFreeConn(&conn);
         }
     else
         {
@@ -722,105 +813,172 @@
     int endPosIx = 5;
     int changeIx = 6;
     AllocVar(hgvs);
     hgvs->type = hgvstGenomic;
     hgvs->seqAcc = regexSubstringClone(term, substrs[chrIx]);
     hgvs->start1 = regexSubstringInt(term, substrs[startPosIx]);
     if (regexSubstrMatched(substrs[endPosIx]))
         hgvs->end = regexSubstringInt(term, substrs[endPosIx]);
     else
         hgvs->end = hgvs->start1;
     hgvs->changes = regexSubstringClone(term, substrs[changeIx]);
     }
 return hgvs;
 }
 
+static struct bbiFile *getLrgBbi(char *db)
+/* Return bbiFile for LRG regions or NULL if not found. */
+{
+struct bbiFile *bbi = NULL;
+// I don't think this will be called often enough to warrant caching open bbi file (and index?).
+// I expect it to be called a couple times when the user enters a LRG genomic HGVS pos/search term.
+// It would be cleaner to get fileName from tdb or db -- but this is much quicker & easier:
+char fileName[1024];
+safef(fileName, sizeof(fileName), "/gbdb/%s/bbi/lrg.bb", db);
+char *fileNameRep = hReplaceGbdb(fileName);
+if (fileExists(fileNameRep))
+    bbi = bigBedFileOpen(fileNameRep);
+freeMem(fileNameRep);
+return bbi;
+}
+
+static struct lrg *loadLrgByName(char *db, char *lrgId)
+/* Retrieve lrg data from bigBed. */
+{
+struct lrg *lrg = NULL;
+struct bbiFile *bbi = getLrgBbi(db);
+if (bbi)
+    {
+    int fieldIx = 0;
+    struct bptFile *index = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
+    if (index)
+        {
+        struct lm *lm = lmInit(0);
+        struct bigBedInterval *bb = bigBedNameQuery(bbi, index, fieldIx, lrgId, lm);
+        if (bb)
+            {
+            char *fields[bbi->fieldCount];
+            char chrom[512], startBuf[16], endBuf[16];
+            bigBedIntervalToRowLookupChrom(bb, NULL, bbi, chrom, sizeof(chrom),
+                                           startBuf, endBuf, fields, bbi->fieldCount);
+            lrg = lrgLoad(fields);
+            }
+        lmCleanup(&lm);
+        }
+    // Don't bptFileClose(&index) -- index shares pointers with bbi!
+    }
+bigBedFileClose(&bbi);
+return lrg;
+}
+
 static char refBaseFromNucSubst(char *change)
 /* If sequence change is a nucleotide substitution, return the reference base, else null char. */
 {
 if (regexMatchNoCase(change, "^([ACGTU])>"))
     return toupper(change[0]);
 return '\0';
 }
 
 static void hgvsStartEndToZeroBasedHalfOpen(struct hgvsVariant *hgvs, int *retStart, int *retEnd)
 /* Convert HGVS's fully closed, 1-based-unless-negative start and end to UCSC start and end. */
 {
 // If hgvs->start1 is negative, it is effectively 0-based, so subtract 1 only if positive.
 int start = hgvs->start1;
 if (start > 0)
     start--;
 // If hgvs->end is negative, it is effectively 0-based, so add 1 to get half-open end.
 int end = hgvs->end;
 if (end < 0)
     end++;
 if (retStart)
     *retStart = start;
 if (retEnd)
     *retEnd = end;
 }
 
 static boolean hgvsValidateGenomic(char *db, struct hgvsVariant *hgvs,
                                    char **retFoundAcc, int *retFoundVersion,
                                    char **retDiffRefAllele)
-/* Return TRUE if hgvs->seqAcc can be mapped to a chrom in db and coords are legal for the chrom.
+/* Return TRUE if hgvs->seqAcc can be mapped to a chrom/LRG in db and coords are in range.
  * If retFoundAcc is non-NULL, set it to the versionless seqAcc if chrom is found, else NULL.
- * If retFoundVersion is non-NULL, set it to seqAcc's version if chrom is found, else NULL.
+ * If retFoundVersion is non-NULL, set to seqAcc's version if chrom is found, -1 for LRG, else NULL.
  * If retDiffRefAllele is non-NULL and hgvs specifies a reference allele then compare it to
  * the genomic sequence at that location; set *retDiffRefAllele to NULL if they match, or to
  * genomic sequence if they differ. */
 {
 boolean coordsOK = FALSE;
+char hgvsBase = '\0';
 if (retDiffRefAllele)
+    {
+    hgvsBase = refBaseFromNucSubst(hgvs->changes);
     *retDiffRefAllele = NULL;
+    }
 if (retFoundAcc)
     *retFoundAcc = NULL;
 if (retFoundVersion)
     *retFoundVersion = 0;
+int start, end;
+hgvsStartEndToZeroBasedHalfOpen(hgvs, &start, &end);
+if (startsWith("LRG_", hgvs->seqAcc))
+    {
+    struct lrg *lrg = loadLrgByName(db, hgvs->seqAcc);
+    if (lrg && start >= 0 && start < lrg->lrgSize && end > 0 && end <= lrg->lrgSize)
+        {
+        coordsOK = TRUE;
+        if (hgvsBase != '\0')
+            {
+            struct dnaSeq *lrgSeq = lrgReconstructSequence(lrg, db);
+            lrgSeq->dna[start] = toupper(lrgSeq->dna[start]);
+            if (lrgSeq->dna[start] != hgvsBase)
+                *retDiffRefAllele = cloneStringZ(lrgSeq->dna+start, 1);
+            dnaSeqFree(&lrgSeq);
+            }
+        if (retFoundAcc)
+            *retFoundAcc = cloneString(hgvs->seqAcc);
+        }
+    lrgFree(&lrg);
+    }
+else
+    {
     char *chrom = hgOfficialChromName(db, hgvs->seqAcc);
     if (isNotEmpty(chrom))
         {
         struct chromInfo *ci = hGetChromInfo(db, chrom);
-    int start, end;
-    hgvsStartEndToZeroBasedHalfOpen(hgvs, &start, &end);
-    if (start >= 0 && start < ci->size && end > 0 && end < ci->size)
+        if (start >= 0 && start < ci->size && end > 0 && end <= ci->size)
             {
             coordsOK = TRUE;
-        if (retDiffRefAllele)
-            {
-            char hgvsBase = refBaseFromNucSubst(hgvs->changes);
             if (hgvsBase != '\0')
                 {
                 struct dnaSeq *refBase = hFetchSeq(ci->fileName, chrom, start, end);
                 touppers(refBase->dna);
                 if (refBase->dna[0] != hgvsBase)
                     *retDiffRefAllele = cloneString(refBase->dna);
                 dnaSeqFree(&refBase);
                 }
             }
-        }
         // Since we found hgvs->seqAcc, split it into versionless and version parts.
         if (retFoundAcc)
             *retFoundAcc = cloneFirstWordByDelimiter(hgvs->seqAcc, '.');
         if (retFoundVersion)
             {
             char *p = strchr(hgvs->seqAcc, '.');
             if (p)
                 *retFoundVersion = atoi(p+1);
             }
         // Don't free chromInfo, it's cached!
         }
+    }
 return coordsOK;
 }
 
 static char *getCdnaSeq(char *db, char *acc)
 /* Return cdna sequence for acc, or NULL if not found. */
 {
 if (trackHubDatabase(db))
     return NULL;
 char *seq = NULL;
 if (startsWith("LRG_", acc))
     {
     if (hTableExists(db, "lrgCdna"))
         {
         char query[2048];
         sqlSafef(query, sizeof(query), "select seq from lrgCdna where name = '%s'", acc);
@@ -1063,57 +1221,106 @@
 static struct bed *bed6New(char *chrom, unsigned chromStart, unsigned chromEnd, char *name,
                            int score, char strand)
 /* Alloc & return a new bed with only the first 6 fields populated. */
 {
 struct bed *bed6;
 AllocVar(bed6);
 bed6->chrom = cloneString(chrom);
 bed6->chromStart = chromStart;
 bed6->chromEnd = chromEnd;
 bed6->name = cloneString(name);
 bed6->score = score;
 bed6->strand[0] = strand;
 return bed6;
 }
 
+static int pslMapQStartToT(struct psl *psl, int qStartIn)
+/* Map a query start coord to a target start coord; return -1 if qStart is not in the alignment. */
+{
+int t = -1, qStart = qStartIn, ix;
+boolean isRc = pslQStrand(psl) == '-';
+if (isRc)
+    {
+    int qEnd = qStart + 1;
+    qStart = psl->qSize - qEnd;
+    }
+for (ix = 0;  ix < psl->blockCount;  ix++)
+    {
+    int qOffset = qStart - pslQStart(psl, ix);
+    if (qOffset >= 0 && qStart < pslQEnd(psl, ix))
+        {
+        t = pslTStart(psl, ix) + qOffset;
+        break;
+        }
+    }
+return t;
+}
+
+static int pslMapQEndToT(struct psl *psl, int qEnd)
+/* Map a query end coord to a target end coord; return -1 if qEnd is not in the alignment. */
+{
+int tEnd = -1;
+int qStart = qEnd - 1;
+int tStart = pslMapQStartToT(psl, qStart);
+if (tStart >= 0)
+    tEnd = tStart + 1;
+return tEnd;
+}
+
 boolean hgvsIsInsertion(struct hgvsVariant *hgvs)
 /* Return TRUE if hgvs is an insertion (end == start1+1, change starts with "ins"). */
 {
 return (hgvs->end == hgvs->start1+1 && startsWith("ins", hgvs->changes));
 }
 
 static void adjustInsStartEnd(struct hgvsVariant *hgvs, uint *retStart0, uint *retEnd)
 /* HGVS insertion coordinates include the base before and the base after the insertion point,
  * while we treat it as a zero-length point.  So if this is insertion, adjust the start and end. */
 {
 if (hgvsIsInsertion(hgvs))
     {
     *retStart0 = *retStart0 + 1;
     *retEnd = *retEnd - 1;
     }
 }
 
 static struct bed *hgvsMapGDotToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
-/* Given an NC_*g. term, if we can map NC_ to chrom then return the region, else NULL. */
+/* Given an NC_*g. or LRG_*g. term, if we can map to chrom then return the region, else NULL. */
 {
 struct bed *region = NULL;
+if (startsWith("LRG_", hgvs->seqAcc))
+    {
+    struct lrg *lrg = loadLrgByName(db, hgvs->seqAcc);
+    if (lrg)
+        {
+        uint chromSize = hChromSize(db, lrg->chrom);
+        struct psl *psl = lrgToPsl(lrg, chromSize);
+        int start = pslMapQStartToT(psl, hgvs->start1 - 1);
+        int end = pslMapQEndToT(psl, hgvs->end);
+        if (start >= 0 && end >= 0)
+            region = bed6New(lrg->chrom, start, end, "", 0, '+');
+        pslFree(&psl);
+        }
+    }
+else
+    {
     char *chrom = hgOfficialChromName(db, hgvs->seqAcc);
     if (isNotEmpty(chrom) && hgvs->start1 > 0)
-    {
         region = bed6New(chrom, hgvs->start1 - 1, hgvs->end, "", 0, '+');
-    adjustInsStartEnd(hgvs, &region->chromStart, &region->chromEnd);
     }
+if (region)
+    adjustInsStartEnd(hgvs, &region->chromStart, &region->chromEnd);
 if (retPslTable)
     *retPslTable = NULL;
 return region;
 }
 
 static void hgvsTranscriptToZeroBasedHalfOpen(struct hgvsVariant *hgvs,
                                               int maxCoord, struct genbankCds *cds,
                                               int *retStart, int *retEnd,
                                               int *retUpstreamBases, int *retDownstreamBases)
 /* Convert a transcript HGVS's start1 and end into UCSC coords plus upstream and downstream lengths
  * for when the transcript HGVS has coordinates that extend beyond its sequence boundaries.
  * ret* args must be non-NULL. */
 {
 hgvsStartEndToZeroBasedHalfOpen(hgvs, retStart, retEnd);
 if (hgvs->type == hgvstCoding)
@@ -1437,59 +1644,64 @@
         }
     }
 if (region)
     {
     adjustInsStartEnd(hgvs, &region->chromStart, &region->chromEnd);
     if (retPslTable)
         *retPslTable = cloneString(pslTable);
     }
 return region;
 }
 
 static struct bed *hgvsMapPDotToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
 /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map.
  * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
 {
-struct bed *region = NULL;
 char *acc = normalizeVersion(db, hgvs->seqAcc, NULL);
-if (acc && (startsWith("NP_", acc) || startsWith("XP_", acc)))
+if (isEmpty(acc))
+    return NULL;
+struct bed *region = NULL;
+char *txAcc = NULL;
+if (startsWith("LRG_", acc))
+    txAcc = lrgProteinToTx(db, acc);
+else if (startsWith("NP_", acc) || startsWith("XP_", acc))
     {
-    // Translate the NP_*:p. to NM_*:c. and map NM_*:c. to the genome.
     struct sqlConnection *conn = hAllocConn(db);
     char query[2048];
     if (hDbHasNcbiRefSeq(db))
         sqlSafef(query, sizeof(query), "select mrnaAcc from ncbiRefSeqLink where protAcc = '%s'",
                  acc);
     else if (hTableExists(db, "refGene"))
         sqlSafef(query, sizeof(query), "select mrnaAcc from %s l, refGene r "
                  "where l.protAcc = '%s' and r.name = l.mrnaAcc", refLinkTable, acc);
     else
         return NULL;
-    char *nmAcc = sqlQuickString(conn, query);
+    txAcc = sqlQuickString(conn, query);
     hFreeConn(&conn);
-    if (nmAcc)
+    }
+if (txAcc)
     {
+    // Translate the p. to c. and map c. to the genome.
     struct hgvsVariant cDot;
     zeroBytes(&cDot, sizeof(cDot));
     cDot.type = hgvstCoding;
-        cDot.seqAcc = nmAcc;
+    cDot.seqAcc = txAcc;
     cDot.start1 = ((hgvs->start1 - 1) * 3) + 1;
     cDot.end = ((hgvs->end - 1) * 3) + 3;
     cDot.changes = hgvs->changes;
     region = hgvsMapNucToGenome(db, &cDot, retPslTable);
-        freeMem(nmAcc);
-        }
+    freeMem(txAcc);
     }
 return region;
 }
 
 struct bed *hgvsMapToGenome(char *db, struct hgvsVariant *hgvs, char **retPslTable)
 /* Return a bed6 with the variant's span on the genome and strand, or NULL if unable to map.
  * If successful and retPslTable is not NULL, set it to the name of the PSL table used. */
 {
 if (hgvs == NULL)
     return NULL;
 struct bed *region = NULL;
 if (hgvs->type == hgvstProtein)
     region = hgvsMapPDotToGenome(db, hgvs, retPslTable);
 else
     region = hgvsMapNucToGenome(db, hgvs, retPslTable);