aaf72102b545c05c42f66b7a3fc22d65b1ecf4fe
angie
  Mon Aug 8 14:12:39 2016 -0700
Added recognition of a small subset of HGVS terms: coding (c.) SNVs relative to RefSeq NM_ or LRG transcript IDs,
and protein (p.) simple substitutions relative to NP_.
Also accepted (not HGVS but similar and popular): geneSymbol and abbreviated protein subst like "ALK G1494E".
hgFind will map terms to the current genome if possible, and will display warnings about unrecognized accessions,
out-of-bounds coordinates and mismatching reference alleles.
refs #15071, #15554

diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c
index 0d6273d..78da647 100644
--- src/hg/lib/hgFind.c
+++ src/hg/lib/hgFind.c
@@ -11,30 +11,31 @@
 #include "hash.h"
 #include "cheapcgi.h"
 #include "htmshell.h"
 #include "web.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hui.h"
 #include "psl.h"
 #include "genePred.h"
 #include "genePredReader.h"
 #include "bed.h"
 #include "cytoBand.h"
 #include "cart.h"
 #include "hgFind.h"
 #include "hgFindSpec.h"
+#include "hgHgvs.h"
 #include "snp.h"
 #include "refLink.h"
 #include "kgAlias.h"
 #include "kgProtAlias.h"
 #include "findKGAlias.h"
 #include "findKGProtAlias.h"
 #include "tigrCmrGene.h"
 #include "minGeneInfo.h"
 #include "pipeline.h"
 #include "hgConfig.h"
 #include "trix.h"
 #include "trackHub.h"
 #include "udc.h"
 #include "hubConnect.h"
 #include "bigBedFind.h"
@@ -3040,30 +3041,88 @@
 	hfs = hfsFind(longList, search);
     if (hfs != NULL)
 	foundIt = hgFindUsingSpec(db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE);
     else
 	warn("Unrecognized singleSearch=%s in URL", search);
     }
 if (foundIt)
     {
     fixSinglePos(hgp);
     if (cart != NULL)
         cartSetString(cart, "hgFind.matches", hgp->tableList->posList->browserName);
     }
 return foundIt;
 }
 
+static int getDotVersion(char *acc)
+/* If acc ends with a .version, return the version, else 0. */
+{
+char *p = strchr(acc, '.');
+if (p)
+    return atoi(p+1);
+return 0;
+}
+
+static boolean matchesHgvs(char *db, char *term, struct hgPositions *hgp)
+/* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature */
+/* See http://varnomen.hgvs.org/ */
+{
+boolean foundIt = FALSE;
+struct hgvsVariant *hgvs = hgvsParseTerm(term);
+if (hgvs == NULL)
+    hgvs = hgvsParsePseudoHgvs(db, term);
+if (hgvs)
+    {
+    char *foundAcc = NULL, *diffRefAllele = NULL;
+    int foundVersion = 0;
+    boolean coordsOk = hgvsValidate(db, hgvs, &foundAcc, &foundVersion, &diffRefAllele);
+    if (foundAcc == NULL)
+        warn("Can't find accession for HGVS term '%s'", term);
+    else
+        {
+        int hgvsVersion = getDotVersion(hgvs->seqAcc);
+        char foundAccWithV[strlen(foundAcc)+20];
+        if (foundVersion)
+            safef(foundAccWithV, sizeof(foundAccWithV), "%s.%d", foundAcc, foundVersion);
+        else
+            safecpy(foundAccWithV, sizeof(foundAccWithV), foundAcc);
+        if (hgvsVersion && hgvsVersion != foundVersion)
+            warn("HGVS term '%s' is based on %s but UCSC has version %s",
+                 term, hgvs->seqAcc, foundAccWithV);
+        if (! coordsOk)
+            warn("HGVS term '%s' has coordinates outside the bounds of %s", term, foundAccWithV);
+        else if (diffRefAllele != NULL)
+            warn ("HGVS term '%s' reference value '%s' does not match %s value '%s'",
+                  term, hgvs->refAllele, foundAccWithV, diffRefAllele);
+        if (coordsOk)
+            {
+            char *pslTable = NULL;
+            struct psl *mapping = hgvsMapToGenome(db, hgvs, &pslTable);
+            if (mapping)
+                {
+                int padding = 5;
+                char *trackTable = startsWith("lrg", pslTable) ? "lrgTranscriptAli" : "refGene";
+                singlePos(hgp, "HGVS", NULL, trackTable, term, "",
+                          mapping->tName, mapping->tStart-padding, mapping->tEnd+padding);
+                foundIt = TRUE;
+                }
+            }
+        }
+    }
+return foundIt;
+}
+
 struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi,
 	char *hgAppNameIn, struct cart *cart, boolean multiTerm)
 /* Return table of positions that match term or NULL if none such. */
 {
 struct hgPositions *hgp = NULL, *hgpItem = NULL;
 regmatch_t substrs[4];
 boolean canonicalSpec = FALSE;
 boolean gbrowserSpec = FALSE;
 boolean lengthSpec = FALSE;
 boolean singleBaseSpec = FALSE;
 boolean relativeFlag = FALSE;
 int relStart = 0, relEnd = 0;
 
 hgAppName = hgAppNameIn;
 
@@ -3141,31 +3200,31 @@
 
     hgParseChromRange(db, term, &chrom, &start, &end);
     if (relativeFlag)
 	{
 	int chromSize = end;
 	end = start + relEnd;
 	start = start + relStart;
 	if (end > chromSize)
 	    end = chromSize;
 	if (start < 0)
 	    start = 0;
 	}
     singlePos(hgp, "Chromosome Range", NULL, "chromInfo", originalTerm,
 	      "", chrom, start, end);
     }
-else
+else if (!matchesHgvs(db, term, hgp))
     {
     struct hgFindSpec *shortList = NULL, *longList = NULL;
     struct hgFindSpec *hfs;
     boolean done = FALSE;
 
     // Disable singleBaseSpec for any term that is not hgOfficialChromName
     // because that mangles legitimate IDs that are [A-Z]:[0-9]+.
     if (singleBaseSpec)
 	{
 	singleBaseSpec = relativeFlag = FALSE;
 	term = cloneString(originalTerm);  // restore original term
 	relStart = relEnd = 0;
 	}
 
     if (!trackHubDatabase(db))