f78d6e76501646c8b8cc966103bafca896322509
angie
  Mon Oct 7 13:54:42 2013 -0700
Work in progress for #11460 (paste/upload variant input options...):adding an option for user to paste/upload variant identifiers which
will be translated into a sorted list of vcfRecords.

Currently we recognize only rs# IDs.  I was considering adding
dbVar IDs, but those could come from multiple sources (DGV, ClinVar,
ISCA) so I'm not sure.

Treating all symbolic/named alleles as deletions... non-ideal,
but fortunately those are a small minority in dbSNP.

Next: recognize HGVS IDs.

The grander vision of #11460 includes accepting VEP input format
and VCF, but I think those should be new SELECT options so we don't
get into quagmire of guessing format.

diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index 6643e56..f4a03ce 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -10,59 +10,66 @@
 #include "cart.h"
 #include "hui.h"
 #include "grp.h"
 #include "hCommon.h"
 #include "hgFind.h"
 #include "hPrint.h"
 #include "jsHelper.h"
 #include "memalloc.h"
 #include "textOut.h"
 #include "trackHub.h"
 #include "hubConnect.h"
 #include "twoBit.h"
 #include "gpFx.h"
 #include "udc.h"
 #include "knetUdc.h"
+#include "md5.h"
+#include "regexHelper.h"
 #include "annoGratorQuery.h"
 #include "annoGratorGpVar.h"
 #include "annoFormatVep.h"
 #include "annoStreamBigBed.h"
 
 #include "libifyMe.h"
 
 /* Global Variables */
 struct cart *cart;		/* CGI and other variables */
 struct hash *oldVars = NULL;	/* The cart before new cgi stuff added. */
 char *genome = NULL;		/* Name of genome - mouse, human, etc. */
 char *database = NULL;		/* Current genome database - hg17, mm5, etc. */
 char *regionType = NULL;	/* genome, ENCODE pilot regions, or specific position range. */
 struct grp *fullGroupList = NULL;	/* List of all groups. */
 struct trackDb *fullTrackList = NULL;	/* List of all tracks in database. */
 static struct pipeline *compressPipeline = (struct pipeline *)NULL;
 
 
 // Null terminated list of CGI Variables we don't want to save permanently:
 char *excludeVars[] = {"Submit", "submit", "hgva_startQuery", NULL,};
 
 #define hgvaRange "position"
 #define hgvaRegionType "hgva_regionType"
 #define hgvaRegionTypeEncode "encode"
 #define hgvaRegionTypeGenome "genome"
 #define hgvaRegionTypeRange "range"
 #define hgvaPositionContainer "positionContainer"
 #define hgvaSampleVariants "hgva_internally_generated_sample_variants"
 #define hgvaSampleVariantsLabel "Artificial Example Variants"
+#define hgvaUseVariantIds "hgva_useVariantIds"
+#define hgvaVariantIdsLabel "Variant Identifiers"
+#define hgvaVariantIds "hgva_variantIds"
+#define hgvaVariantPasteContainer "variantPasteContainer"
+
 void addSomeCss()
 /*#*** This should go in a .css file of course. */
 {
 printf("<style>\n"
 	"div.sectionLite { border-width: 1px; border-color: #"HG_COL_BORDER"; border-style: solid;"
 	"  background-color: #"HG_COL_INSIDE"; padding-left: 10px; padding-right: 10px; "
 	"  padding-top: 8px; padding-bottom: 5px; margin-top: 5px; margin-bottom: 5px }\n"
 	".sectionLiteHeader { font-weight: bold; font-size:larger; color:#000000;"
 	"  text-align:left; vertical-align:bottom; white-space:nowrap; }\n"
 	"div.sectionLiteHeader.noReorderRemove { padding-bottom:5px; }\n"
 	"div.sourceFilter { padding-top: 5px; padding-bottom: 5px }\n"
 	"</style>\n");
 }
 
 
@@ -293,48 +300,57 @@
 	}
     hashFree(&hash);
     slReverse(&groupRefList);
     *retGroupRefList = groupRefList;
     }
 }
 
 boolean isVariantCustomTrack(struct trackDb *tdb, void *filterData)
 /* This is a TdbFilterFunction to get custom or hub tracks with type pgSnp or vcfTabix. */
 {
 return ((sameString(tdb->grp, "user") || isHubTrack(tdb->track)) &&
 	(sameString(tdb->type, "pgSnp") || sameString(tdb->type, "vcfTabix")));
 }
 
 void selectVariants(struct slRef *varGroupList, struct slRef *varTrackList)
-/* Offer selection of user's variant custom tracks. */
+/* Offer selection of user's variant custom tracks, example variants, pasted input etc. */
 {
 printf("<div class='sectionLiteHeader'>Select Variants</div>\n");
 printf("If you have more than one custom track or hub track in "
        "<A HREF='../FAQ/FAQformat.html#format10' TARGET=_BLANK>pgSnp</A> or "
        "<A HREF='../goldenPath/help/vcf.html' TARGET=_BLANK>VCF</A> format, "
        "please select the one you wish to annotate:<BR>\n");
 printf("<B>variants: </B>");
-printf("<SELECT ID='hgva_variantTrack' NAME='hgva_variantTrack'>\n");
+printf("<SELECT ID='hgva_variantTrack' NAME='hgva_variantTrack' "
+       "onchange=\"hgva.changeVariantSource();\">\n");
 char *selected = cartUsualString(cart, "hgva_variantTrack", "");
 struct slRef *ref;
 for (ref = varTrackList;  ref != NULL;  ref = ref->next)
     {
     struct trackDb *tdb = ref->val;
     printOption(tdb->track, selected, tdb->longLabel);
     }
 printOption(hgvaSampleVariants, selected, hgvaSampleVariantsLabel);
+printOption(hgvaUseVariantIds, selected, hgvaVariantIdsLabel);
 printf("</SELECT><BR>\n");
+
+printf("<div id='"hgvaVariantPasteContainer"'%s>\n",
+       differentString(selected, hgvaUseVariantIds) ? " style='display: none;'" : "");
+char *oldPasted = cartUsualString(cart, hgvaVariantIds, "");
+cgiMakeTextArea(hgvaVariantIds, oldPasted, 10, 70);
+puts("</div>");
+
 printf("<B>maximum number of variants to be processed:</B>\n");
 char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000");
 char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" };
 char *limitValues[] = { "10", "100", "1000", "10000", "100000" };
 cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels),
 			curLimit);
 printCtAndHubButtons();
 puts("<BR>");
 }
 
 boolean isGeneTrack(struct trackDb *tdb, void *filterData)
 /* This is a TdbFilterFunction to get genePred tracks. */
 {
 return (startsWith("genePred", tdb->type));
 }
@@ -492,74 +508,84 @@
 struct slName *table;
 for (table = dbNsfpTables;  table != NULL;  table = table->next)
     {
     if (sameString(table->name, "dbNsfpPolyPhen2"))
 	{
 	printDbNsfpSource(table->name, HDIV);
 	printDbNsfpSource(table->name, HVAR);
 	}
     else
 	printDbNsfpSource(table->name, 0);
     }
 puts("<BR>");
 endCollapsibleSection();
 }
 
-boolean findSnpBed4(char *suffix, char **retFileName, struct trackDb **retTdb)
-/* If we can find the latest snpNNNsuffix track using 'show tables rlike "regex"',
- * or better yet a bigBed file for it (faster),
- * set the appropriate ret* and return TRUE, otherwise return FALSE. */
+char *findLatestSnpTable(char *suffix)
+/* Return the name of the 'snp1__<suffix>' table with the highest build number, if any. */
 {
 if (startsWith(hubTrackPrefix, database))
-    return FALSE;
+    return NULL;
 if (suffix == NULL)
     suffix = "";
 char query[64];
 sqlSafef(query, sizeof(query), "show tables like 'snp1__%s'", suffix);
 struct sqlConnection *conn = hAllocConn(database);
 struct slName *snpNNNTables = sqlQuickList(conn, query);
 hFreeConn(&conn);
-if (snpNNNTables == NULL)
-    return FALSE;
-boolean foundIt = FALSE;
 // Skip to last in list -- highest number (show tables can't use rlike or 'order by'):
 struct slName *table = snpNNNTables;
 while (table->next != NULL && isdigit(table->next->name[4]) && isdigit(table->next->name[5]))
     table = table->next;
+char *tableName = NULL;
+if (table != NULL)
+    tableName = cloneString(table->name);
+slNameFreeList(&snpNNNTables);
+return tableName;
+}
+
+boolean findSnpBed4(char *suffix, char **retFileName, struct trackDb **retTdb)
+/* If we can find the latest snpNNNsuffix table, or better yet a bigBed file for it (faster),
+ * set the appropriate ret* and return TRUE, otherwise return FALSE. */
+{
+char *table = findLatestSnpTable(suffix);
+if (table == NULL)
+    return FALSE;
+boolean foundIt = FALSE;
 // Do we happen to have a bigBed version?  Better yet, bed4 only for current uses:
 char fileName[HDB_MAX_PATH_STRING];
-safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bed4.bb", database, table->name);
+safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bed4.bb", database, table);
 if (fileExists(fileName))
     {
     if (retFileName != NULL)
 	*retFileName = cloneString(fileName);
     foundIt = TRUE;
     }
 else
     {
     // Not bed4; try just .bb:
-    safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bb", database, table->name);
+    safef(fileName, sizeof(fileName), "/gbdb/%s/vai/%s.bb", database, table);
     if (fileExists(fileName))
 	{
 	if (retFileName != NULL)
 	    *retFileName = cloneString(fileName);
 	foundIt = TRUE;
 	}
     }
 if (foundIt && retTdb == NULL)
     return TRUE;
-struct trackDb *tdb = tdbForTrack(database, table->name, &fullTrackList);
+struct trackDb *tdb = tdbForTrack(database, table, &fullTrackList);
 if (tdb != NULL)
     {
     if (retTdb != NULL)
 	*retTdb = tdb;
     return TRUE;
     }
 return foundIt;
 }
 
 void selectDbSnp(boolean gotSnp)
 /* Offer to include rsID (and other fields, or leave that for advanced output??) if available */
 {
 if (!gotSnp)
     return;
 startCollapsibleSection("dbSnp", "Known variation", TRUE);
@@ -1166,30 +1192,44 @@
 fprintf(f, "##reference=%s\n", db);
 fputs("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n", f);
 }
 
 static void mutateBase(char *pBase)
 /* Change *pBase into a different base. */
 {
 static char *bases = "ACGT";
 char c;
 // In real life the probabilities aren't even, but this is easier.
 while ((c = bases[(uint)(floor(drand48() * 4.0))]) == *pBase)
     ;
 *pBase = c;
 }
 
+static void writeMinimalVcfRow(FILE *f, struct vcfRecord *rec)
+/* Write a minimalist VCF row (coords, name, alleles and placeholders). */
+{
+// VCF columns: #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+fprintf(f, "%s\t%u\t%s\t%s\t",
+	rec->chrom, rec->chromStart+1, rec->name, rec->alleles[0]);
+// Comma-separated alt alleles:
+int i;
+for (i = 1;  i < rec->alleleCount;  i++)
+    fprintf(f, "%s%s",  (i > 1 ? "," : ""), rec->alleles[i]);
+// Placeholder qual, filter, info.
+fprintf(f, "\t.\t.\t.\n");
+}
+
 static void writeMinimalVcfRowFromBed(FILE *f, struct bed4 *bed, struct dnaSeq *seq, uint seqOffset)
 /* Write a minimalist VCF row with a mutation of the reference sequence at the given
  * position. */
 {
 uint indelBase = 0, start = bed->chromStart, end = bed->chromEnd;
 if (end != start+1)
     {
     // VCF treats indels in a special way: pos is the base to the left of the indel,
     // and both ref and alt alleles include the base to the left of the indel.
     indelBase = 1;
     }
 // Get reference allele sequence:
 uint refAlLen = end - start + indelBase;
 char ref[refAlLen+1];
 uint seqStart = start - indelBase - seqOffset;
@@ -1348,36 +1388,36 @@
 return bedList;
 }
 
 // margin for intergenic variants around transcript:
 #define IGFUDGE 5
 
 static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly,
 						       struct trackDb *geneTdb, int maxOutRows)
 /* Construct a VCF file of example variants for db (unless it already exists)
  * and return an annoStreamer for that file.  If possible, make the variants hit a gene. */
 {
 char *sampleFile = sampleVariantsPath(assembly);
 boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE);
 if (! fileExists(sampleFile) || forceRebuild)
     {
-    FILE *f = mustOpen(sampleFile, "w");
-    writeMinimalVcfHeader(f, assembly->name);
     struct annoStreamer *geneStream = streamerFromTrack(assembly, geneTdb->table, geneTdb, NULL,
 							NO_MAXROWS);
     boolean gotCoding = FALSE, gotNonCoding = FALSE;
     struct genePred *gpList = genesFromPosition(geneStream, &gotCoding, &gotNonCoding);
+    FILE *f = mustOpen(sampleFile, "w");
+    writeMinimalVcfHeader(f, assembly->name);
     if (gpList == NULL)
 	{
 	warn("Unable to find any gene transcripts in '%s' (%s)",
 	     geneTdb->shortLabel, geneTdb->track);
 	writeArbitraryVariants(f, assembly);
 	}
     else
 	{
 	struct bed4 *bedList = NULL;
 	uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom);
 	uint minSeqStart = chromSize, maxSeqEnd = 0;
 	struct genePred *gp;
 	for (gp = gpList;  gp != NULL;  gp = gp->next)
 	    {
 	    int txSeqStart = gp->txStart - (UPSTREAMFUDGE + IGFUDGE);
@@ -1399,30 +1439,352 @@
 						      minSeqStart, maxSeqEnd);
 	struct bed4 *bed;
 	for (bed = bedList;  bed != NULL;  bed = bed->next)
 	    {
 	    writeMinimalVcfRowFromBed(f, bed, txSeq, minSeqStart);
 	    }
 	dnaSeqFree(&txSeq);
 	bed4FreeList(&bedList);
 	}
     geneStream->close(&geneStream);
     carefulClose(&f);
     }
 return annoStreamVcfNew(sampleFile, FALSE, assembly, maxOutRows);
 }
 
+static char *variantIdPath(struct annoAssembly *assembly, char *variantIds)
+/* Use the md5sum of the user's pasted/uploaded variants to make a hopefully
+ * unique trash filename. */
+{
+char *md5sum = md5HexForString(variantIds);
+char *subDir = "hgv";
+mkdirTrashDirectory(subDir);
+struct dyString *dy = dyStringCreate("%s/%s/%s_%s.vcf", trashDir(), subDir, assembly->name, md5sum);
+return dyStringCannibalize(&dy);
+}
+
+static struct slName *hashListNames(struct hash *hash)
+/* Return a list of all element names in the hash (if any). */
+{
+struct slName *list = NULL;
+struct hashCookie cookie = hashFirst(hash);
+struct hashEl *hel;
+while ((hel = hashNext(&cookie)) != NULL)
+    slAddHead(&list, slNameNew(hel->name));
+return list;
+}
+
+static char *encloseInAngleBrackets(char *stringIn)
+/* If stringIn begins and ends with ()'s, replace them with <> and return stringIn.
+ * Otherwise, alloc a new string and surround stringIn with <>. */
+{
+char *stringOut = stringIn;
+int stringInLen = strlen(stringIn);
+if (stringIn[0] == '(' && stringIn[stringInLen-1] == ')')
+    {
+    stringIn[0] = '<';
+    stringIn[stringInLen-1] = '>';
+    }
+else
+    {
+    int stringOutLen = stringInLen + 2 + 1;
+    stringOut = needMem(stringOutLen);
+    safef(stringOut, stringOutLen, "<%s>", stringIn);
+    }
+return stringOut;
+}
+
+
+static char **parseDbSnpAltAlleles(char *refAl, char *obsAls, boolean minusStrand,
+				   int *retAltAlCount, boolean *retNeedLeftBase)
+/* Given a non-symbolic reference allele and slash-sep observed alleles from dbSNP,
+ * return an array of +-strand alleles that are not the same as the reference.
+ * If any allele is "-" (deleted, zero-length), then set retNeedLeftBase to TRUE
+ * because in this case VCF requires that the reference base to the left of the indel
+ * must be added to all alleles, and the start coord also moves one base to the left.
+ * Also, if any alt allele is symbolic, padding is required.
+ * Note: this trashes obsAls.  Resulting array can be freed but not its contents. */
+{
+int obsCount = countChars(obsAls, '/') + 1;
+char *obsWords[obsCount];
+chopByChar(obsAls, '/', obsWords, obsCount);
+char **altAls;
+AllocArray(altAls, obsCount);
+int altCount = 0, i;
+boolean needLeftBase = isEmpty(refAl) || sameString(refAl, "-");
+for (i = 0;  i < obsCount;  i++)
+    {
+    char *altAl = obsWords[i];
+    if (differentString(altAl, refAl))
+	{
+	if (sameString(altAl, "-"))
+	    {
+	    altAls[altCount] = "";
+	    needLeftBase = TRUE;
+	    }
+	else
+	    {
+	    // It would be nice to expand the "(CA)11/12/14/15/16/17/18/19/20" syntax of
+	    // some dbSNP observed's.  What are these?: "(D1S243)", "(D1S2870)"
+	    // Unfortunately for observed="lengthTooLong" we just can't get the correct allele
+	    // sequence. (76,130 of those in snp138)
+	    // Hmmm, I guess we could at least stick in the right number of N's if we can
+	    // parse "(245 BP INSERTION)".  (2403 rows rlike "[0-9]+ BP ?INSERTION" in snp138)
+	    if (!isAllNt(altAl, strlen(altAl)))
+		{
+		// Symbolic allele: left base required, and enclose it in <>'s.
+		needLeftBase = TRUE;
+		altAl = encloseInAngleBrackets(altAl);
+		}
+	    else if (minusStrand)
+		reverseComplement(altAl, strlen(altAl));
+	    altAls[altCount] = altAl;
+	    }
+	altCount++;
+	}
+    }
+*retAltAlCount = altCount;
+*retNeedLeftBase = needLeftBase;
+return altAls;
+}
+
+char *firstNCommaSep(struct slName *nameList, int n)
+/* Return a comma-separated string with the first n names in nameList. */
+{
+struct dyString *dy = dyStringNew(0);
+int i;
+struct slName *el;
+for (i=0, el=nameList;  i < 5 && el != NULL;  i++, el = el->next)
+    {
+    if (i > 0)
+	dyStringAppend(dy, ", ");
+    dyStringPrintf(dy, "'%s'", el->name);
+    }
+return dyStringCannibalize(&dy);
+}
+
+//#*** Variant ID-matching should be metadata-driven too.  termRegex -> data source.
+static const char *rsIdRegex = "^rs[0-9]+$";
+
+static void rsIdsToVcfRecords(struct annoAssembly *assembly, struct slName *rsIds,
+			      struct vcfFile *vcff, struct vcfRecord **pRecList,
+			      struct slName **pCommentList)
+/* If possible, look up coordinates and alleles of dbSNP rs IDs. */
+{
+if (rsIds == NULL)
+    return;
+char *table = findLatestSnpTable(NULL);
+struct sqlConnection *conn = hAllocConn(assembly->name);
+// Build a 'name in (...)' query, and build a hash of IDs so we can test whether all were found
+struct dyString *dq = sqlDyStringCreate("select chrom, chromStart, chromEnd, name, strand, "
+					"refUCSC, observed from %s where name in (",
+					table);
+struct hash *idHash = hashNew(0);
+struct slName *id;
+for (id = rsIds;  id != NULL;  id = id->next)
+    {
+    tolowers(id->name);
+    dyStringPrintf(dq, "%s'%s'", (id != rsIds ? "," : ""), id->name);
+    hashStoreName(idHash, id->name);
+    }
+dyStringAppend(dq, ");");
+struct sqlResult *sr = sqlGetResult(conn, dq->string);
+// Construct a minimal VCF row to make a vcfRecord for each variant.
+char *vcfRow[9];
+vcfRow[5] = vcfRow[6] = vcfRow[7] = "."; // placeholder for qual, filter, info
+// It would be cool to someday add snpNNN's exceptions column to the filter or info.
+struct dyString *dyAltAlStr = dyStringNew(0);
+int i;
+char **row;
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    char *chrom = row[0];
+    uint chromStart = atoll(row[1]);
+    uint chromEnd = atoll(row[2]);
+    char *name = row[3];
+    char *strand = row[4];
+    char refAl[max(strlen(row[5]), chromEnd-chromStart) + 2];
+    safecpy(refAl, sizeof(refAl), row[5]);
+    if (sameString(refAl, "-"))
+	refAl[0] = '\0';
+    else if (! isAllNt(refAl, strlen(refAl)))
+	{
+	// refUCSC is symbolic like "( 2548bp insertion )" -- just use the actual reference
+	// sequence for now, to keep things simple downstream.
+	//#*** Need something more efficient, like a sequence cache inside assembly!
+	struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, chromStart, chromEnd);
+	safecpy(refAl, sizeof(refAl), seq->dna);
+	toUpperN(refAl, seq->size);
+	dnaSeqFree(&seq);
+	}
+    char *obsAls = row[6];
+    int altAlCount = 0;
+    boolean needLeftBase = FALSE;
+    char **altAls = parseDbSnpAltAlleles(refAl, obsAls, sameString(strand, "-"),
+					 &altAlCount, &needLeftBase);
+    needLeftBase |= (chromStart == chromEnd);  // should be redundant, but just in case.
+    hashRemove(idHash, name);
+    uint vcfStart = chromStart + 1;
+    dyStringClear(dyAltAlStr);
+    // If this is a non-symbolic indel, we'll have to move start one base to the left
+    // and add the reference base to the left of the actual alleles
+    if (needLeftBase)
+	{
+	vcfStart--;
+	//#*** Need something more efficient, like a sequence cache inside assembly!
+	struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom,
+						    chromStart-1, chromStart);
+	toUpperN(seq->dna, 1);
+	char leftBase = seq->dna[0];
+	dnaSeqFree(&seq);
+	char refAlPlus[strlen(refAl)+2];
+	safef(refAlPlus, sizeof(refAlPlus), "%c%s", leftBase, refAl);
+	safecpy(refAl, sizeof(refAl), refAlPlus);
+	for (i = 0;  i < altAlCount;  i++)
+	    {
+	    if (i > 0)
+		dyStringAppendC(dyAltAlStr, ',');
+	    if (isAllNt(altAls[i], strlen(altAls[i])))
+		dyStringPrintf(dyAltAlStr, "%c%s", leftBase, altAls[i]);
+	    else
+		dyStringAppend(dyAltAlStr, altAls[i]);
+	    }
+	}
+    else
+	{
+	// Not an indel, just make comma-sep string of alt alleles.
+	for (i = 0;  i < altAlCount;  i++)
+	    {
+	    if (i > 0)
+		dyStringAppendC(dyAltAlStr, ',');
+	    dyStringAppend(dyAltAlStr, altAls[i]);
+	    }
+	}
+    char vcfStartStr[64];
+    safef(vcfStartStr, sizeof(vcfStartStr), "%d", vcfStart);
+    vcfRow[0] = chrom;
+    vcfRow[1] = vcfStartStr;
+    vcfRow[2] = name;
+    vcfRow[3] = refAl;
+    vcfRow[4] = dyAltAlStr->string;
+    struct vcfRecord *rec = vcfRecordFromRow(vcff, vcfRow);
+    slAddHead(pRecList, rec);
+    }
+dyStringFree(&dyAltAlStr);
+// Check for IDs not found
+struct slName *notFoundIds = hashListNames(idHash);
+if (notFoundIds != NULL)
+    {
+    char *namesNotFound = firstNCommaSep(notFoundIds, 5);
+    struct dyString *dy = dyStringCreate("%d rs# IDs not found, e.g. %s",
+					 slCount(notFoundIds), namesNotFound);
+    slAddTail(pCommentList, slNameNew(dy->string));
+    freeMem(namesNotFound);
+    dyStringFree(&dy);
+    }
+slNameFreeList(&notFoundIds);
+}
+
+static struct vcfRecord *parseVariantIds(struct annoAssembly *assembly, char *variantIdText,
+					 struct slName **pCommentList)
+/* Return a sorted list of minimal vcfRecords (coords, name, ref and alt alleles)
+ * corresponding to each pasted variant. */
+{
+struct vcfRecord *recList = NULL;
+char *p = cloneString(variantIdText), *id;
+struct slName *rsIds = NULL, *unknownIds = NULL;
+subChar(p, ',', ' ');
+while ((id = nextWord(&p)) != NULL)
+    {
+    if (regexMatchNoCase(id, rsIdRegex))
+	slNameAddHead(&rsIds, id);
+    else
+	slNameAddHead(&unknownIds, id);
+    }
+if (unknownIds != NULL)
+    {
+    slReverse(&unknownIds);
+    char *firstUnknownIds = firstNCommaSep(unknownIds, 5);
+    struct dyString *dy = dyStringCreate("%d variant identifiers are unrecognized, e.g. %s",
+					 slCount(unknownIds), firstUnknownIds);
+    slAddTail(pCommentList, slNameNew(dy->string));
+    freeMem(firstUnknownIds);
+    dyStringFree(&dy);
+    }
+struct vcfFile *vcff = vcfFileNew();
+rsIdsToVcfRecords(assembly, rsIds, vcff, &recList, pCommentList);
+slSort(&recList, vcfRecordCmp);
+slNameFreeList(&rsIds);
+slNameFreeList(&unknownIds);
+return recList;
+}
+
+static void adjustRangeForVariants(struct vcfRecord *recList, char **pChrom,
+				   uint *pStart, uint *pEnd)
+/* Given a *sorted* list of VCF records, look at the first and last records and if
+ * they fall outside of the range, expand the range to include them. */
+{
+if (pChrom != NULL && *pChrom != NULL && recList != NULL)
+    {
+    // We have a non-empty sorted list of records, and search is not genome-wide;
+    // look at first and last variants to see if they're not already included in search range.
+    struct vcfRecord *first = recList, *last = recList;
+    while (last->next)
+	last = last->next;
+    if (differentString(*pChrom, first->chrom) || differentString(*pChrom, last->chrom))
+	{
+	// Variants span multiple chroms; force genome-wide search.
+	*pChrom = NULL;
+	*pStart = *pEnd = 0;
+	}
+    else
+	{
+	// Variant(s) are on the same chrom as search range; check starts and ends.
+	if (*pStart > first->chromEnd)
+	    *pStart = first->chromEnd;
+	if (*pEnd < last->chromStart)
+	    *pEnd = last->chromStart;
+	}
+    }
+}
+
+static struct annoStreamer *makeVariantIdStreamer(struct annoAssembly *assembly, int maxOutRows,
+						  char **pChrom, uint *pStart, uint *pEnd,
+						  struct slName **pCommentList)
+/* Translate user's pasted/uploaded variant IDs into minimal VCF if possible.
+ * Return a VCF streamer for those, and if the current search position is too narrow
+ * to include all of the variants, widen it as necessary. */
+{
+// Hash variant text to get trash filename.  Use if exists, otherwise build it.
+char *variantIds = cartString(cart, hgvaVariantIds);
+char *varFile = variantIdPath(assembly, variantIds);
+boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildVariantIds", FALSE);
+if (! fileExists(varFile) || forceRebuild)
+    {
+    struct vcfRecord *varList = parseVariantIds(assembly, variantIds, pCommentList);
+//#*** If no variants were recognized, we should probably show main page with a warning.
+    adjustRangeForVariants(varList, pChrom, pStart, pEnd);
+    FILE *f = mustOpen(varFile, "w");
+    writeMinimalVcfHeader(f, assembly->name);
+    struct vcfRecord *var;
+    for (var = varList;  var != NULL;  var = var->next)
+	writeMinimalVcfRow(f, var);
+    carefulClose(&f);
+    }
+return annoStreamVcfNew(varFile, FALSE, assembly, maxOutRows);
+}
+
 static struct trackDb *getVariantTrackDb(char *variantTrack)
 /* Get a tdb for the user's selected variant track, or warn and return NULL
  * (main page will be displayed) */
 {
 struct trackDb *varTdb = tdbForTrack(database, variantTrack, &fullTrackList);
 if (varTdb == NULL)
     {
     if (isHubTrack(variantTrack))
 	warn("Can't find hub track '%s'; try the \"check hub\" button in "
 	     "<A href=\"%s?%s&%s=%s\">Track Data Hubs</A>.",
 	     variantTrack,
 	     hgHubConnectName(), cartSidUrlString(cart), hgHubConnectCgiDestUrl, cgiScriptName());
     else
 	warn("Can't find tdb for variant track '%s'", variantTrack);
     }
@@ -1441,40 +1803,47 @@
 struct annoAssembly *assembly = getAnnoAssembly(database);
 
 char *geneTrack = cartString(cart, "hgva_geneTrack");
 struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList);
 if (geneTdb == NULL)
     {
     warn("Can't find tdb for gene track %s", geneTrack);
     doUi();
     return;
     }
 
 int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10);
 struct annoStreamer *primary = NULL;
 char *primaryLongLabel = NULL;
 char *variantTrack = cartString(cart, "hgva_variantTrack");
+struct slName *commentList = NULL;
 if (sameString(variantTrack, hgvaSampleVariants))
     {
     primary = makeSampleVariantsStreamer(assembly, geneTdb, maxVarRows);
     primaryLongLabel = hgvaSampleVariantsLabel;
     // Sample variants can't always be made within the currently selected position range,
     // so just for these, force search to be genome-wide.
     chrom = NULL;
     start = 0;
     end = 0;
     }
+else if (sameString(variantTrack, hgvaUseVariantIds))
+    {
+    // Override search position if cart position doesn't include all variants:
+    primary = makeVariantIdStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList);
+    primaryLongLabel = hgvaVariantIdsLabel;
+    }
 else
     {
     struct trackDb *varTdb = getVariantTrackDb(variantTrack);
     if (varTdb == NULL)
 	{
 	doUi();
 	return;
 	}
     primary = streamerFromTrack(assembly, varTdb->table, varTdb, chrom, maxVarRows);
     primaryLongLabel = varTdb->longLabel;
     }
 
 enum annoGratorOverlap geneOverlapRule = agoMustOverlap;
 struct annoGrator *gpVarGrator = gratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom,
 						   NO_MAXROWS, primary->asObj, geneOverlapRule);
@@ -1513,30 +1882,33 @@
 slReverse(&gratorList);
 
 if (doHtml)
     {
     webStart(cart, database, "Annotated Variants in VEP/HTML format");
     }
 else
     {
     // Undo the htmlPushEarlyHandlers() because after this point they make ugly text:
     popWarnHandler();
     popAbortHandler();
     textOpen();
     webStartText();
     }
 struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
+struct slName *comment;
+for (comment = commentList;  comment != NULL;  comment = comment->next)
+    vepOut->comment(vepOut, comment->name);
 if (chrom != NULL)
     annoGratorQuerySetRegion(query, chrom, start, end);
 annoGratorQueryExecute(query);
 annoGratorQueryFree(&query);
 
 if (doHtml)
     webEnd();
 else
     textOutClose(&compressPipeline);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 long enteredMainTime = clock1000();