f78d6e76501646c8b8cc966103bafca896322509
angie
  Mon Oct 7 13:54:42 2013 -0700
Work in progress for #11460 (paste/upload variant input options...): adding an option for the user to paste/upload variant identifiers, which
will be translated into a sorted list of vcfRecords.

Currently we recognize only rs# IDs.  I was considering adding
dbVar IDs, but those could come from multiple sources (DGV, ClinVar,
ISCA) so I'm not sure.

Treating all symbolic/named alleles as deletions... non-ideal,
but fortunately those are a small minority in dbSNP.

Next: recognize HGVS IDs.

The grander vision of #11460 includes accepting VEP input format
and VCF, but I think those should be new SELECT options so we don't
get into a quagmire of guessing the format.

diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 54ad3e4..c7e4970 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -499,40 +499,46 @@
 	p[3] = '\0';
 	break;
 	}
     p += 3;
     }
 }
 
 static char *gpFxModifyCodingSequence(char *oldCodingSeq, struct genePred *pred,
 				      int startInCds, int endInCds, struct allele *allele,
 				      int *retCdsBasesAdded, struct lm *lm)
 /* Return a new coding sequence that is oldCodingSeq with allele applied. */
 {
 boolean isRc = (pred->strand[0] == '-');
 char *newAlleleSeq = allele->sequence;
 int newAlLen = strlen(newAlleleSeq);
-if (isRc)
+if (! isAllNt(newAlleleSeq, newAlLen))
+    {
+    // Symbolic/named allele -- may be a deletion or an insertion, but we can't tell; treat it as a deletion (empty replacement sequence).
+    newAlleleSeq = "";
+    newAlLen = 0;
+    }
+if (isRc && newAlLen > 0)
     {
-    newAlleleSeq = lmCloneString(lm, allele->sequence);
+    newAlleleSeq = lmCloneString(lm, newAlleleSeq);
     reverseComplement(newAlleleSeq, newAlLen);
     }
 int variantSizeOnCds = endInCds - startInCds;
 if (variantSizeOnCds < 0)
     errAbort("gpFx: endInCds (%d) < startInCds (%d)", endInCds, startInCds);
 char *newCodingSeq = mergeAllele(oldCodingSeq, startInCds, variantSizeOnCds,
-				 newAlleleSeq, allele->length, lm);
+				 newAlleleSeq, newAlLen, lm);
 // If newCodingSequence has an early stop, truncate there:
 truncateAtStopCodon(newCodingSeq);
 int variantSizeOnRef = allele->variant->chromEnd - allele->variant->chromStart;
 if (retCdsBasesAdded)
     *retCdsBasesAdded = allele->length - variantSizeOnRef;
 return newCodingSeq;
 }
 
 static boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred)
 /* Return TRUE if variant in strand-corrected exonNum is in the region
  * of pred that would make it safe from Nonsense-Mediated Decay (NMD).
  * NMD is triggered by the presence of a stop codon that appears
  * before ~50bp before the end of the last exon.  In other words, if
  * there's a stop codon with a detectable downstream splice junction,
  * translation is prevented -- see