f78d6e76501646c8b8cc966103bafca896322509 angie Mon Oct 7 13:54:42 2013 -0700 Work in progress for #11460 (paste/upload variant input options...): adding an option for user to paste/upload variant identifiers which will be translated into a sorted list of vcfRecords. Currently we recognize only rs# IDs. I was considering adding dbVar IDs, but those could come from multiple sources (DGV, ClinVar, ISCA) so I'm not sure. Treating all symbolic/named alleles as deletions... non-ideal, but fortunately those are a small minority in dbSNP. Next: recognize HGVS IDs. The grander vision of #11460 includes accepting VEP input format and VCF, but I think those should be new SELECT options so we don't get into a quagmire of guessing format. diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index 54ad3e4..c7e4970 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -499,40 +499,46 @@ p[3] = '\0'; break; } p += 3; } } static char *gpFxModifyCodingSequence(char *oldCodingSeq, struct genePred *pred, int startInCds, int endInCds, struct allele *allele, int *retCdsBasesAdded, struct lm *lm) /* Return a new coding sequence that is oldCodingSeq with allele applied. */ { boolean isRc = (pred->strand[0] == '-'); char *newAlleleSeq = allele->sequence; int newAlLen = strlen(newAlleleSeq); -if (isRc) +if (! isAllNt(newAlleleSeq, newAlLen)) + { + // symbolic -- may be deletion or insertion, but we can't tell. 
:( + newAlleleSeq = ""; + newAlLen = 0; + } +if (isRc && newAlLen > 0) { - newAlleleSeq = lmCloneString(lm, allele->sequence); + newAlleleSeq = lmCloneString(lm, newAlleleSeq); reverseComplement(newAlleleSeq, newAlLen); } int variantSizeOnCds = endInCds - startInCds; if (variantSizeOnCds < 0) errAbort("gpFx: endInCds (%d) < startInCds (%d)", endInCds, startInCds); char *newCodingSeq = mergeAllele(oldCodingSeq, startInCds, variantSizeOnCds, - newAlleleSeq, allele->length, lm); + newAlleleSeq, newAlLen, lm); // If newCodingSequence has an early stop, truncate there: truncateAtStopCodon(newCodingSeq); int variantSizeOnRef = allele->variant->chromEnd - allele->variant->chromStart; if (retCdsBasesAdded) *retCdsBasesAdded = allele->length - variantSizeOnRef; return newCodingSeq; } static boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred) /* Return TRUE if variant in strand-corrected exonNum is in the region * of pred that would make it safe from Nonsense-Mediated Decay (NMD). * NMD is triggered by the presence of a stop codon that appears * before ~50bp before the end of the last exon. In other words, if * there's a stop codon with a detectable downstream splice junction, * translation is prevented -- see