3640a4d6b3303a6bebc7c5b2fc5abcf7f4fae0b2 angie Wed Sep 28 11:56:00 2016 -0700 Partial support for changes in VCF4.2 and latest samtools mpileup output: - Tolerate 'Number=R' and new INFO attributes Source and Version - Tolerate mpileup's '<X>' alt (no alternate allele was observed) - The 4.3 spec includes '<*>' from gVCF, also meaning no alternate allele observed. - GT is no longer required; user's example has PL instead, so parse that into genotypes. - hgVai now annotates "variants" with <X> and <*> as no_sequence_alteration - annoFormatVep now uses html encoding for html output in various places so that "<X>" is displayed properly (custom track labels and various item names could also have undesirable characters). I am not encoding the extras' descriptions because those are internal and some have <a>'s. refs #15625 diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index 4eab84d..2d91199 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -975,42 +975,56 @@ } static void checkVariantList(struct variant *variant) // check to see that we either have one variant (possibly with multiple // alleles) or that if we have a list of variants, they only have // one allele a piece. { if (variant->next == NULL) // just one variant return; for(; variant; variant = variant->next) if (variant->numAlleles != 1) errAbort("gpFxPredEffect needs either 1 variant, or only 1 allele in all variants"); } +static struct gpFx *gpFxNoVariation(struct variant *variant, struct lm *lm) +/* Return a gpFx with SO term no_sequence_alteration, for VCF rows that aren't really variants. */ +{ +char *seq = NULL; +struct allele *allele; +for (allele = variant->alleles; allele != NULL; allele = allele->next) + if (allele->isReference) + { + seq = allele->sequence; + // Don't break out of the loop -- pick the last one we see because the first is likely + // the "real" reference allele, while the other(s) is something like "<X>" or "<*>". 
+ } +return gpFxNew(seq, "", no_sequence_alteration, none, lm); +} + struct gpFx *gpFxPredEffect(struct variant *variant, struct genePred *pred, struct dnaSeq *transcriptSequence, struct lm *lm) // return the predicted effect(s) of a variation list on a genePred { struct gpFx *effectsList = NULL; // make sure we can deal with the variants that are coming in checkVariantList(variant); for (; variant != NULL; variant = variant->next) { - // If only the reference allele has been observed, skip it: - //#*** Some might like to keep variants e.g. in VCF output... - //#*** aha, Ensembl has requested a term for 'no change' from SONG. - //#*** Add that to soTerm when it exists... if (! hasAltAllele(variant->alleles)) - return NULL; - + effectsList = slCat(effectsList, gpFxNoVariation(variant, lm)); + else + { // check to see if SNP is up or downstream effectsList = slCat(effectsList, gpFxCheckUpDownstream(variant, pred, lm)); // check to see if SNP is in the transcript - effectsList = slCat(effectsList, gpFxCheckTranscript(variant, pred, transcriptSequence, lm)); + effectsList = slCat(effectsList, + gpFxCheckTranscript(variant, pred, transcriptSequence, lm)); + } } return effectsList; }