3640a4d6b3303a6bebc7c5b2fc5abcf7f4fae0b2 angie Wed Sep 28 11:56:00 2016 -0700 Partial support for changes in VCF4.2 and latest samtools mpileup output: - Tolerate 'Number=R' and new INFO attributes Source and Version - Tolerate mpileup's '<X>' alt (no alternate allele was observed) - The 4.3 spec includes '<*>' from gVCF, also meaning no alternate allele observed. - GT is no longer required; user's example has PL instead, so parse that into genotypes. - hgVai now annotates "variants" with <X> and <*> as no_sequence_alteration - annoFormatVep now uses HTML encoding for HTML output in various places so that "<X>" is displayed properly (custom track labels and various item names could also have undesirable characters). I am not encoding the extras' descriptions because those are internal and some have <a>'s. refs #15625 diff --git src/hg/lib/pgSnp.c src/hg/lib/pgSnp.c index 61a3f19..2412e6d 100644 --- src/hg/lib/pgSnp.c +++ src/hg/lib/pgSnp.c @@ -829,31 +829,34 @@ } struct pgSnp *pgSnpFromVcfRecord(struct vcfRecord *rec) /* Convert VCF rec to pgSnp; don't free rec->file (vcfFile) until * you're done with pgSnp because pgSnp points to rec->chrom. 
*/ { struct dyString *dy = dyStringNew(0); struct pgSnp *pgs; AllocVar(pgs); pgs->chrom = rec->chrom; pgs->chromStart = rec->chromStart; pgs->chromEnd = rec->chromEnd; // Build up slash-separated allele string from rec->alleles, starting with ref allele: dyStringAppend(dy, rec->alleles[0]); int alCount = rec->alleleCount, i; -if (rec->alleleCount == 2 && sameString(rec->alleles[1], ".")) +if (rec->alleleCount == 2 && + (sameString(rec->alleles[1], ".") || + sameString(rec->alleles[1], "<X>") || + sameString(rec->alleles[1], "<*>"))) // ignore N/A alternate allele alCount = 1; else if (rec->alleleCount >= 2) { // append /-sep'd alternate alleles for (i = 1; i < rec->alleleCount; i++) dyStringPrintf(dy, "/%s", rec->alleles[i]); } pgs->name = cloneStringZ(dy->string, dy->stringSize+1); pgs->alleleCount = alCount; pgs->alleleFreq = alleleCountsFromVcfRecord(rec, alCount); // Build up comma-sep list... supposed to be per-allele quality scores but I think // the VCF spec only gives us one BQ... for the reference position? should ask. dyStringClear(dy); for (i = 0; i < rec->infoCount; i++)