b98e1d0ccffb3a4c1d69f6c69ab98a842840aa57 angie Fri Apr 24 13:58:33 2020 -0700 Adding VCF subtrack for David: filter to recurrent bi-allelic variants only. refs #25188 diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py index 8269bd9..d22e03a 100755 --- src/hg/utils/otto/nextstrainNcov/nextstrain.py +++ src/hg/utils/otto/nextstrainNcov/nextstrain.py @@ -580,15 +580,48 @@ outF.write(varName + '\n'); outF.close() # bedGraph for parsimony scores and mutation counts with open('nextstrainParsimony.bedGraph', 'w') as outF: for ps in parsimonyScores: pos, score = ps outF.write('\t'.join(map(str, [ chrom, pos-1, pos, score ])) + '\n') outF.close() with open('nextstrainMutCounts.bedGraph', 'w') as outF: for ps in mutationCounts: pos, score = ps outF.write('\t'.join(map(str, [ chrom, pos-1, pos, score ])) + '\n') outF.close() + +# Informative-only VCF +with open('nextstrainRecurrentBiallelic.vcf', 'w') as outF: + writeVcfHeaderExceptSamples(outF) + outF.write('\t'.join(sampleNames) + '\n') + for iv in informativeVariants: + pos, ref, alt = iv + varName = ref + str(pos) + alt + # Ignore any discarded variants at this position, but handle back-mutations if any, + # by calling mergeVariants on this and its back-mutation + backMutVarName = alt + str(pos) + ref + variants = [ [ pos, varName, ref, alt ] ] + if (variantCounts.get(backMutVarName)): + variants.append([ pos, backMutVarName, alt, ref ]) + pv, alts, altCounts, sampleAlleles, backMutSamples = mergeVariants(variants) + if (len(alts) != 1): + warn('Expected exactly one alt from merging ' + varName + ' and ' + backMutVarName + + ', but got [' + ', '.join(alts) + ']') + info = 'AC=' + str(altCounts[0]) + info += ';AN=' + str(sampleCount) + aaChange = tallyAaChanges(varName) + if (len(aaChange)): + info += ';AACHANGE=' + aaChange + if (len(backMutSamples)): + info += ';BACKMUTS=' + ','.join(backMutSamples) + genotypes = [] + for sample, alIx in zip(samples, sampleAlleles): + gt = str(alIx) + genotypes.append(gt + ':' + sample['clade']) + outF.write('\t'.join([ '\t'.join([ chrom, str(pos), varName, ref, alt, + '.', 'PASS', info, 'GT']), + '\t'.join(genotypes) ]) + '\n') + outF.close()