b98e1d0ccffb3a4c1d69f6c69ab98a842840aa57
angie
  Fri Apr 24 13:58:33 2020 -0700
Adding VCF subtrack for David: filter to recurrent bi-allelic variants only.  refs #25188

diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py
index 8269bd9..d22e03a 100755
--- src/hg/utils/otto/nextstrainNcov/nextstrain.py
+++ src/hg/utils/otto/nextstrainNcov/nextstrain.py
@@ -580,15 +580,48 @@
         outF.write(varName + '\n');
     outF.close()
 
 # bedGraph for parsimony scores and mutation counts
 with open('nextstrainParsimony.bedGraph', 'w') as outF:
     for ps in parsimonyScores:
         pos, score = ps
         outF.write('\t'.join(map(str, [ chrom, pos-1, pos, score ])) + '\n')
     outF.close()
 
 with open('nextstrainMutCounts.bedGraph', 'w') as outF:
     for ps in mutationCounts:
         pos, score = ps
         outF.write('\t'.join(map(str, [ chrom, pos-1, pos, score ])) + '\n')
     outF.close()
+
+# Informative-only VCF
+with open('nextstrainRecurrentBiallelic.vcf', 'w') as outF:
+    writeVcfHeaderExceptSamples(outF)
+    outF.write('\t'.join(sampleNames) + '\n')
+    for iv in informativeVariants:
+        pos, ref, alt = iv
+        varName = ref + str(pos) + alt
+        # Ignore any discarded variants at this position, but handle back-mutations if any,
+        # by calling mergeVariants on this and its back-mutation
+        backMutVarName = alt + str(pos) + ref
+        variants = [ [ pos, varName, ref, alt ] ]
+        if (variantCounts.get(backMutVarName)):
+            variants.append([ pos, backMutVarName, alt, ref ])
+        pv, alts, altCounts, sampleAlleles, backMutSamples = mergeVariants(variants)
+        if (len(alts) != 1):
+            warn('Expected exactly one alt from merging ' + varName + ' and ' + backMutVarName +
+                 ', but got [' + ', '.join(alts) + ']')
+        info = 'AC=' + str(altCounts[0])
+        info += ';AN=' + str(sampleCount)
+        aaChange = tallyAaChanges(varName)
+        if (len(aaChange)):
+            info += ';AACHANGE=' + aaChange
+        if (len(backMutSamples)):
+            info += ';BACKMUTS=' + ','.join(backMutSamples)
+        genotypes = []
+        for sample, alIx in zip(samples, sampleAlleles):
+            gt = str(alIx)
+            genotypes.append(gt + ':' + sample['clade'])
+        outF.write('\t'.join([ '\t'.join([ chrom, str(pos), varName, ref, alt,
+                                           '.', 'PASS', info, 'GT']),
+                               '\t'.join(genotypes) ]) + '\n')
+    outF.close()