1395e00347b5e2aa8cdd2ad8039febd174fe951f
angie
  Thu May 14 14:39:34 2020 -0700
Fixing FORMAT column in per-clade and recurrent bi-allelic VCF output (it was causing about a million silent calls to vcfFileErr).  refs #25188

diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py
index 775fad5..08906aa 100755
--- src/hg/utils/otto/nextstrainNcov/nextstrain.py
+++ src/hg/utils/otto/nextstrainNcov/nextstrain.py
@@ -381,31 +381,31 @@
                     if (alIx > 0):
                         altCounts[alIx - 1] += 1
                         acTotal += 1
             if (acTotal > 0):
                 info = 'AC=' + ','.join(map(str, altCounts))
                 info += ';AN=' + str(cladeSampleCounts[cladeName])
                 aaChange = tallyAaChanges(varNameMerged)
                 if (len(aaChange)):
                     info += ';AACHANGE=' + aaChange
                 cladeBackMuts = [ sampleName for sampleName in backMutSamples
                                   if sampleName in cladeSampleNames[cladeName] ]
                 if (len(cladeBackMuts)):
                     info += ';BACKMUTS=' + ','.join(cladeBackMuts)
                 outV.write('\t'.join([ chrom,
                                        '\t'.join(map(str, pv)),
-                                       '\t'.join(['.', 'PASS', info, 'GT']),
+                                       '\t'.join(['.', 'PASS', info, 'GT:CLADE']),
                                        '\t'.join(genotypes) ]) + '\n')
 
 # BED+ file for clades
 with open('nextstrainClade.bed', 'w') as outC:
     for name, clade in clades.items():
         if (clade.get('thickStart')):
             outC.write('\t'.join(map(str,
                                      [ chrom, 0, 29903, name, 0, '.',
                                        clade['thickStart'], clade['thickEnd'], clade['color'],
                                        len(clade['varSizes']) + 2,
                                        '1,' + ','.join(map(str, clade['varSizes'])) + ',1,',
                                        '0,' + ','.join(map(str, clade['varStarts'])) + ',29902,',
                                        clade['varNames'],
                                        numDateToYmdStr(clade['dateInferred']),
                                        numDateToYmdStr(clade['dateConfMin']),
@@ -613,17 +613,17 @@
         if (len(alts) != 1):
             warn('Expected exactly one alt from merging ' + varName + ' and ' + backMutVarName +
                  ', but got [' + ', '.join(alts) + ']')
         info = 'AC=' + str(altCounts[0])
         info += ';AN=' + str(sampleCount)
         aaChange = tallyAaChanges(varName)
         if (len(aaChange)):
             info += ';AACHANGE=' + aaChange
         if (len(backMutSamples)):
             info += ';BACKMUTS=' + ','.join(backMutSamples)
         genotypes = []
         for sample, alIx in zip(samples, sampleAlleles):
             gt = str(alIx)
             genotypes.append(gt + ':' + sample['clade'])
         outF.write('\t'.join([ '\t'.join([ chrom, str(pos), varName, ref, alt,
-                                           '.', 'PASS', info, 'GT']),
+                                           '.', 'PASS', info, 'GT:CLADE']),
                                '\t'.join(genotypes) ]) + '\n')