b226cb3af001057dea4635de2d4c6c52f0ebe047
angie
  Sat Jun 20 14:34:08 2020 -0700
New scripts & vcf module for working with non-Nextstrain VCF and trees, e.g. Rob Lanfear's 40k sample build.  Updates to other VCF & tree utils.  sorta refs #25278, #25382

diff --git src/hg/utils/otto/nextstrainNcov/nextstrainVcf.py src/hg/utils/otto/nextstrainNcov/nextstrainVcf.py
index d338368..bc94c1e 100644
--- src/hg/utils/otto/nextstrainNcov/nextstrainVcf.py
+++ src/hg/utils/otto/nextstrainNcov/nextstrainVcf.py
@@ -1,27 +1,27 @@
 # Utils for dealing with VCF generated by nextstrain.py, with a particular style of ID
 # and clades appearing in genotypes...
 
 from collections import defaultdict
 
-def readVcfSamples(vcfFile):
+def readVcfSampleClades(vcfFile):
     """Read VCF sample IDs from the #CHROM line, and parse out clades from the first row GT cols"""
     samples = []
     sampleClades = defaultdict()
     with open(vcfFile, 'r') as vcfF:
         line = vcfF.readline().strip()
         while (line):
             if (line.startswith('#CHROM')):
                 samples = line.split('\t')[9:]
             elif (not line.startswith('#')):
                 gts = line.split('\t')[9:]
                 if (len(gts) != len(samples)):
                     die("VCF file '%s' has %d samples but %d genotypes in first row" %
                         (vcfFile, len(samples), len(gts)));
                 for sample, gt in zip(samples, gts):
                     gtVal, clade = gt.split(':')
                     sampleClades[sample] = clade
                 break
             line = vcfF.readline().strip()
         vcfF.close()
     return samples, sampleClades