b226cb3af001057dea4635de2d4c6c52f0ebe047
angie
  Sat Jun 20 14:34:08 2020 -0700
New scripts & vcf module for working with non-Nextstrain VCF and trees, e.g. Rob Lanfear's 40k sample build.  Updates to other VCF & tree utils.  sorta refs #25278, #25382

diff --git src/hg/utils/otto/nextstrainNcov/sampleLineage.py src/hg/utils/otto/nextstrainNcov/sampleLineage.py
index dbf43c3..d48e506 100755
--- src/hg/utils/otto/nextstrainNcov/sampleLineage.py
+++ src/hg/utils/otto/nextstrainNcov/sampleLineage.py
@@ -1,35 +1,35 @@
 #!/usr/bin/env python3
 
 import logging, argparse, sys
-import lineageColors, nextstrainVcf, utils, virusNames
+import lineageColors, vcf, utils, virusNames
 
 def main():
     parser = argparse.ArgumentParser(description="""
 Read sample names from sampleFile.
 Read sample IDs that are a concatenation of EPI ID, sample name and approximate date,
-for resolving sampleFile IDs and lineageFile IDs, from a Nextstrain VCF file.
+for resolving sampleFile IDs and lineageFile IDs, from a VCF file.
 Read lineage assignments from lineageFile.
 Write out 3 tab-sep columns:
 sample, lineage, lineageColor.
 """
     )
     parser.add_argument('sampleFile', help='File containing sample IDs')
-    parser.add_argument('vcfFile', help='VCF file derived from Nextstrain data')
+    parser.add_argument('vcfFile', help='VCF file with genotype columns for the same samples')
     parser.add_argument('lineageFile', help='Two-column tab-sep file mapping sample to lineage')
     args = parser.parse_args()
 
     samples = utils.listFromFile(args.sampleFile)
-    (vcfSamples, vcfSampleClades) = nextstrainVcf.readVcfSamples(args.vcfFile)
+    vcfSamples = vcf.readSamples(args.vcfFile)
     idLookup = virusNames.makeIdLookup(vcfSamples)
     lineages = utils.dictFromFile(args.lineageFile)
     nsLineages = dict([ (virusNames.maybeLookupSeqName(name, idLookup), lin)
                         for name, lin in lineages.items() ])
     for sample in samples:
         nsSample = virusNames.maybeLookupSeqName(sample, idLookup)
         lineage = nsLineages.get(nsSample)
         if (not lineage):
             lineage = ''
         color = "#%06x" % (lineageColors.lineageToColor(lineage))
         print('\t'.join([sample, lineage, color]))
 
 main()