b226cb3af001057dea4635de2d4c6c52f0ebe047 angie Sat Jun 20 14:34:08 2020 -0700 New scripts & vcf module for working with non-Nextstrain VCF and trees, e.g. Rob Lanfear's 40k sample build. Updates to other VCF & tree utils. sorta refs #25278, #25382 diff --git src/hg/utils/otto/nextstrainNcov/sampleLineage.py src/hg/utils/otto/nextstrainNcov/sampleLineage.py index dbf43c3..d48e506 100755 --- src/hg/utils/otto/nextstrainNcov/sampleLineage.py +++ src/hg/utils/otto/nextstrainNcov/sampleLineage.py @@ -1,35 +1,35 @@ #!/usr/bin/env python3 import logging, argparse, sys -import lineageColors, nextstrainVcf, utils, virusNames +import lineageColors, vcf, utils, virusNames def main(): parser = argparse.ArgumentParser(description=""" Read sample names from sampleFile. Read sample IDs that are a concatenation of EPI ID, sample name and approximate date, -for resolving sampleFile IDs and lineageFile IDs, from a Nextstrain VCF file. +for resolving sampleFile IDs and lineageFile IDs, from a VCF file. Read lineage assignments from lineageFile. Write out 3 tab-sep columns: sample, lineage, lineageColor. """ ) parser.add_argument('sampleFile', help='File containing sample IDs') - parser.add_argument('vcfFile', help='VCF file derived from Nextstrain data') + parser.add_argument('vcfFile', help='VCF file with genotype columns for the sample samples') parser.add_argument('lineageFile', help='Two-column tab-sep file mapping sample to lineage') args = parser.parse_args() samples = utils.listFromFile(args.sampleFile) - (vcfSamples, vcfSampleClades) = nextstrainVcf.readVcfSamples(args.vcfFile) + vcfSamples = vcf.readSamples(args.vcfFile) idLookup = virusNames.makeIdLookup(vcfSamples) lineages = utils.dictFromFile(args.lineageFile) nsLineages = dict([ (virusNames.maybeLookupSeqName(name, idLookup), lin) for name, lin in lineages.items() ]) for sample in samples: nsSample = virusNames.maybeLookupSeqName(sample, idLookup) lineage = nsLineages.get(nsSample) if (not lineage): lineage = '' color = "#%06x" % (lineageColors.lineageToColor(lineage)) print('\t'.join([sample, lineage, color])) main()