# Provenance (from the commit record this text was extracted from):
#   commit b226cb3af001057dea4635de2d4c6c52f0ebe047
#   angie, Sat Jun 20 14:34:08 2020 -0700
#   New scripts & vcf module for working with non-Nextstrain VCF and trees,
#   e.g. Rob Lanfear's 40k sample build.  Updates to other VCF & tree utils.
#   sorta refs #25278, #25382
#   file: src/hg/utils/otto/nextstrainNcov/sampleCladeLineage.py
#         (index e3459fa..52ca0e0, mode 100755, hunk @@ -2,27 +2,27 @@)
#   change: nextstrainVcf.readVcfSamples -> nextstrainVcf.readVcfSampleClades
# NOTE(review): the hunk begins at file line 2, so the file's first line is
# not shown here.

import logging, argparse, sys
import nextstrainVcf, utils, virusNames

def main():
    """Print one tab-separated line per VCF sample: sample, clade, lineage.

    Command-line arguments:
      vcfFile      passed to nextstrainVcf.readVcfSampleClades, which is
                   unpacked here as (samples, sample -> clade mapping).
      lineageFile  passed to utils.dictFromFile; its items are treated as
                   (name, lineage) pairs.

    Each name from lineageFile is run through virusNames.maybeLookupSeqName
    with a lookup built from the VCF samples (presumably to translate it to
    the VCF's sample ID -- confirm against virusNames).  Samples with no
    lineage, or an empty one, get an empty third column.
    """
    argParser = argparse.ArgumentParser(description="""
Read samples and clade assignments from a Nextstrain VCF file.
Read lineage assignments from lineageFile.
Write out 3 tab-sep columns: NS sample ID, clade, lineage.
"""
    )
    argParser.add_argument('vcfFile', help='VCF file derived from Nextstrain data')
    argParser.add_argument('lineageFile',
                           help='Two-column tab-sep file mapping sample to lineage')
    opts = argParser.parse_args()

    sampleList, cladeBySample = nextstrainVcf.readVcfSampleClades(opts.vcfFile)
    lookup = virusNames.makeIdLookup(sampleList)

    # Re-key the lineage table by whatever maybeLookupSeqName returns for
    # each name, so it can be queried with the VCF's own sample names below.
    lineageBySample = {
        virusNames.maybeLookupSeqName(rawName, lookup): lin
        for rawName, lin in utils.dictFromFile(opts.lineageFile).items()
    }

    for sampleName, cladeName in cladeBySample.items():
        # Missing or falsy lineage is written as an empty column.
        lineageName = lineageBySample.get(sampleName) or ''
        print('\t'.join((sampleName, cladeName, lineageName)))

main()