83eb31f4ea1896449dce8576697627091c6ff313 angie Thu Jan 21 10:54:13 2021 -0800 Prevent excess warnings about varPaths problems. diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py index a452e00..b25d3d6 100755 --- src/hg/utils/otto/nextstrainNcov/nextstrain.py +++ src/hg/utils/otto/nextstrainNcov/nextstrain.py @@ -641,39 +641,42 @@ # File with samples and their clades, labs and variant paths apostropheSRe = re.compile("'s"); firstLetterRe = re.compile('(\w)\w+'); spacePunctRe = re.compile('\W'); def abbreviateLab(lab): """Lab names are very long and sometimes differ by punctuation or typos. Abbreviate for easier comparison.""" labAbbrev = apostropheSRe.sub('', lab) labAbbrev = firstLetterRe.sub(r'\1', labAbbrev, count=0) labAbbrev = spacePunctRe.sub('', labAbbrev, count=0) return labAbbrev with open('nextstrainSamples.varPaths', 'w') as outF: + errCount = 0 for sample in samples: lab = sample['lab'] labAbbrev = abbreviateLab(lab) try: outF.write('\t'.join([sampleName(sample), sample['clade'], labAbbrev, lab, sample['varStr']]) + '\n'); except: + if (errCount == 0): print("Problem writing varPaths for sample '", sampleName(sample), "', varStr '", - sample['varStr']) + sample['varStr'], "'") + errCount += 1 # Narrow down variants to "informative" set (bi-allelic, each allele supported by # sufficient number of samples): minSamples = 2 discardedAlleles = [] blacklist = [] informativeVariants = [] for mv in mergedVars: pv, alts, altCounts, sampleAlleles, backMutSamples = mv pos, varNameMerged, ref, altStr = pv recurrentAlts = [] recurrentAltCounts = [] for alt, altCount in zip(alts, altCounts): if (altCount < minSamples):