466fc186d05dd8e8bd27f91752bcb03d8edd63a6 angie Tue Jan 19 11:03:38 2021 -0800 Add try/except to prevent some encoding problem when writing varPaths from crashing the script. diff --git src/hg/utils/otto/nextstrainNcov/nextstrain.py src/hg/utils/otto/nextstrainNcov/nextstrain.py index 076c068..a452e00 100755 --- src/hg/utils/otto/nextstrainNcov/nextstrain.py +++ src/hg/utils/otto/nextstrainNcov/nextstrain.py @@ -644,32 +644,36 @@ apostropheSRe = re.compile("'s"); firstLetterRe = re.compile('(\w)\w+'); spacePunctRe = re.compile('\W'); def abbreviateLab(lab): """Lab names are very long and sometimes differ by punctuation or typos. Abbreviate for easier comparison.""" labAbbrev = apostropheSRe.sub('', lab) labAbbrev = firstLetterRe.sub(r'\1', labAbbrev, count=0) labAbbrev = spacePunctRe.sub('', labAbbrev, count=0) return labAbbrev with open('nextstrainSamples.varPaths', 'w') as outF: for sample in samples: lab = sample['lab'] labAbbrev = abbreviateLab(lab) + try: outF.write('\t'.join([sampleName(sample), sample['clade'], labAbbrev, lab, sample['varStr']]) + '\n'); + except: + print("Problem writing varPaths for sample '", sampleName(sample), "', varStr '", + sample['varStr']) # Narrow down variants to "informative" set (bi-allelic, each allele supported by # sufficient number of samples): minSamples = 2 discardedAlleles = [] blacklist = [] informativeVariants = [] for mv in mergedVars: pv, alts, altCounts, sampleAlleles, backMutSamples = mv pos, varNameMerged, ref, altStr = pv recurrentAlts = [] recurrentAltCounts = [] for alt, altCount in zip(alts, altCounts): if (altCount < minSamples):