# Reconstructed from a whitespace-collapsed unified diff ("+" lines applied):
#   3684310aca6ed1ad61468676c28a7dac37e4af59 chmalee Wed Jul 1 09:27:40 2020 -0700
#   Make clingen dosage otto job more resilient to poorly formatted downloads
#   src/hg/utils/otto/clinGen/processClinGenDosage.py  index feb63e5..3563fec 100755
#   @@ -128,43 +128,53 @@
#
# The hunk opens inside a function whose "def" line is above the visible
# context (presumably makeBedLine -- confirm against the full file).  Its
# visible tail is kept verbatim for reference only:
#     bed["chromEnd"] = chromEnd
#     bed["name"] = name
#     bed["score"] = score
#     bed["strand"] = "."
#     bed["thickStart"] = chromStart
#     bed["thickEnd"] = chromEnd
#     bed["itemRgb"] = getColor(bed, dosageType)
#     bed["Size"] = chromEnd - chromStart
#     extra = extraInfo[name]
#     bed.update(extra)
#     bed["_mouseOver"] = getMouseover(bed,dosageType)
#     return bed


def processClinGenDosage(inf, dosageType):
    """Read 5-column tab-separated bed lines from inf into bedLines, then dump them.

    Lines starting with '#', 'track' or 'browser' are skipped.  A line that
    does not split into exactly five tab-separated fields is reported on
    stderr with its 1-based line number and ignored.  Every other line is
    passed to makeBedLine() and stored in the module-level bedLines dict
    under its name field.  After all lines are read, dumpBedLines(dosageType)
    is called.

    Args:
        inf: iterable of text lines (an open file or sys.stdin).  Its .name
            attribute, when present, is used in warning messages.
        dosageType: passed through unchanged to makeBedLine() and
            dumpBedLines().

    Exits the process with status 1 when makeBedLine() (or one of the int()
    conversions) fails for a line whose score field does not start with "Not".
    """
    # File objects and sys.stdin carry .name; fall back for any other iterable
    # so that reporting a malformed line cannot itself raise AttributeError.
    sourceName = getattr(inf, "name", "<input>")

    # enumerate() numbers every physical line, including skipped header and
    # malformed lines, so reported positions match the downloaded file.
    for lineCount, line in enumerate(inf, start=1):
        if line.startswith(('#', 'track', 'browser')):
            continue

        fields = line.strip().split("\t")
        if len(fields) != 5:
            sys.stderr.write("Error: ignoring ill formatted bed line %s:%d\n" % (sourceName, lineCount))
            continue
        chrom, chromStart, chromEnd, name, score = fields

        try:
            bedLines[name] = makeBedLine(chrom, int(chromStart), int(chromEnd), name, int(score), dosageType)
        except Exception:
            # error here comes from something to do with the associated gene_curation_list and not
            # the dosage file itself
            # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
            if score.startswith("Not"):
                # Non-numeric score beginning with "Not": store the line with score -1.
                bedLines[name] = makeBedLine(chrom, int(chromStart), int(chromEnd), name, -1, dosageType)
            else:
                print(sys.exc_info()[0])
                sys.stderr.write("bad input line:\n%s\n" % line)
                sys.exit(1)

    dumpBedLines(dosageType)


def main():
    """Parse the command line, load the optional extra file, and process the input.

    Reads from sys.stdin when the infile argument is the literal string
    "stdin"; otherwise opens infile as a text file.
    """
    args = setupCommandLine()
    if args.extraFile:
        parseExtraFile(args.extraFile, args.dosageType)
    if args.infile == "stdin":
        processClinGenDosage(sys.stdin, args.dosageType)
    else:
        with open(args.infile) as inf:
            processClinGenDosage(inf, args.dosageType)


if __name__ == "__main__":
    main()