3684310aca6ed1ad61468676c28a7dac37e4af59
chmalee
  Wed Jul 1 09:27:40 2020 -0700
Make ClinGen dosage otto job more resilient to poorly formatted downloads

diff --git src/hg/utils/otto/clinGen/processClinGenDosage.py src/hg/utils/otto/clinGen/processClinGenDosage.py
index feb63e5..3563fec 100755
--- src/hg/utils/otto/clinGen/processClinGenDosage.py
+++ src/hg/utils/otto/clinGen/processClinGenDosage.py
@@ -128,43 +128,53 @@
     bed["chromEnd"] = chromEnd
     bed["name"] = name
     bed["score"] = score
     bed["strand"] = "."
     bed["thickStart"] = chromStart
     bed["thickEnd"] = chromEnd
     bed["itemRgb"] = getColor(bed, dosageType)
     bed["Size"] = chromEnd - chromStart
     extra = extraInfo[name]
     bed.update(extra)
     bed["_mouseOver"] = getMouseover(bed,dosageType)
     return bed
 
 def processClinGenDosage(inf, dosageType):
     global bedLines
+    lineCount = 1  # 1-based number of the input line currently being examined
     for line in inf:
         if line.startswith('#') or line.startswith('track') or line.startswith('browser'):
+            lineCount += 1  # skipped header/comment lines still count toward line numbers
             continue
         trimmed = line.strip()
+        try:  # unpacking below requires exactly five tab-separated fields
             chrom, chromStart, chromEnd, name, score = trimmed.split("\t")
+        except ValueError:  # wrong field count: report file:line on stderr and skip the line
+            sys.stderr.write("Error: ignoring ill formatted bed line %s:%d\n" % (inf.name, lineCount))
+            lineCount += 1
+            continue
         try:
             bedLines[name] = makeBedLine(chrom, int(chromStart), int(chromEnd), name, int(score), dosageType)
         except:
+            # any failure lands here: int() on a non-numeric field (a score starting with "Not" is
+            # retried as -1 below) or an error in makeBedLine (presumably the gene_curation_list data)
             if score.startswith("Not"):
                 bedLines[name] = makeBedLine(chrom, int(chromStart), int(chromEnd), name, -1, dosageType)
             else:
                 print(sys.exc_info()[0])
                 sys.stderr.write("bad input line:\n%s\n" % line)
                 sys.exit(1)
+        lineCount += 1  # advance after a line that was stored (directly or via the -1 retry)
     dumpBedLines(dosageType)
 
 def main():
     args = setupCommandLine()
     if args.extraFile:
         parseExtraFile(args.extraFile, args.dosageType)
     if args.infile == "stdin":
         processClinGenDosage(sys.stdin,args.dosageType)
     else:
         with open(args.infile) as inf:
             processClinGenDosage(inf, args.dosageType)
 
 if __name__=="__main__":
     main()