src/hg/utils/otto/genCC/doGenCC.py 409b76b74d5e20204f44a74ef250aca5b7ca61db

409b76b74d5e20204f44a74ef250aca5b7ca61db
lrnassar
  Wed May 22 04:24:46 2024 -0700
Commenting out the part of the genCC otto script that prints out badly formatted lines; it causes a lot of spam and is not actionable unless the number of entries suddenly explodes. No RM.

diff --git src/hg/utils/otto/genCC/doGenCC.py src/hg/utils/otto/genCC/doGenCC.py
index a099d72..43642089 100644
--- src/hg/utils/otto/genCC/doGenCC.py
+++ src/hg/utils/otto/genCC/doGenCC.py
@@ -149,32 +149,33 @@
                         chromEnd = geneDic['txEnd']
                         strand = geneDic['strand']
                         ensGene = ""
                         ensTranscript = geneDic['ensTranscript']
                         refSeqAccession = ""
                         outputHg38File.write("%s\t%s\t%s\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (chrom,chromStart,chromEnd,genCCname,
                             strand,chromStart,chromEnd,classificationRgb,ensTranscript,ensGene,refSeqAccession,"\t".join(line)))
                     except:
                         n+=1
             else:
                 badItems.append(line)
 
     inputGenCCFile.close()
     outputHg38File.close()
     print(str(len(badItems))+" lines were skipped in the file because they have incorrect formatting. This is usually because they had newline and tab characters in the info column. Usually fewer than 60 items are skipped this way. If the number is much greater, verify the script and update the estimate. The lines are printed below.\n\n")
-    for item in badItems:
-        print("\t".join(item).encode('utf-8'))
+    print("Note: This part of the code was commented out to reduce spam. Uncomment to see the lines.")
+    #for item in badItems:
+    #    print("\t".join(item).encode('utf-8'))
     print("\n\nhg38 genCC bed file completed. Total number of failed entries: "+str(n))
 
 def buildFileHg19(genCCfile,outPutFile):
     hg38GenCCbedFile = open(genCCfile,'r',encoding="utf-8")
     outputHg19File = open(outPutFile,'w',encoding='utf-8')
     n=0
     for line in hg38GenCCbedFile:
         line = line.rstrip()
         line = line.split("\t")
         geneSymbol = line[14]
         nmAccession = line[11]
         if nmAccession != "":
             try:
                 geneDic = fetchGeneInfoHg19(nmAccession,'ncbiRefSeq','hg19')
                 chrom = geneDic['chrom']