src/hg/utils/otto/genCC/doGenCC.py 76d933ccf24bd1a2a360f9b6a437c2e8445c7583

76d933ccf24bd1a2a360f9b6a437c2e8445c7583
lrnassar
  Wed Oct 18 11:01:51 2023 -0700
Fixing a small encoding bug in the GenCC otto job and clarifying the printed message. Refs #31795

diff --git src/hg/utils/otto/genCC/doGenCC.py src/hg/utils/otto/genCC/doGenCC.py
index eafee9e..a099d72 100644
--- src/hg/utils/otto/genCC/doGenCC.py
+++ src/hg/utils/otto/genCC/doGenCC.py
@@ -148,33 +148,33 @@
                         chromStart = geneDic['txStart']
                         chromEnd = geneDic['txEnd']
                         strand = geneDic['strand']
                         ensGene = ""
                         ensTranscript = geneDic['ensTranscript']
                         refSeqAccession = ""
                         outputHg38File.write("%s\t%s\t%s\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (chrom,chromStart,chromEnd,genCCname,
                             strand,chromStart,chromEnd,classificationRgb,ensTranscript,ensGene,refSeqAccession,"\t".join(line)))
                     except:
                         n+=1
             else:
                 badItems.append(line)
 
     inputGenCCFile.close()
     outputHg38File.close()
-    print(str(len(badItems))+" lines were skipped in the file because they did not they have incorrect formatting. This is usually because they had newline and tab characters in the info column. Usually fewer than 60 items are skipped this way.\n\n")
+    print(str(len(badItems))+" lines were skipped in the file because they have incorrect formatting. This is usually because they had newline and tab characters in the info column. Usually fewer than 60 items are skipped this way. If the number is much greater, verify the script and update the estimate. The lines are printed below.\n\n")
     for item in badItems:
-        print(item)
+        print("\t".join(item).encode('utf-8'))
     print("\n\nhg38 genCC bed file completed. Total number of failed entries: "+str(n))
 
 def buildFileHg19(genCCfile,outPutFile):
     hg38GenCCbedFile = open(genCCfile,'r',encoding="utf-8")
     outputHg19File = open(outPutFile,'w',encoding='utf-8')
     n=0
     for line in hg38GenCCbedFile:
         line = line.rstrip()
         line = line.split("\t")
         geneSymbol = line[14]
         nmAccession = line[11]
         if nmAccession != "":
             try:
                 geneDic = fetchGeneInfoHg19(nmAccession,'ncbiRefSeq','hg19')
                 chrom = geneDic['chrom']