c5026c9eeaff1077f67822e478031ef9552fbd47
lrnassar
  Tue Oct 22 17:35:33 2024 -0700
I made a bonehead move when I first wrote this validation: I did not re-assign the variable after calling split(). As a result, the values being compared were always the full md5sum output (checksum plus file name), and since the two file names are always different, the otto job ran every time. This fixes the validation so the job only runs when there is new data. No RM.

diff --git src/hg/utils/otto/genCC/doGenCC.py src/hg/utils/otto/genCC/doGenCC.py
index 43642089..28f9942 100644
--- src/hg/utils/otto/genCC/doGenCC.py
+++ src/hg/utils/otto/genCC/doGenCC.py
@@ -205,33 +205,33 @@
                 chromStart = geneDic['txStart']
                 chromEnd = geneDic['txEnd']
                 strand = geneDic['strand']
                 outputHg19File.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (chrom,chromStart,chromEnd,
                                         "\t".join(line[3:6]),chromStart,chromEnd,"\t".join(line[8:])))
             except:
                 n+=1
                 print("No match for: "+geneSymbol)
     hg38GenCCbedFile.close()
     outputHg19File.close()
     print("hg19 genCC bed file completed. Total number of failed entries: "+str(n))
 
 def checkIfUpdateIsNeeded():
     bash("wget -q https://search.thegencc.org/download/action/submissions-export-tsv -O /hive/data/outside/otto/genCC/newSubmission.tsv")
     newMd5sum = bash("md5sum /hive/data/outside/otto/genCC/newSubmission.tsv")
-    newMd5sum.split("  ")[0]
+    newMd5sum = newMd5sum.split("  ")[0]
     oldMd5sum = bash("md5sum /hive/data/outside/otto/genCC/prevSubmission.tsv")
-    oldMd5sum.split("  ")[0]
+    oldMd5sum = oldMd5sum.split("  ")[0]
     if oldMd5sum != newMd5sum:
         return(True)
     else:
         return(False)
     
 if checkIfUpdateIsNeeded():
     date = str(datetime.now()).split(" ")[0]
     workDir = "/hive/data/outside/otto/genCC/"+date
     bash("mkdir -p "+workDir)
     hg19outPutFile = workDir+"/hg19genCC.bed"
     hg38outPutFile = workDir+"/hg38genCC.bed"
     bash("cp /hive/data/outside/otto/genCC/newSubmission.tsv "+workDir)
     genCCtsvFile = "/hive/data/outside/otto/genCC/newSubmission.tsv"
          
     buildFileHg38(genCCtsvFile,hg38outPutFile)