d0dafe3247d2e3c5c892c498784e4065590df665
jnavarr5
  Wed Apr 15 12:14:43 2026 -0700
Adding a --force flag so the script continues when the item count difference between the old and new bigBed files is more than 10% (to be used after QA approves), no redmine.

diff --git src/hg/utils/otto/genCC/doGenCC.py src/hg/utils/otto/genCC/doGenCC.py
index 645eca1de1f..a9bb5c9a1c3 100644
--- src/hg/utils/otto/genCC/doGenCC.py
+++ src/hg/utils/otto/genCC/doGenCC.py
@@ -1,24 +1,30 @@
 #!/usr/bin/env python3
 #Made otto by Lou 9/14/2023
 
+import argparse
 import subprocess
 import csv
 import re
 import sys
 from datetime import datetime
 
+parser = argparse.ArgumentParser(description='Build and update GenCC track data.')
+parser.add_argument('--force', action='store_true',
+                    help='Continue even if item count difference is more than 10%%')
+args = parser.parse_args()
+
 def bash(cmd):
     """Run the cmd in bash subprocess"""
     try:
         rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
                                        stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
         bashStdoutt = rawBashOutput.stdout
     except subprocess.CalledProcessError as e:
         raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
     return(bashStdoutt)
 
 def getLatestGencodeTable(db, pattern):
     """Find the latest GENCODE table matching a pattern like 'wgEncodeGencodeComp%' or
     'wgEncodeGencodeComp%lift37'. Returns the table name with the highest version number."""
     output = bash('hgsql -Ne "show tables like \''+pattern+'\'" '+db)
     tables = output.rstrip().split("\n")
@@ -285,31 +291,35 @@
     newMd5sum = newMd5sum.split("  ")[0]
     oldMd5sum = bash("md5sum /hive/data/outside/otto/genCC/prevSubmission.tsv")
     oldMd5sum = oldMd5sum.split("  ")[0]
     if oldMd5sum != newMd5sum:
         return(True)
     else:
         return(False)
 
 def checkItemCounts(oldBb, newBb):
     """Compare item counts between old and new bigBed files. Exit if difference > 10%."""
     oldItemCount = bash('bigBedInfo '+oldBb+' | grep "itemCount"')
     oldItemCount = int(oldItemCount.rstrip().split("itemCount: ")[1].replace(",",""))
     newItemCount = bash('bigBedInfo '+newBb+' | grep "itemCount"')
     newItemCount = int(newItemCount.rstrip().split("itemCount: ")[1].replace(",",""))
     if abs(newItemCount - oldItemCount) > 0.1 * max(newItemCount, oldItemCount):
-        sys.exit("Item count difference >10% for "+newBb+": old="+str(oldItemCount)+" new="+str(newItemCount))
+        msg = "Item count difference >10% for "+newBb+": old="+str(oldItemCount)+" new="+str(newItemCount)
+        if args.force:
+            print("WARNING:\n "+msg+"\n\n (continuing due to QA approval)")
+        else:
+            sys.exit(msg)
     print(oldBb+" old: "+str(oldItemCount)+" new: "+str(newItemCount))
 
 if checkIfUpdateIsNeeded():
     date = str(datetime.now()).split(" ")[0]
     workDir = "/hive/data/outside/otto/genCC/"+date
     bash("mkdir -p "+workDir)
     hg19outPutFile = workDir+"/hg19genCC.bed"
     hg38outPutFile = workDir+"/hg38genCC.bed"
     bash("cp /hive/data/outside/otto/genCC/newSubmission.tsv "+workDir)
     genCCtsvFile = "/hive/data/outside/otto/genCC/newSubmission.tsv"
 
     # Auto-detect latest GENCODE Comprehensive tables
     gencodeHg38 = getLatestGencodeTable('hg38', 'wgEncodeGencodeComp%')
     gencodeHg19 = getLatestGencodeTable('hg19', 'wgEncodeGencodeComp%lift37')