d0dafe3247d2e3c5c892c498784e4065590df665 jnavarr5 Wed Apr 15 12:14:43 2026 -0700 Adding a flag to continue with the script if the item count is more than 10% (after QA approves), no redmine. diff --git src/hg/utils/otto/genCC/doGenCC.py src/hg/utils/otto/genCC/doGenCC.py index 645eca1de1f..a9bb5c9a1c3 100644 --- src/hg/utils/otto/genCC/doGenCC.py +++ src/hg/utils/otto/genCC/doGenCC.py @@ -1,24 +1,30 @@ #!/usr/bin/env python3 #Made otto by Lou 9/14/2023 +import argparse import subprocess import csv import re import sys from datetime import datetime +parser = argparse.ArgumentParser(description='Build and update GenCC track data.') +parser.add_argument('--force', action='store_true', + help='Continue even if item count difference is more than 10%%') +args = parser.parse_args() + def bash(cmd): """Run the cmd in bash subprocess""" try: rawBashOutput = subprocess.run(cmd, check=True, shell=True,\ stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT) bashStdoutt = rawBashOutput.stdout except subprocess.CalledProcessError as e: raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) return(bashStdoutt) def getLatestGencodeTable(db, pattern): """Find the latest GENCODE table matching a pattern like 'wgEncodeGencodeComp%' or 'wgEncodeGencodeComp%lift37'. Returns the table name with the highest version number.""" output = bash('hgsql -Ne "show tables like \''+pattern+'\'" '+db) tables = output.rstrip().split("\n") @@ -285,31 +291,35 @@ newMd5sum = newMd5sum.split(" ")[0] oldMd5sum = bash("md5sum /hive/data/outside/otto/genCC/prevSubmission.tsv") oldMd5sum = oldMd5sum.split(" ")[0] if oldMd5sum != newMd5sum: return(True) else: return(False) def checkItemCounts(oldBb, newBb): """Compare item counts between old and new bigBed files. Exit if difference > 10%.""" oldItemCount = bash('bigBedInfo '+oldBb+' | grep "itemCount"') oldItemCount = int(oldItemCount.rstrip().split("itemCount: ")[1].replace(",","")) newItemCount = bash('bigBedInfo '+newBb+' | grep "itemCount"') newItemCount = int(newItemCount.rstrip().split("itemCount: ")[1].replace(",","")) if abs(newItemCount - oldItemCount) > 0.1 * max(newItemCount, oldItemCount): - sys.exit("Item count difference >10% for "+newBb+": old="+str(oldItemCount)+" new="+str(newItemCount)) + msg = "Item count difference >10% for "+newBb+": old="+str(oldItemCount)+" new="+str(newItemCount) + if args.force: + print("WARNING:\n "+msg+"\n\n (continuing due to QA approval)") + else: + sys.exit(msg) print(oldBb+" old: "+str(oldItemCount)+" new: "+str(newItemCount)) if checkIfUpdateIsNeeded(): date = str(datetime.now()).split(" ")[0] workDir = "/hive/data/outside/otto/genCC/"+date bash("mkdir -p "+workDir) hg19outPutFile = workDir+"/hg19genCC.bed" hg38outPutFile = workDir+"/hg38genCC.bed" bash("cp /hive/data/outside/otto/genCC/newSubmission.tsv "+workDir) genCCtsvFile = "/hive/data/outside/otto/genCC/newSubmission.tsv" # Auto-detect latest GENCODE Comprehensive tables gencodeHg38 = getLatestGencodeTable('hg38', 'wgEncodeGencodeComp%') gencodeHg19 = getLatestGencodeTable('hg19', 'wgEncodeGencodeComp%lift37')