481b82eaafcf77f9b82bce0159ff84e9284c3325 hiram Tue Jun 2 10:36:03 2026 -0700 correctly capturing any stderr or stdout and only send it to cron stderr in case of error refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestPush.py src/hg/utils/otto/userRequests/ottoRequestPush.py index a5ee51f633e..748797e7499 100755 --- src/hg/utils/otto/userRequests/ottoRequestPush.py +++ src/hg/utils/otto/userRequests/ottoRequestPush.py @@ -7,30 +7,32 @@ genArkMakeCommands sequence, rsyncs the UCSC-native .over.chain.gz to both hgdownload hosts, and advances each request's status (6 on full success, 7 on rsync failure, stays at 5 on clade-side failure). Differences from ottoRequestPush.py: all shared helpers live in ottoLib.py. Push-specific helpers (pendingRequests, mark*, pushUcscChain) and main() stay here. This is a parallel script for review/cutover - the live cron continues to invoke ottoRequestPush.py. """ import os import subprocess import sys +import tempfile +import atexit import ottoLib scriptDir = os.path.dirname(os.path.abspath(__file__)) # share the live cron's lock so the two scripts cannot run concurrently # in the same cladeAsmHub directories lockPath = os.path.join(scriptDir, "ottoRequestPush.lock") # UCSC native .over.chain.gz files get rsync'd to both hgdownload hosts. pushUser = "qateam" pushHosts = ["hgdownload1.soe.ucsc.edu", "hgdownload3.gi.ucsc.edu"] def pendingRequests(): """Status=5 liftOver requests as [(id, fromDb, toDb), ...].""" @@ -90,30 +92,50 @@ print("# ERROR: pushUcscChain: mkdir failed on %s: %s" % (host, result.stderr.strip()), file=sys.stderr) return False result = subprocess.run( ["rsync", "-avL", src, "%s:%s" % (target, dstFile)], capture_output=True, text=True, ) if result.returncode != 0: print("# ERROR: pushUcscChain: rsync to %s failed: %s" % (host, result.stderr.strip()), file=sys.stderr) return False return True def main(): + # Set up temporary log file for capturing all output + pid = os.getpid() + logFile = f"/dev/shm/ottoPush.{pid}.txt" + + # Save original stdout/stderr for potential error reporting + originalStderr = sys.stderr + + # Ensure cleanup happens even if script is killed + def cleanup(): + if os.path.exists(logFile): + os.remove(logFile) + + atexit.register(cleanup) + + try: + # Redirect stdout and stderr to the log file + with open(logFile, 'w') as log: + sys.stdout = log + sys.stderr = log + lockFh = ottoLib.acquireSingletonLock(lockPath) # noqa: F841 requests = pendingRequests() if not requests: return dbs = set() for _, fromDb, toDb in requests: dbs.update((fromDb, toDb)) accessions = {db for db in dbs if ottoLib.gcPattern.match(db)} dbDbClades = ottoLib.loadDbDbClades() genarkInfo = ottoLib.lookupGenark(accessions) grouped = ottoLib.groupByClade(dbs, dbDbClades, genarkInfo) # bring the otto kent tree up to date before any cladeAsmHub make if not ottoLib.gitPullKentTree(): sys.exit(1) @@ -173,32 +195,70 @@ if not pushUcscChain(target, query): pushFailedDirs.append("%s -> %s" % (target, query)) pushOk = False break if pushOk: completedIds.append(reqId) else: failedIds.append(reqId) pushFailures.append((reqId, fromDb, toDb, pushFailedDirs)) markComplete(completedIds) markFailed(failedIds) if cladeFailures: print("# the following request(s) stay at status=5 due to failed " - "clade pushes:", file=sys.stderr) + "clade pushes:") for reqId, fromDb, toDb, badClades in cladeFailures: print("# id=%d %s -> %s (failed clade(s): %s)" - % (reqId, fromDb, toDb, ", ".join(badClades)), - file=sys.stderr) + % (reqId, fromDb, toDb, ", ".join(badClades))) if pushFailures: print("# the following request(s) set to status=7 due to rsync " - "failures:", file=sys.stderr) + "failures:") for reqId, fromDb, toDb, dirs in pushFailures: print("# id=%d %s -> %s (failed: %s)" - % (reqId, fromDb, toDb, "; ".join(dirs)), - file=sys.stderr) + % (reqId, fromDb, toDb, "; ".join(dirs))) + + # Restore stdout/stderr before potential exit + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + except Exception as e: + # Restore stdout/stderr first + sys.stdout = sys.__stdout__ + sys.stderr = originalStderr + + # Print the captured log to stderr for cron visibility + if os.path.exists(logFile): + with open(logFile, 'r') as log: + for line in log: + print(line, end='', file=originalStderr) + + # Print the exception that caused the failure + print(f"# FATAL ERROR: {e}", file=originalStderr) + sys.exit(1) + + except SystemExit as e: + # Handle sys.exit() calls - restore streams first + sys.stdout = sys.__stdout__ + sys.stderr = originalStderr + + # If exit code is non-zero, print the log for debugging + if e.code != 0: + if os.path.exists(logFile): + with open(logFile, 'r') as log: + for line in log: + print(line, end='', file=originalStderr) + + # Re-raise the SystemExit + raise + + finally: + # Always restore streams and cleanup + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + cleanup() if __name__ == "__main__": main()