src/hg/utils/otto/userRequests/ottoRequestPush.py 481b82eaafcf77f9b82bce0159ff84e9284c3325

481b82eaafcf77f9b82bce0159ff84e9284c3325
hiram
  Tue Jun 2 10:36:03 2026 -0700
correctly capture any stderr or stdout output and only send it to cron stderr in case of error, refs #31811

diff --git src/hg/utils/otto/userRequests/ottoRequestPush.py src/hg/utils/otto/userRequests/ottoRequestPush.py
index a5ee51f633e..748797e7499 100755
--- src/hg/utils/otto/userRequests/ottoRequestPush.py
+++ src/hg/utils/otto/userRequests/ottoRequestPush.py
@@ -7,30 +7,32 @@
 genArkMakeCommands sequence, rsyncs the UCSC-native .over.chain.gz to
 both hgdownload hosts, and advances each request's status (6 on full
 success, 7 on rsync failure, stays at 5 on clade-side failure).
 
 Differences from ottoRequestPush.py: all shared helpers live in
 ottoLib.py.  Push-specific helpers (pendingRequests, mark*,
 pushUcscChain) and main() stay here.
 
 This is a parallel script for review/cutover - the live cron continues
 to invoke ottoRequestPush.py.
 """
 
 import os
 import subprocess
 import sys
+import tempfile
+import atexit
 
 import ottoLib
 
 scriptDir = os.path.dirname(os.path.abspath(__file__))
 # share the live cron's lock so the two scripts cannot run concurrently
 # in the same cladeAsmHub directories
 lockPath = os.path.join(scriptDir, "ottoRequestPush.lock")
 
 # UCSC native .over.chain.gz files get rsync'd to both hgdownload hosts.
 pushUser = "qateam"
 pushHosts = ["hgdownload1.soe.ucsc.edu", "hgdownload3.gi.ucsc.edu"]
 
 
 def pendingRequests():
     """Status=5 liftOver requests as [(id, fromDb, toDb), ...]."""
@@ -90,30 +92,50 @@
             print("# ERROR: pushUcscChain: mkdir failed on %s: %s"
                   % (host, result.stderr.strip()), file=sys.stderr)
             return False
         result = subprocess.run(
             ["rsync", "-avL", src, "%s:%s" % (target, dstFile)],
             capture_output=True, text=True,
         )
         if result.returncode != 0:
             print("# ERROR: pushUcscChain: rsync to %s failed: %s"
                   % (host, result.stderr.strip()), file=sys.stderr)
             return False
     return True
 
 
 def main():
+    # Set up temporary log file for capturing all output
+    pid = os.getpid()
+    logFile = f"/dev/shm/ottoPush.{pid}.txt"
+
+    # Save original stdout/stderr for potential error reporting
+    originalStderr = sys.stderr
+
+    # Remove the log file at normal interpreter exit (atexit does not run if the process is killed by a signal)
+    def cleanup():
+        if os.path.exists(logFile):
+            os.remove(logFile)
+
+    atexit.register(cleanup)
+
+    try:
+        # Redirect stdout and stderr to the log file
+        with open(logFile, 'w') as log:
+            sys.stdout = log
+            sys.stderr = log
+
             lockFh = ottoLib.acquireSingletonLock(lockPath)  # noqa: F841
             requests = pendingRequests()
             if not requests:
                 return
             dbs = set()
             for _, fromDb, toDb in requests:
                 dbs.update((fromDb, toDb))
             accessions = {db for db in dbs if ottoLib.gcPattern.match(db)}
             dbDbClades = ottoLib.loadDbDbClades()
             genarkInfo = ottoLib.lookupGenark(accessions)
             grouped = ottoLib.groupByClade(dbs, dbDbClades, genarkInfo)
 
             # bring the otto kent tree up to date before any cladeAsmHub make
             if not ottoLib.gitPullKentTree():
                 sys.exit(1)
@@ -173,32 +195,70 @@
                     if not pushUcscChain(target, query):
                         pushFailedDirs.append("%s -> %s" % (target, query))
                         pushOk = False
                         break
                 if pushOk:
                     completedIds.append(reqId)
                 else:
                     failedIds.append(reqId)
                     pushFailures.append((reqId, fromDb, toDb, pushFailedDirs))
 
             markComplete(completedIds)
             markFailed(failedIds)
 
             if cladeFailures:
                 print("# the following request(s) stay at status=5 due to failed "
-              "clade pushes:", file=sys.stderr)
+                      "clade pushes:")
                 for reqId, fromDb, toDb, badClades in cladeFailures:
                     print("#   id=%d %s -> %s (failed clade(s): %s)"
-                  % (reqId, fromDb, toDb, ", ".join(badClades)),
-                  file=sys.stderr)
+                          % (reqId, fromDb, toDb, ", ".join(badClades)))
 
             if pushFailures:
                 print("# the following request(s) set to status=7 due to rsync "
-              "failures:", file=sys.stderr)
+                      "failures:")
                 for reqId, fromDb, toDb, dirs in pushFailures:
                     print("#   id=%d %s -> %s (failed: %s)"
-                  % (reqId, fromDb, toDb, "; ".join(dirs)),
-                  file=sys.stderr)
+                          % (reqId, fromDb, toDb, "; ".join(dirs)))
+
+            # Restore stdout/stderr before potential exit
+            sys.stdout = sys.__stdout__
+            sys.stderr = sys.__stderr__
+
+    except Exception as e:
+        # Restore stdout/stderr first
+        sys.stdout = sys.__stdout__
+        sys.stderr = originalStderr
+
+        # Print the captured log to stderr for cron visibility
+        if os.path.exists(logFile):
+            with open(logFile, 'r') as log:
+                for line in log:
+                    print(line, end='', file=originalStderr)
+
+        # Print the exception that caused the failure
+        print(f"# FATAL ERROR: {e}", file=originalStderr)
+        sys.exit(1)
+
+    except SystemExit as e:
+        # Handle sys.exit() calls - restore streams first
+        sys.stdout = sys.__stdout__
+        sys.stderr = originalStderr
+
+        # If exit code is non-zero, print the log for debugging
+        if e.code != 0:
+            if os.path.exists(logFile):
+                with open(logFile, 'r') as log:
+                    for line in log:
+                        print(line, end='', file=originalStderr)
+
+        # Re-raise the SystemExit
+        raise
+
+    finally:
+        # Always restore streams and cleanup
+        sys.stdout = sys.__stdout__
+        sys.stderr = sys.__stderr__
+        cleanup()
 
 
 if __name__ == "__main__":
     main()