705b694649940189cdb66af57560c97a0cd606f7
lrnassar
  Fri May 15 14:42:52 2026 -0700
Three small rtsUpdate fixes uncovered during first live test on Non_Coding_SNVs_hg38: line-buffer stdout so git diff output interleaves correctly with surrounding prints, replace deprecated datetime.utcnow() with timezone-aware datetime.now(datetime.timezone.utc), and print a trailing newline after confirm() so the post-prompt status message doesn't land on the same line as the prompt. refs #32768

diff --git src/hg/utils/rts/rtsUpdate src/hg/utils/rts/rtsUpdate
index 4c634089494..3213c3f480b 100755
--- src/hg/utils/rts/rtsUpdate
+++ src/hg/utils/rts/rtsUpdate
@@ -1,520 +1,528 @@
 #!/usr/bin/env python3
 # rtsUpdate -- propagate Recommended Track Set sessions from a curator
 # session into the View user across hgwdev -> hgwbeta -> genome-centdb.
 # Refs #32768, #34907.
 
 import argparse
 import datetime
 import getpass
 import pathlib
 import re
 import subprocess
 import sys
 import tempfile
 import time
 from urllib.parse import urlparse
 
 HOME = pathlib.Path.home()
 RTS_DIR = HOME / "kent/src/hg/utils/rts"
 INC_DIR = HOME / "kent/src/hg/htdocs/inc"
 LOGFILE = RTS_DIR / "rtsUpdate.log"
 VERBOTEN = RTS_DIR / "verboten.lst"
 
 TARGET_USER = "View"
 
 HOSTS = {
     "dev":  ("hgwdev",        "hgcentraltest"),
     "beta": ("hgwbeta",       "hgcentralbeta"),
     "rr":   ("genome-centdb", "hgcentral"),
 }
 
 VALIDATE_URL = {
     "dev":  "https://hgwdev.gi.ucsc.edu",
     "beta": "https://hgwbeta.soe.ucsc.edu",
     "rr":   "https://genome.ucsc.edu",
 }
 
 # Session and userName must match this. Excludes anything that could be SQL- or
 # shell-special; permits the URL-encoding form ('%' and digits) used for sessions
 # whose human-readable names contain spaces (e.g. 'CNVs%20Clinical').
 SAFE_NAME = re.compile(r"^[A-Za-z0-9_%.\-]+$")
 
 
 def log(msg):
-    ts = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
     line = f"{ts} {getpass.getuser()} {msg}\n"
     LOGFILE.parent.mkdir(parents=True, exist_ok=True)
     with open(LOGFILE, "a") as f:
         f.write(line)
 
 
 def die(msg, code=1):
     print(f"ERROR: {msg}", file=sys.stderr)
     sys.exit(code)
 
 
 def confirm(prompt):
     while True:
         try:
             ans = input(prompt + " (yes/no): ").strip().lower()
         except EOFError:
             return False
         if ans == "yes":
+            print()
             return True
         if ans == "no":
+            print()
             return False
         print("  please type 'yes' or 'no'")
 
 
 def validate_name(name, label):
     if not name or not SAFE_NAME.match(name):
         die(f"unsafe {label}: {name!r} (must match {SAFE_NAME.pattern})")
 
 
 # ---------- hgsql shim ----------
 
 def hgsql_read(host_key, sql):
     """Run a SELECT.  Returns list[list[str]].  Caller is responsible for
     interpolating only validated values into sql."""
     host, db = HOSTS[host_key]
     cmd = ["hgsql", "-h", host, db, "-N", "-B", "-e", sql]
     p = subprocess.run(cmd, capture_output=True, text=True)
     if p.returncode != 0:
         die(f"hgsql read {host_key} failed: {p.stderr.strip()}")
     out = p.stdout
     if out.endswith("\n"):
         out = out[:-1]
     if not out:
         return []
     return [row.split("\t") for row in out.split("\n")]
 
 
 def hgsql_write(host_key, sql):
     """Pipe SQL via stdin.  Caller is responsible for safety (binary blobs go
     through hex_blob; identifiers through validate_name)."""
     host, db = HOSTS[host_key]
     cmd = ["hgsql", "-h", host, db]
     p = subprocess.run(cmd, input=sql, capture_output=True, text=True)
     if p.returncode != 0:
         die(f"hgsql write {host_key} failed: {p.stderr.strip()}\n--- sql ---\n"
             f"{sql[:500]}{'...' if len(sql) > 500 else ''}")
     return p.stdout
 
 
 def hex_blob(s):
     """MySQL hex literal for a string -- binary-safe substitute for a quoted value."""
     h = s.encode("utf-8").hex()
     if not h:
         return "''"
     return f"_binary 0x{h}"
 
 
 # ---------- RTS domain ----------
 
 def load_verboten():
     pats = []
     for line in VERBOTEN.read_text().splitlines():
         line = line.strip()
         if line and not line.startswith("#"):
             pats.append(re.compile(line))
     return pats
 
 
 def split_pairs(contents):
     return [p for p in contents.split("&") if p]
 
 
 def join_pairs(pairs):
     return "&".join(pairs)
 
 
 def scrub(pairs, verboten):
     out = []
     for p in pairs:
         key = p.split("=", 1)[0]
         if any(rx.match(key) for rx in verboten):
             continue
         out.append(p)
     return sorted(set(out))
 
 
 def find_db_for_session(session):
     """Return the assembly db for a session by scanning recTrackSets.<db>.tab."""
     for tab in sorted(INC_DIR.glob("recTrackSets.*.tab")):
         m = re.match(r"recTrackSets\.([A-Za-z0-9]+)\.tab$", tab.name)
         if not m:
             continue
         db = m.group(1)
         for line in tab.read_text().splitlines():
             if line.startswith("#") or not line.strip():
                 continue
             cols = line.split("\t")
             if len(cols) >= 3 and cols[2] == session:
                 return db
     return None
 
 
 def find_db_in_tree(session):
     """Fallback: scan rts/<db>/<session> for an existing dump file."""
     for d in sorted(RTS_DIR.iterdir()):
         if d.is_dir() and (d / session).exists():
             if SAFE_NAME.match(d.name):
                 return d.name
     return None
 
 
 def db_from_pairs(pairs):
     for p in pairs:
         if p.startswith("db="):
             return p[3:]
     return None
 
 
 def kent_path(db, session):
     return RTS_DIR / db / session
 
 
 def write_kent_file(db, session, pairs):
     path = kent_path(db, session)
     path.parent.mkdir(parents=True, exist_ok=True)
     path.write_text("\n".join(pairs) + "\n")
     return path
 
 
 def read_kent_file(db, session):
     path = kent_path(db, session)
     if not path.exists():
         die(f"kent tree file missing: {path} -- run `rtsUpdate fetch` first")
     pairs = [ln.rstrip() for ln in path.read_text().splitlines() if ln.strip()]
     return pairs
 
 
 def fetch_session_row(host_key, user, session):
     """Return (contents, settings, exists)."""
     validate_name(user, "userName")
     validate_name(session, "sessionName")
     sql = (f"SELECT contents, settings FROM namedSessionDb "
            f"WHERE userName='{user}' AND sessionName='{session}'")
     rows = hgsql_read(host_key, sql)
     if not rows:
         return "", "", False
     row = rows[0]
     contents = row[0] if len(row) > 0 else ""
     settings = row[1] if len(row) > 1 else ""
     return contents, settings, True
 
 
 def show_pair_diff(label_a, pairs_a, label_b, pairs_b, verbose):
     """Symmetric diff between two pair lists."""
     set_a = set(pairs_a)
     set_b = set(pairs_b)
     only_a = sorted(set_a - set_b)
     only_b = sorted(set_b - set_a)
     print(f"  - {len(only_a)} vars in [{label_a}] not in [{label_b}]")
     print(f"  + {len(only_b)} vars in [{label_b}] not in [{label_a}]")
     if verbose:
         for p in only_a:
             print(f"    -{p}")
         for p in only_b:
             print(f"    +{p}")
 
 
 def show_git_diff_for(path):
     """Run `git -C kent diff -- <path>`, after `git add -N` so new files show."""
     rel = str(path)
     try:
         subprocess.run(["git", "-C", str(HOME / "kent"), "add", "-N", rel],
                        check=False, capture_output=True)
     except FileNotFoundError:
         return
     print("\n--- git diff (kent tree) ---")
     subprocess.run(["git", "-C", str(HOME / "kent"), "--no-pager",
                     "diff", "--no-color", "--", rel],
                    check=False)
 
 
 def apply_session_write(host_key, session, contents, dry_run):
     """Insert or update View/<session> on host_key.  Returns SQL that was run
     (or would be run, if dry_run)."""
     validate_name(session, "sessionName")
     _, _, exists = fetch_session_row(host_key, TARGET_USER, session)
     if exists:
         sql = (f"UPDATE namedSessionDb "
                f"SET contents = {hex_blob(contents)}, lastUse = NOW() "
                f"WHERE userName='{TARGET_USER}' AND sessionName='{session}';\n")
         op = "UPDATE"
     else:
         sql = (f"INSERT INTO namedSessionDb "
                f"(userName, sessionName, contents, shared, firstUse, lastUse, useCount, settings) "
                f"VALUES ('{TARGET_USER}', '{session}', {hex_blob(contents)}, "
                f"1, NOW(), NOW(), 0, '');\n")
         op = "INSERT"
     if dry_run:
         print(f"  [dry-run] would {op} on {host_key}")
         return sql
     hgsql_write(host_key, sql)
     print(f"  {op} applied on {host_key}")
     return sql
 
 
 # ---------- subcommands ----------
 
 def cmd_fetch(args):
     verboten = load_verboten()
 
     if args.src_url:
         u = urlparse(args.src_url)
         m = re.match(r"^/s/([^/]+)/([^/]+)/?$", u.path)
         if not m:
             die(f"bad --src-url; expected .../s/<user>/<sessionName>: {args.src_url}")
         src_user, src_session = m.group(1), m.group(2)
     else:
         if not (args.src_user and args.src_session):
             die("must provide --src-url OR (--src-user AND --src-session)")
         src_user, src_session = args.src_user, args.src_session
 
     target = args.target_session
     validate_name(src_user, "src userName")
     validate_name(src_session, "src sessionName")
     validate_name(target, "target sessionName")
 
     src_host = args.src_host
     src_h, src_db = HOSTS[src_host]
     print(f"Fetching {src_user}/{src_session} from {src_host} ({src_h}:{src_db})...")
 
     contents, _, exists = fetch_session_row(src_host, src_user, src_session)
     if not exists:
         die(f"source session {src_user}/{src_session} not found on {src_host}")
     pairs = split_pairs(contents)
     print(f"  fetched {len(pairs)} cart variables ({len(contents)} bytes)")
 
     scrubbed = scrub(pairs, verboten)
     dropped = len(pairs) - len(scrubbed)
     print(f"  scrubbed: dropped {dropped} verboten/duplicate vars, kept {len(scrubbed)}")
 
     db = find_db_for_session(target)
     if db is None:
         if not args.allow_new:
             die(f"target session {target!r} not found in any recTrackSets.<db>.tab\n"
                 f"  Add a row to recTrackSets.<db>.tab first, OR re-run with --allow-new.")
         db = db_from_pairs(scrubbed)
         if not db:
             die("--allow-new given but cannot determine assembly: no db= cart var in source")
         validate_name(db, "db (from cart contents)")
         print(f"  --allow-new: assembly determined from cart contents: {db}")
     else:
         print(f"  assembly resolved from recTrackSets.{db}.tab: {db}")
 
     path = write_kent_file(db, target, scrubbed)
     print(f"  wrote {path}")
     show_git_diff_for(path)
 
     print(f"\nReading current dev row (View/{target} on hgcentraltest)...")
     dev_contents, _, dev_exists = fetch_session_row("dev", TARGET_USER, target)
     if dev_exists:
         dev_pairs = sorted(set(split_pairs(dev_contents)))
         print(f"  dev currently: {len(dev_pairs)} cart vars")
         print("\n--- diff: dev vs new (kent tree) ---")
         show_pair_diff("dev", dev_pairs, "tree", scrubbed, args.verbose)
     else:
         print(f"  (no existing row on dev -- INSERT will create it)")
 
     if not args.commit:
         print("\n--dry-run-- (no --commit flag); nothing written to dev. Re-run with --commit to apply.")
         return 0
 
     if not confirm(f"\nApply this update to dev (View/{target} on hgcentraltest)?"):
         print("Aborted.")
         return 1
 
     new_contents = join_pairs(scrubbed)
     apply_session_write("dev", target, new_contents, dry_run=False)
 
     log(f"fetch src={src_host}:{src_user}/{src_session} target={target} db={db} dev=updated")
 
     print()
     print("Dev updated. Validate at:")
     print(f"  https://hgwdev-{getpass.getuser()}.gi.ucsc.edu/cgi-bin/hgTracks?"
           f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}")
     print(f"  https://hgwdev.gi.ucsc.edu/cgi-bin/hgTracks?"
           f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}")
     print()
     print(f"Next:  rtsUpdate push --target-session {target} --to beta --commit")
     return 0
 
 
 def cmd_push(args):
     target = args.target_session
     dest = args.to
     validate_name(target, "target sessionName")
     if dest not in ("beta", "rr"):
         die(f"--to must be 'beta' or 'rr' (got {dest!r})")
 
     db = find_db_for_session(target) or find_db_in_tree(target)
     if db is None:
         die(f"cannot find {target!r} in recTrackSets.<db>.tab nor in rts/<db>/")
 
     pairs = read_kent_file(db, target)
     if not pairs:
         die(f"kent tree file is empty: {kent_path(db, target)}")
     new_contents = join_pairs(pairs)
 
     dest_h, dest_db = HOSTS[dest]
     print(f"\nReading current row from {dest} ({dest_h}:{dest_db})...")
     dest_contents, dest_settings, dest_exists = fetch_session_row(dest, TARGET_USER, target)
     if not dest_exists and not args.allow_create:
         die(f"View/{target} does not exist on {dest}; pass --allow-create to INSERT")
 
-    ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
+    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
     bdir = pathlib.Path(tempfile.gettempdir()) / f"rtsUpdate-{ts}" / dest
     bdir.mkdir(parents=True, exist_ok=True)
     bfile = bdir / f"{target}.tsv"
     if dest_exists:
         bfile.write_text(f"{TARGET_USER}\t{target}\t{dest_contents}\t{dest_settings}\n")
         print(f"  pre-write backup: {bfile}")
     else:
         print(f"  no existing row on {dest} -- nothing to back up")
 
     dest_pairs = sorted(set(split_pairs(dest_contents))) if dest_exists else []
     print(f"\n--- diff: kent tree ({db}/{target}) vs {dest} ---")
     show_pair_diff(dest, dest_pairs, "tree", pairs, args.verbose)
 
     if dest == "rr":
         if not args.i_confirm_rr:
             die("RR push requires --i-confirm-rr in addition to interactive yes")
         print()
         print("!" * 64)
         print(" PRODUCTION DATABASE: genome-centdb / hgcentral")
         print(f" This change will be visible at https://genome.ucsc.edu/s/View/{target}")
         print(" within seconds.  Pausing 5 seconds before prompt.")
         print("!" * 64)
         time.sleep(5)
 
     if not args.commit:
         print(f"\n--dry-run-- (no --commit flag); nothing written to {dest}.")
         return 0
 
     if not confirm(f"\nApply this update to {dest} (View/{target})?"):
         print("Aborted.")
         return 1
 
     print("  drift check: re-reading dest row...")
     drift_contents, _, drift_exists = fetch_session_row(dest, TARGET_USER, target)
     if drift_exists != dest_exists or drift_contents != dest_contents:
         die(f"DRIFT: {dest} row changed since diff was shown. "
             f"Aborting; backup at {bfile if dest_exists else '(none)'}.\n"
             f"Re-run the command to see the new diff.")
 
     apply_session_write(dest, target, new_contents, dry_run=False)
     log(f"push target={target} dest={dest} backup={bfile if dest_exists else 'none'}")
 
     print()
     if dest == "beta":
         print(f"Beta updated. Validate at:")
         print(f"  https://hgwbeta.soe.ucsc.edu/cgi-bin/hgTracks?"
               f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}")
         print()
         print(f"Next:  rtsUpdate push --target-session {target} --to rr --i-confirm-rr --commit")
     else:
         print(f"RR updated. Validate at:")
         print(f"  https://genome.ucsc.edu/s/View/{target}")
         print()
         print("Reminders:")
         print("  1. Commit and push your kent tree changes:")
         print(f"       cd ~/kent/src/hg/utils/rts && git add {db}/{target}")
         print(f"       (also any recTrackSets.<db>.tab updates)")
         print(f"       git commit -m 'Updating RTS {target}. refs #32768'")
         print("       git push")
         print("  2. Coordinate with cluster-admin to mirror the View user")
         print("     contents to euro/asia (out of scope for this script).")
     return 0
 
 
 def cmd_diff(args):
     target = args.target_session
     validate_name(target, "target sessionName")
     db = find_db_for_session(target) or find_db_in_tree(target)
     if db is None:
         die(f"cannot find {target!r} in recTrackSets.<db>.tab nor in rts/<db>/")
     print(f"target: View/{target}  (db={db})")
 
     tree_path = kent_path(db, target)
     if tree_path.exists():
         tree_pairs = read_kent_file(db, target)
         print(f"\n[tree] {tree_path}: {len(tree_pairs)} vars")
     else:
         tree_pairs = []
         print(f"\n[tree] {tree_path}: (missing)")
 
     for host_key in ("dev", "beta", "rr"):
         host, db_name = HOSTS[host_key]
         c, _, exists = fetch_session_row(host_key, TARGET_USER, target)
         host_pairs = sorted(set(split_pairs(c))) if exists else []
         if not exists:
             print(f"\n[{host_key}] {host}:{db_name} (row missing)")
             continue
         print(f"\n[{host_key}] {host}:{db_name}: {len(host_pairs)} vars")
         show_pair_diff("tree", tree_pairs, host_key, host_pairs, args.verbose)
     return 0
 
 
 # ---------- main ----------
 
 def build_parser():
     p = argparse.ArgumentParser(
         prog="rtsUpdate",
         description="Update Recommended Track Set sessions (userName='View') across "
                     "hgwdev -> hgwbeta -> genome-centdb.  Refs #32768, #34907.")
     sub = p.add_subparsers(dest="cmd", required=True)
 
     pf = sub.add_parser("fetch",
                         help="fetch source session, scrub, write kent tree, update dev")
     pf.add_argument("--src-url",
                     help="full URL like https://genome.ucsc.edu/s/<user>/<session>")
     pf.add_argument("--src-user",
                     help="source userName (alternative to --src-url)")
     pf.add_argument("--src-session",
                     help="source sessionName (alternative to --src-url)")
     pf.add_argument("--src-host", choices=("rr", "dev", "beta"), default="rr",
                     help="where to read the source from (default: rr)")
     pf.add_argument("--target-session", required=True,
                     help="target sessionName under userName='View'")
     pf.add_argument("--commit", action="store_true",
                     help="actually write to dev (default: dry-run)")
     pf.add_argument("--allow-new", action="store_true",
                     help="allow target session not yet in recTrackSets.<db>.tab")
     pf.add_argument("--verbose", "-v", action="store_true",
                     help="show every var in diffs")
 
     pp = sub.add_parser("push", help="push kent tree contents to beta or rr")
     pp.add_argument("--target-session", required=True)
     pp.add_argument("--to", required=True, choices=("beta", "rr"))
     pp.add_argument("--commit", action="store_true",
                     help="actually write (default: dry-run)")
     pp.add_argument("--allow-create", action="store_true",
                     help="INSERT row if missing on dest")
     pp.add_argument("--i-confirm-rr", action="store_true",
                     help="required in addition to interactive yes for --to rr")
     pp.add_argument("--verbose", "-v", action="store_true")
 
     pd = sub.add_parser("diff",
                         help="read-only: compare kent tree vs dev/beta/rr")
     pd.add_argument("--target-session", required=True)
     pd.add_argument("--verbose", "-v", action="store_true")
 
     return p
 
 
 def main():
+    # Line-buffer stdout so prints interleave correctly with subprocess output
+    # (git diff, hgsql) that writes directly via the OS.
+    try:
+        sys.stdout.reconfigure(line_buffering=True)
+    except AttributeError:
+        pass
     args = build_parser().parse_args()
     if args.cmd == "fetch":
         return cmd_fetch(args)
     if args.cmd == "push":
         return cmd_push(args)
     if args.cmd == "diff":
         return cmd_diff(args)
     return 0
 
 
 if __name__ == "__main__":
     sys.exit(main() or 0)