1b38723574b05053668a698a9856652a077fce2f lrnassar Mon May 4 14:48:55 2026 -0700 First draft of rtsUpdate, a utility to streamline pushing Recommended Track Set sessions from a curator session through dev -> beta -> RR. Replaces the manual download-and-resave-on-every-mirror process. Each step is gated by an interactive confirmation; RR push has an additional --i-confirm-rr flag and a 5-second pause. Cart contents written via MySQL _binary 0xHEX literals for binary safety; sessionName regex-validated; WHERE clauses are exact-equality only. The kent tree (rts//) is the canonical backup, tracked by git. Not yet exercised against a live update. refs #32768 diff --git src/hg/utils/rts/rtsUpdate src/hg/utils/rts/rtsUpdate new file mode 100755 index 00000000000..4c634089494 --- /dev/null +++ src/hg/utils/rts/rtsUpdate @@ -0,0 +1,520 @@ +#!/usr/bin/env python3 +# rtsUpdate -- propagate Recommended Track Set sessions from a curator +# session into the View user across hgwdev -> hgwbeta -> genome-centdb. +# Refs #32768, #34907. + +import argparse +import datetime +import getpass +import pathlib +import re +import subprocess +import sys +import tempfile +import time +from urllib.parse import urlparse + +HOME = pathlib.Path.home() +RTS_DIR = HOME / "kent/src/hg/utils/rts" +INC_DIR = HOME / "kent/src/hg/htdocs/inc" +LOGFILE = RTS_DIR / "rtsUpdate.log" +VERBOTEN = RTS_DIR / "verboten.lst" + +TARGET_USER = "View" + +HOSTS = { + "dev": ("hgwdev", "hgcentraltest"), + "beta": ("hgwbeta", "hgcentralbeta"), + "rr": ("genome-centdb", "hgcentral"), +} + +VALIDATE_URL = { + "dev": "https://hgwdev.gi.ucsc.edu", + "beta": "https://hgwbeta.soe.ucsc.edu", + "rr": "https://genome.ucsc.edu", +} + +# Session and userName must match this. Excludes anything that could be SQL- or +# shell-special; permits the URL-encoding form ('%' and digits) used for sessions +# whose human-readable names contain spaces (e.g. 'CNVs%20Clinical'). +SAFE_NAME = re.compile(r"^[A-Za-z0-9_%.\-]+$") + + +def log(msg): + ts = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + line = f"{ts} {getpass.getuser()} {msg}\n" + LOGFILE.parent.mkdir(parents=True, exist_ok=True) + with open(LOGFILE, "a") as f: + f.write(line) + + +def die(msg, code=1): + print(f"ERROR: {msg}", file=sys.stderr) + sys.exit(code) + + +def confirm(prompt): + while True: + try: + ans = input(prompt + " (yes/no): ").strip().lower() + except EOFError: + return False + if ans == "yes": + return True + if ans == "no": + return False + print(" please type 'yes' or 'no'") + + +def validate_name(name, label): + if not name or not SAFE_NAME.match(name): + die(f"unsafe {label}: {name!r} (must match {SAFE_NAME.pattern})") + + +# ---------- hgsql shim ---------- + +def hgsql_read(host_key, sql): + """Run a SELECT. Returns list[list[str]]. Caller is responsible for + interpolating only validated values into sql.""" + host, db = HOSTS[host_key] + cmd = ["hgsql", "-h", host, db, "-N", "-B", "-e", sql] + p = subprocess.run(cmd, capture_output=True, text=True) + if p.returncode != 0: + die(f"hgsql read {host_key} failed: {p.stderr.strip()}") + out = p.stdout + if out.endswith("\n"): + out = out[:-1] + if not out: + return [] + return [row.split("\t") for row in out.split("\n")] + + +def hgsql_write(host_key, sql): + """Pipe SQL via stdin. Caller is responsible for safety (binary blobs go + through hex_blob; identifiers through validate_name).""" + host, db = HOSTS[host_key] + cmd = ["hgsql", "-h", host, db] + p = subprocess.run(cmd, input=sql, capture_output=True, text=True) + if p.returncode != 0: + die(f"hgsql write {host_key} failed: {p.stderr.strip()}\n--- sql ---\n" + f"{sql[:500]}{'...' if len(sql) > 500 else ''}") + return p.stdout + + +def hex_blob(s): + """MySQL hex literal for a string -- binary-safe substitute for a quoted value.""" + h = s.encode("utf-8").hex() + if not h: + return "''" + return f"_binary 0x{h}" + + +# ---------- RTS domain ---------- + +def load_verboten(): + pats = [] + for line in VERBOTEN.read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#"): + pats.append(re.compile(line)) + return pats + + +def split_pairs(contents): + return [p for p in contents.split("&") if p] + + +def join_pairs(pairs): + return "&".join(pairs) + + +def scrub(pairs, verboten): + out = [] + for p in pairs: + key = p.split("=", 1)[0] + if any(rx.match(key) for rx in verboten): + continue + out.append(p) + return sorted(set(out)) + + +def find_db_for_session(session): + """Return the assembly db for a session by scanning recTrackSets..tab.""" + for tab in sorted(INC_DIR.glob("recTrackSets.*.tab")): + m = re.match(r"recTrackSets\.([A-Za-z0-9]+)\.tab$", tab.name) + if not m: + continue + db = m.group(1) + for line in tab.read_text().splitlines(): + if line.startswith("#") or not line.strip(): + continue + cols = line.split("\t") + if len(cols) >= 3 and cols[2] == session: + return db + return None + + +def find_db_in_tree(session): + """Fallback: scan rts// for an existing dump file.""" + for d in sorted(RTS_DIR.iterdir()): + if d.is_dir() and (d / session).exists(): + if SAFE_NAME.match(d.name): + return d.name + return None + + +def db_from_pairs(pairs): + for p in pairs: + if p.startswith("db="): + return p[3:] + return None + + +def kent_path(db, session): + return RTS_DIR / db / session + + +def write_kent_file(db, session, pairs): + path = kent_path(db, session) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(pairs) + "\n") + return path + + +def read_kent_file(db, session): + path = kent_path(db, session) + if not path.exists(): + die(f"kent tree file missing: {path} -- run `rtsUpdate fetch` first") + pairs = [ln.rstrip() for ln in path.read_text().splitlines() if ln.strip()] + return pairs + + +def fetch_session_row(host_key, user, session): + """Return (contents, settings, exists).""" + validate_name(user, "userName") + validate_name(session, "sessionName") + sql = (f"SELECT contents, settings FROM namedSessionDb " + f"WHERE userName='{user}' AND sessionName='{session}'") + rows = hgsql_read(host_key, sql) + if not rows: + return "", "", False + row = rows[0] + contents = row[0] if len(row) > 0 else "" + settings = row[1] if len(row) > 1 else "" + return contents, settings, True + + +def show_pair_diff(label_a, pairs_a, label_b, pairs_b, verbose): + """Symmetric diff between two pair lists.""" + set_a = set(pairs_a) + set_b = set(pairs_b) + only_a = sorted(set_a - set_b) + only_b = sorted(set_b - set_a) + print(f" - {len(only_a)} vars in [{label_a}] not in [{label_b}]") + print(f" + {len(only_b)} vars in [{label_b}] not in [{label_a}]") + if verbose: + for p in only_a: + print(f" -{p}") + for p in only_b: + print(f" +{p}") + + +def show_git_diff_for(path): + """Run `git -C kent diff -- `, after `git add -N` so new files show.""" + rel = str(path) + try: + subprocess.run(["git", "-C", str(HOME / "kent"), "add", "-N", rel], + check=False, capture_output=True) + except FileNotFoundError: + return + print("\n--- git diff (kent tree) ---") + subprocess.run(["git", "-C", str(HOME / "kent"), "--no-pager", + "diff", "--no-color", "--", rel], + check=False) + + +def apply_session_write(host_key, session, contents, dry_run): + """Insert or update View/ on host_key. Returns SQL that was run + (or would be run, if dry_run).""" + validate_name(session, "sessionName") + _, _, exists = fetch_session_row(host_key, TARGET_USER, session) + if exists: + sql = (f"UPDATE namedSessionDb " + f"SET contents = {hex_blob(contents)}, lastUse = NOW() " + f"WHERE userName='{TARGET_USER}' AND sessionName='{session}';\n") + op = "UPDATE" + else: + sql = (f"INSERT INTO namedSessionDb " + f"(userName, sessionName, contents, shared, firstUse, lastUse, useCount, settings) " + f"VALUES ('{TARGET_USER}', '{session}', {hex_blob(contents)}, " + f"1, NOW(), NOW(), 0, '');\n") + op = "INSERT" + if dry_run: + print(f" [dry-run] would {op} on {host_key}") + return sql + hgsql_write(host_key, sql) + print(f" {op} applied on {host_key}") + return sql + + +# ---------- subcommands ---------- + +def cmd_fetch(args): + verboten = load_verboten() + + if args.src_url: + u = urlparse(args.src_url) + m = re.match(r"^/s/([^/]+)/([^/]+)/?$", u.path) + if not m: + die(f"bad --src-url; expected .../s//: {args.src_url}") + src_user, src_session = m.group(1), m.group(2) + else: + if not (args.src_user and args.src_session): + die("must provide --src-url OR (--src-user AND --src-session)") + src_user, src_session = args.src_user, args.src_session + + target = args.target_session + validate_name(src_user, "src userName") + validate_name(src_session, "src sessionName") + validate_name(target, "target sessionName") + + src_host = args.src_host + src_h, src_db = HOSTS[src_host] + print(f"Fetching {src_user}/{src_session} from {src_host} ({src_h}:{src_db})...") + + contents, _, exists = fetch_session_row(src_host, src_user, src_session) + if not exists: + die(f"source session {src_user}/{src_session} not found on {src_host}") + pairs = split_pairs(contents) + print(f" fetched {len(pairs)} cart variables ({len(contents)} bytes)") + + scrubbed = scrub(pairs, verboten) + dropped = len(pairs) - len(scrubbed) + print(f" scrubbed: dropped {dropped} verboten/duplicate vars, kept {len(scrubbed)}") + + db = find_db_for_session(target) + if db is None: + if not args.allow_new: + die(f"target session {target!r} not found in any recTrackSets..tab\n" + f" Add a row to recTrackSets..tab first, OR re-run with --allow-new.") + db = db_from_pairs(scrubbed) + if not db: + die("--allow-new given but cannot determine assembly: no db= cart var in source") + validate_name(db, "db (from cart contents)") + print(f" --allow-new: assembly determined from cart contents: {db}") + else: + print(f" assembly resolved from recTrackSets.{db}.tab: {db}") + + path = write_kent_file(db, target, scrubbed) + print(f" wrote {path}") + show_git_diff_for(path) + + print(f"\nReading current dev row (View/{target} on hgcentraltest)...") + dev_contents, _, dev_exists = fetch_session_row("dev", TARGET_USER, target) + if dev_exists: + dev_pairs = sorted(set(split_pairs(dev_contents))) + print(f" dev currently: {len(dev_pairs)} cart vars") + print("\n--- diff: dev vs new (kent tree) ---") + show_pair_diff("dev", dev_pairs, "tree", scrubbed, args.verbose) + else: + print(f" (no existing row on dev -- INSERT will create it)") + + if not args.commit: + print("\n--dry-run-- (no --commit flag); nothing written to dev. Re-run with --commit to apply.") + return 0 + + if not confirm(f"\nApply this update to dev (View/{target} on hgcentraltest)?"): + print("Aborted.") + return 1 + + new_contents = join_pairs(scrubbed) + apply_session_write("dev", target, new_contents, dry_run=False) + + log(f"fetch src={src_host}:{src_user}/{src_session} target={target} db={db} dev=updated") + + print() + print("Dev updated. Validate at:") + print(f" https://hgwdev-{getpass.getuser()}.gi.ucsc.edu/cgi-bin/hgTracks?" + f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}") + print(f" https://hgwdev.gi.ucsc.edu/cgi-bin/hgTracks?" + f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}") + print() + print(f"Next: rtsUpdate push --target-session {target} --to beta --commit") + return 0 + + +def cmd_push(args): + target = args.target_session + dest = args.to + validate_name(target, "target sessionName") + if dest not in ("beta", "rr"): + die(f"--to must be 'beta' or 'rr' (got {dest!r})") + + db = find_db_for_session(target) or find_db_in_tree(target) + if db is None: + die(f"cannot find {target!r} in recTrackSets..tab nor in rts//") + + pairs = read_kent_file(db, target) + if not pairs: + die(f"kent tree file is empty: {kent_path(db, target)}") + new_contents = join_pairs(pairs) + + dest_h, dest_db = HOSTS[dest] + print(f"\nReading current row from {dest} ({dest_h}:{dest_db})...") + dest_contents, dest_settings, dest_exists = fetch_session_row(dest, TARGET_USER, target) + if not dest_exists and not args.allow_create: + die(f"View/{target} does not exist on {dest}; pass --allow-create to INSERT") + + ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ") + bdir = pathlib.Path(tempfile.gettempdir()) / f"rtsUpdate-{ts}" / dest + bdir.mkdir(parents=True, exist_ok=True) + bfile = bdir / f"{target}.tsv" + if dest_exists: + bfile.write_text(f"{TARGET_USER}\t{target}\t{dest_contents}\t{dest_settings}\n") + print(f" pre-write backup: {bfile}") + else: + print(f" no existing row on {dest} -- nothing to back up") + + dest_pairs = sorted(set(split_pairs(dest_contents))) if dest_exists else [] + print(f"\n--- diff: kent tree ({db}/{target}) vs {dest} ---") + show_pair_diff(dest, dest_pairs, "tree", pairs, args.verbose) + + if dest == "rr": + if not args.i_confirm_rr: + die("RR push requires --i-confirm-rr in addition to interactive yes") + print() + print("!" * 64) + print(" PRODUCTION DATABASE: genome-centdb / hgcentral") + print(f" This change will be visible at https://genome.ucsc.edu/s/View/{target}") + print(" within seconds. Pausing 5 seconds before prompt.") + print("!" * 64) + time.sleep(5) + + if not args.commit: + print(f"\n--dry-run-- (no --commit flag); nothing written to {dest}.") + return 0 + + if not confirm(f"\nApply this update to {dest} (View/{target})?"): + print("Aborted.") + return 1 + + print(" drift check: re-reading dest row...") + drift_contents, _, drift_exists = fetch_session_row(dest, TARGET_USER, target) + if drift_exists != dest_exists or drift_contents != dest_contents: + die(f"DRIFT: {dest} row changed since diff was shown. " + f"Aborting; backup at {bfile if dest_exists else '(none)'}.\n" + f"Re-run the command to see the new diff.") + + apply_session_write(dest, target, new_contents, dry_run=False) + log(f"push target={target} dest={dest} backup={bfile if dest_exists else 'none'}") + + print() + if dest == "beta": + print(f"Beta updated. Validate at:") + print(f" https://hgwbeta.soe.ucsc.edu/cgi-bin/hgTracks?" + f"hgS_doOtherUser=submit&hgS_otherUserName=View&hgS_otherUserSessionName={target}") + print() + print(f"Next: rtsUpdate push --target-session {target} --to rr --i-confirm-rr --commit") + else: + print(f"RR updated. Validate at:") + print(f" https://genome.ucsc.edu/s/View/{target}") + print() + print("Reminders:") + print(" 1. Commit and push your kent tree changes:") + print(f" cd ~/kent/src/hg/utils/rts && git add {db}/{target}") + print(f" (also any recTrackSets..tab updates)") + print(f" git commit -m 'Updating RTS {target}. refs #32768'") + print(" git push") + print(" 2. Coordinate with cluster-admin to mirror the View user") + print(" contents to euro/asia (out of scope for this script).") + return 0 + + +def cmd_diff(args): + target = args.target_session + validate_name(target, "target sessionName") + db = find_db_for_session(target) or find_db_in_tree(target) + if db is None: + die(f"cannot find {target!r} in recTrackSets..tab nor in rts//") + print(f"target: View/{target} (db={db})") + + tree_path = kent_path(db, target) + if tree_path.exists(): + tree_pairs = read_kent_file(db, target) + print(f"\n[tree] {tree_path}: {len(tree_pairs)} vars") + else: + tree_pairs = [] + print(f"\n[tree] {tree_path}: (missing)") + + for host_key in ("dev", "beta", "rr"): + host, db_name = HOSTS[host_key] + c, _, exists = fetch_session_row(host_key, TARGET_USER, target) + host_pairs = sorted(set(split_pairs(c))) if exists else [] + if not exists: + print(f"\n[{host_key}] {host}:{db_name} (row missing)") + continue + print(f"\n[{host_key}] {host}:{db_name}: {len(host_pairs)} vars") + show_pair_diff("tree", tree_pairs, host_key, host_pairs, args.verbose) + return 0 + + +# ---------- main ---------- + +def build_parser(): + p = argparse.ArgumentParser( + prog="rtsUpdate", + description="Update Recommended Track Set sessions (userName='View') across " + "hgwdev -> hgwbeta -> genome-centdb. Refs #32768, #34907.") + sub = p.add_subparsers(dest="cmd", required=True) + + pf = sub.add_parser("fetch", + help="fetch source session, scrub, write kent tree, update dev") + pf.add_argument("--src-url", + help="full URL like https://genome.ucsc.edu/s//") + pf.add_argument("--src-user", + help="source userName (alternative to --src-url)") + pf.add_argument("--src-session", + help="source sessionName (alternative to --src-url)") + pf.add_argument("--src-host", choices=("rr", "dev", "beta"), default="rr", + help="where to read the source from (default: rr)") + pf.add_argument("--target-session", required=True, + help="target sessionName under userName='View'") + pf.add_argument("--commit", action="store_true", + help="actually write to dev (default: dry-run)") + pf.add_argument("--allow-new", action="store_true", + help="allow target session not yet in recTrackSets..tab") + pf.add_argument("--verbose", "-v", action="store_true", + help="show every var in diffs") + + pp = sub.add_parser("push", help="push kent tree contents to beta or rr") + pp.add_argument("--target-session", required=True) + pp.add_argument("--to", required=True, choices=("beta", "rr")) + pp.add_argument("--commit", action="store_true", + help="actually write (default: dry-run)") + pp.add_argument("--allow-create", action="store_true", + help="INSERT row if missing on dest") + pp.add_argument("--i-confirm-rr", action="store_true", + help="required in addition to interactive yes for --to rr") + pp.add_argument("--verbose", "-v", action="store_true") + + pd = sub.add_parser("diff", + help="read-only: compare kent tree vs dev/beta/rr") + pd.add_argument("--target-session", required=True) + pd.add_argument("--verbose", "-v", action="store_true") + + return p + + +def main(): + args = build_parser().parse_args() + if args.cmd == "fetch": + return cmd_fetch(args) + if args.cmd == "push": + return cmd_push(args) + if args.cmd == "diff": + return cmd_diff(args) + return 0 + + +if __name__ == "__main__": + sys.exit(main() or 0)