348d17775ccde5f178d2b87915f16beda576f8fa hiram Wed May 27 15:29:13 2026 -0700 add toggle buttons to hide/show the requestType assembly or liftRequest and allow viewing either hgwdev or the RR table refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestView.cgi src/hg/utils/otto/userRequests/ottoRequestView.cgi index dc7d51122da..9f15674425b 100644 --- src/hg/utils/otto/userRequests/ottoRequestView.cgi +++ src/hg/utils/otto/userRequests/ottoRequestView.cgi @@ -1,487 +1,579 @@ #!/usr/bin/env python3 """ottoRequestView.cgi - web view of hgcentraltest.ottoRequest. Read-only display of every row in the table, plus a per-row 'reset status' control that is the only write path exposed. Access is restricted to a single IP (UCSC VPN, 128.114.198.5). Any other REMOTE_ADDR gets a 403. """ import cgi import html import json import os import re import subprocess import sys import time import urllib.parse from datetime import datetime ALLOWED_IP = '128.114.198.5' -HGDB_CONF = '/usr/local/apache/cgi-bin/hg.conf' TRASH = '/data/apache/trash' -DB = 'hgcentraltest' TABLE = 'ottoRequest' +# Configuration will be set dynamically in main() +HGDB_CONF = None +DB = None +USE_PROFILE = None + # Galaxy queue status panel - snapshot is refreshed by ottoRequestWatch.sh # (cron, every 11 minutes), CGI just reads it. CACHE_PATH = '/data/apache/trash/ottoRequestGalaxyStatus.json' CACHE_TTL = 1800 # seconds; older than this -> show "stale" instead # featureBits coverage snapshot - append-only file maintained by # featureBitsSnapshot.py (cron, via ottoRequestWatch.sh). fb.*.txt # values are immutable once an alignment completes so no TTL is needed; # featureBitsPct() falls back to an NFS read on a snapshot miss. FB_SNAPSHOT_PATH = '/data/apache/trash/ottoRequestFeatureBitsPct.json' # from README.txt in this directory STATUS_NAMES = { 0: 'received by API', 1: 'acknowledged, email sent', 2: 'galaxy job started', 3: 'galaxy done, download started', 4: 'downloaded, track files made', 5: 'symlinks ready, awaiting push', 6: 'push complete', 7: 'ERROR', 8: 'COMPLETE (final email sent)', } COLS = ['id', 'requestType', 'fromDb', 'toDb', 'email', 'comment', 'requestTime', 'status', 'buildDir', 'completeTime'] # featureBits coverage lookup roots HIVE_GENOMES = '/hive/data/genomes' ASMHUB_ROOT = HIVE_GENOMES + '/asmHubs' # in-process caches; one CGI invocation only, but rows reuse same accessions _buildDirCache = {} _fbPctCache = {} _genarkAsmName = {} # populated up-front by loadGenarkNames() _fbSnapshot = {} # populated up-front by loadFeatureBitsSnapshot() def forbidden(msg): sys.stdout.write("Status: 403 Forbidden\r\n") sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(msg + "\n") sys.exit(0) def checkIp(): remote = os.environ.get('REMOTE_ADDR', '') if remote != ALLOWED_IP: forbidden(f"Access denied for {remote!r}; this page is restricted.") +def setDbConfig(use_otto=False): + """Set database configuration globals based on config parameter.""" + global HGDB_CONF, DB, USE_PROFILE + if use_otto: + HGDB_CONF = '/data/apache/cgi-bin/otto/.otto.conf' + DB = 'hgcentral' + USE_PROFILE = False + else: + HGDB_CONF = '/usr/local/apache/cgi-bin/hg.conf' + DB = 'hgcentraltest' + USE_PROFILE = True + + def unescapeMysql(s): """Reverse `hgsql -B` escaping (\\n, \\t, \\\\, \\0). One pass so \\\\n stays a literal backslash + 'n'.""" out, i, n = [], 0, len(s) while i < n: if s[i] == '\\' and i + 1 < n: c = s[i+1] if c == 'n': out.append('\n') elif c == 't': out.append('\t') elif c == '\\': out.append('\\') elif c == '0': out.append('\0') else: out.append(s[i:i+2]) i += 2 else: out.append(s[i]); i += 1 return ''.join(out) def hgsqlRun(sql): """Run sql via hgsql against DB. Returns (ok, stdout, stderr). Running under Apache the process has no ~/.hg.conf, so point hgsql at the cgi-bin hg.conf via HGDB_CONF.""" env = dict(os.environ) env['HGDB_CONF'] = HGDB_CONF env['HOME'] = TRASH - cmd = ['/cluster/bin/x86_64/hgsql', '-profile=central', DB, '-N', '-B', '-e', sql] + cmd = ['/cluster/bin/x86_64/hgsql'] + if USE_PROFILE: + cmd.append('-profile=central') + cmd.extend([DB, '-N', '-B', '-e', sql]) r = subprocess.run(cmd, capture_output=True, text=True, env=env) return (r.returncode == 0, r.stdout, r.stderr) def fetchRows(): sql = f"SELECT {','.join(COLS)} FROM {TABLE} ORDER BY id DESC" ok, out, err = hgsqlRun(sql) if not ok: raise RuntimeError(err.strip() or 'hgsql failed') rows = [] if out.strip(): for line in out.rstrip('\n').split('\n'): rows.append([unescapeMysql(f) for f in line.split('\t')]) return rows def doResetStatus(form): rid = form.getfirst('id', '') stat = form.getfirst('status', '') if not rid.isdigit(): return None, f"bad id: {rid!r}" if not stat.isdigit() or int(stat) not in STATUS_NAMES: return None, f"bad status: {stat!r}" sql = (f"UPDATE {TABLE} SET status = {int(stat)} " f"WHERE id = {int(rid)}") ok, _out, err = hgsqlRun(sql) if not ok: return None, err.strip() or 'hgsql update failed' return (f"id={rid} status set to {stat} " f"({STATUS_NAMES[int(stat)]})"), None def loadGenarkNames(accessions): """Populate _genarkAsmName: {gcAccession: asmName} for the given accessions in one bulk hgsql call against the genark table. Lets hubBuildDir() construct paths directly instead of NFS-listdir'ing /hive/data/genomes/asmHubs/...//// to discover the asmName suffix on each accession.""" if not accessions: return quoted = ",".join("'%s'" % a for a in sorted(accessions)) sql = (f"SELECT gcAccession, asmName FROM genark " f"WHERE gcAccession IN ({quoted});") ok, out, _err = hgsqlRun(sql) if not ok or not out.strip(): return for line in out.rstrip('\n').split('\n'): parts = line.split('\t') if len(parts) >= 2: _genarkAsmName[parts[0]] = parts[1] def hubBuildDir(acc): """Locate the hive build directory for a fromDb/toDb value. GenArk accession (GCA_/GCF_) -> asmHubs/{genbank,refseq}Build////_ asmName comes from _genarkAsmName, populated up-front by loadGenarkNames() from the genark table. UCSC native db (e.g. hg38) -> /hive/data/genomes/ Returns absolute path or None.""" if not acc: return None if acc in _buildDirCache: return _buildDirCache[acc] result = None if (acc.startswith('GCF_') or acc.startswith('GCA_')) and len(acc) >= 13: asmName = _genarkAsmName.get(acc) if asmName: src = acc[:3] sub = 'refseqBuild' if src == 'GCF' else 'genbankBuild' digits = acc[4:].split('.', 1)[0] if len(digits) >= 9: result = (f'{ASMHUB_ROOT}/{sub}/{src}/' f'{digits[0:3]}/{digits[3:6]}/{digits[6:9]}/' f'{acc}_{asmName}') else: candidate = f'{HIVE_GENOMES}/{acc}' if os.path.isdir(candidate): result = candidate _buildDirCache[acc] = result return result def loadFeatureBitsSnapshot(): """Populate _fbSnapshot from the JSON file written by featureBitsSnapshot.py via cron. Silent no-op if the file is missing or malformed - featureBitsPct() falls back to an NFS read on a snapshot miss, so the page still renders correctly.""" try: with open(FB_SNAPSHOT_PATH) as f: data = json.load(f) except (OSError, ValueError): return _fbSnapshot.update(data.get('pct') or {}) def featureBitsPct(srcAcc, qryAcc): """Return percentage from fb..chainLink.txt (% of srcAcc covered by chains to qryAcc), or '' if unavailable. Two-tier lookup: the precomputed cron snapshot first (pure dict lookup, no I/O); on miss falls back to the NFS file read so freshly-completed rows still show a value before the next cron tick promotes them.""" if not srcAcc or not qryAcc: return '' key = (srcAcc, qryAcc) if key in _fbPctCache: return _fbPctCache[key] snapKey = f'{srcAcc}\t{qryAcc}' if snapKey in _fbSnapshot: pct = _fbSnapshot[snapKey] _fbPctCache[key] = pct return pct bdir = hubBuildDir(srcAcc) pct = '' if bdir: # GenArk builds keep lastz under trackData/, UCSC native under bed/ sub = 'trackData' if '/asmHubs/' in bdir else 'bed' # chainLink.txt: first letter of query is capitalized # (matches the ${dstDb^} convention in installLinks). No-op for # GCA_*/GCF_* accessions; converts hg38 -> Hg38 for native dbs. QryAcc = qryAcc[:1].upper() + qryAcc[1:] path = (f'{bdir}/{sub}/lastz.{qryAcc}/' f'fb.{srcAcc}.chain{QryAcc}Link.txt') try: with open(path) as f: txt = f.read() m = re.search(r'\(([\d.]+)%\)', txt) if m: pct = m.group(1) + '%' except OSError: pass _fbPctCache[key] = pct return pct def elapsedStr(reqTime, doneTime): """Human-readable elapsed time between two MySQL datetimes. Empty string if either side is missing/NULL/unparseable.""" if not reqTime or not doneTime or reqTime == 'NULL' or doneTime == 'NULL': return '' try: t0 = datetime.strptime(reqTime, '%Y-%m-%d %H:%M:%S') t1 = datetime.strptime(doneTime, '%Y-%m-%d %H:%M:%S') except ValueError: return '' secs = int((t1 - t0).total_seconds()) if secs < 0: return '' d, secs = divmod(secs, 86400) h, secs = divmod(secs, 3600) m, s = divmod(secs, 60) if d: return f'{d}d {h}h {m}m' if h: return f'{h}h {m}m' if m: return f'{m}m {s}s' return f'{s}s' def loadGalaxyStatus(): """Return the Galaxy queue snapshot written by ottoRequestWatch.sh (which calls galaxyStatus.py from cron). Returns the parsed dict with an added 'stale' flag when the file is older than CACHE_TTL, or None if the file is missing/unreadable.""" try: mtime = os.path.getmtime(CACHE_PATH) with open(CACHE_PATH) as f: data = json.load(f) except (OSError, ValueError): return None data['stale'] = (time.time() - mtime) > CACHE_TTL return data -def renderPage(rows, info=None, error=None, galaxyStatus=None): +def renderPage(rows, info=None, error=None, galaxyStatus=None, use_otto=False): sys.stdout.write("Content-Type: text/html; charset=utf-8\r\n\r\n") out = sys.stdout.write + db_label = 'RR' if use_otto else 'hgwdev' out('\n\n') - out(f'{TABLE}\n') + out(f'{TABLE} ({db_label})\n') out('\n') - out(f'

{DB}.{TABLE}

\n') + db_label = 'RR' if use_otto else 'hgwdev' + out(f'

{DB}.{TABLE} ({db_label})

\n') if galaxyStatus: staleNote = ' [stale]' if galaxyStatus.get('stale') else '' out('
Galaxy queue: ' f'{galaxyStatus.get("running", "?")} running · ' f'{galaxyStatus.get("queued", "?")} queued · ' f'{galaxyStatus.get("new", "?")} new ' f'(as of {html.escape(galaxyStatus.get("ts", ""))})' f'{staleNote}
\n') else: out('
Galaxy queue: ' 'status unavailable
\n') if info: out(f'\n') if error: out(f'\n') out('
status: ') out(' · '.join(f'{k}={html.escape(v)}' for k, v in STATUS_NAMES.items())) - # Count completed rows for the toggle button label + # Count rows by type for toggle button labels completed_count = sum(1 for r in rows if len(r) > 7 and r[7] == '8') + assembly_count = sum(1 for r in rows if len(r) > 1 and r[1] == 'assembly') + liftover_count = sum(1 for r in rows if len(r) > 1 and r[1] == 'liftOver') + # Config toggle button - switches between test and production databases + current_config = 'otto' if use_otto else 'test' + switch_config = 'test' if use_otto else 'otto' + switch_label = 'Switch to hgwdev' if use_otto else 'Switch to RR' + config_url = f'?config={switch_config}' + out(f' · {len(rows)} row(s)' + f'{switch_label}' '' f'
\n') + f'onclick="toggleCompleted()">hide completed ({completed_count})' + f'' + f'\n') out(f'
cron times: 9,20,31,42,53 for ottoRequestWatch.sh, and 4,26,46 for ottoRequestPush and 1,8,15,22,29,36,43,50,57 for the first acknowledgement
\n') out('\n') for c in COLS: out(f'') out('' '\n') reqIdx = COLS.index('requestTime') doneIdx = COLS.index('completeTime') fromIdx = COLS.index('fromDb') toIdx = COLS.index('toDb') + typeIdx = COLS.index('requestType') for r in rows: rid = r[0] try: stnum = int(r[7]) except (ValueError, IndexError): stnum = -1 - cls = f's{stnum}' if stnum in (7, 8) else '' + cls_parts = [] + if stnum in (7, 8): + cls_parts.append(f's{stnum}') + # Add requestType class for toggle filtering + if typeIdx < len(r): + req_type = r[typeIdx].lower() + if req_type in ('assembly', 'liftover'): + cls_parts.append(req_type) + cls = ' '.join(cls_parts) out(f'') for i, c in enumerate(COLS): cell = r[i] if i < len(r) else '' if c == 'comment': out(f'') elif c == 'status': label = STATUS_NAMES.get(stnum, '?') out(f'') elif c in ('fromDb', 'toDb') and cell: href = ('https://genome-test.gi.ucsc.edu/cgi-bin/hgTracks?db=' + urllib.parse.quote(cell, safe='')) out(f'') elif c == 'email' and '@' in cell: user = cell.split('@', 1)[0] out(f'') else: out(f'') fromAcc = r[fromIdx] if fromIdx < len(r) else '' toAcc = r[toIdx] if toIdx < len(r) else '' fwd = featureBitsPct(fromAcc, toAcc) rev = featureBitsPct(toAcc, fromAcc) if fwd or rev: out(f'') else: out('') elapsed = elapsedStr(r[reqIdx] if reqIdx < len(r) else '', r[doneIdx] if doneIdx < len(r) else '') out(f'') # reset form out('') out('\n') out('
{c}' 'coverage
from / to
elapsedset status
{html.escape(cell)}{html.escape(cell)} ' f'{html.escape(label)}' f'{html.escape(cell)}' f'{html.escape(user)}{html.escape(cell)}{html.escape(fwd or "-")} / ' f'{html.escape(rev or "-")}{html.escape(elapsed)}
' '' f'' '
\n') out('\n') out('\n') out('\n') def main(): # checkIp() + # Create FieldStorage once - it consumes stdin and can't be read twice + form = cgi.FieldStorage() + + # Detect configuration from URL parameter + use_otto = form.getfirst('config') == 'otto' + setDbConfig(use_otto) + # POST/Redirect/GET: handle the write, then 303 to a GET of the same URL # so a browser reload doesn't re-submit the form and re-run the UPDATE. if os.environ.get('REQUEST_METHOD', 'GET') == 'POST': - form = cgi.FieldStorage() action = form.getfirst('action', '') if action == 'resetStatus': info, error = doResetStatus(form) else: info, error = None, f"unknown action: {action!r}" params = {} + if use_otto: params['config'] = 'otto' # preserve config in redirect if info: params['info'] = info if error: params['error'] = error qs = ('?' + urllib.parse.urlencode(params)) if params else '' sys.stdout.write(f"Status: 303 See Other\r\n" f"Location: {os.environ.get('SCRIPT_NAME','')}{qs}" f"\r\n\r\n") return # GET: pick up banner messages left by the PRG redirect, if any - qs = cgi.FieldStorage() - info = qs.getfirst('info') or None - error = qs.getfirst('error') or None + info = form.getfirst('info') or None + error = form.getfirst('error') or None try: rows = fetchRows() except RuntimeError as e: rows = [] error = (error + ' / ' if error else '') + f"fetch failed: {e}" # one bulk lookup of GenArk asmNames so hubBuildDir() avoids NFS readdir fromIdx = COLS.index('fromDb') toIdx = COLS.index('toDb') gcAccs = set() for r in rows: for idx in (fromIdx, toIdx): if idx < len(r): v = r[idx] if v.startswith('GCA_') or v.startswith('GCF_'): gcAccs.add(v) loadGenarkNames(gcAccs) loadFeatureBitsSnapshot() galaxyStatus = loadGalaxyStatus() - renderPage(rows, info=info, error=error, galaxyStatus=galaxyStatus) + renderPage(rows, info=info, error=error, galaxyStatus=galaxyStatus, use_otto=use_otto) if __name__ == '__main__': try: main() except Exception as e: sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(f"ottoRequestView.cgi error: {e}\n")