8d11236cb4e555b9cbd549b86a097e7817682e49 hiram Sat Jun 13 23:05:30 2026 -0700 grammar correction per claude code review refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestView.cgi src/hg/utils/otto/userRequests/ottoRequestView.cgi index 7d19cddc8e9..648e32778e8 100644 --- src/hg/utils/otto/userRequests/ottoRequestView.cgi +++ src/hg/utils/otto/userRequests/ottoRequestView.cgi @@ -1,598 +1,598 @@ #!/usr/bin/env python3 """ottoRequestView.cgi - web view of hgcentraltest.ottoRequest. Read-only display of every row in the table, plus a per-row 'reset status' control that is the only write path exposed. """ import cgi import html import json import os import re import subprocess import sys import time import urllib.parse from datetime import datetime TRASH = '/data/apache/trash' TABLE = 'ottoRequest' # Configuration will be set dynamically in main() HGDB_CONF = None DB = None USE_PROFILE = None # Galaxy queue status panel - snapshot is refreshed by ottoRequestWatch.sh # (cron, every 11 minutes), CGI just reads it. CACHE_PATH = '/data/apache/trash/ottoRequestGalaxyStatus.json' CACHE_TTL = 1800 # seconds; older than this -> show "stale" instead # featureBits coverage snapshot - append-only file maintained by # featureBitsSnapshot.py (cron, via ottoRequestWatch.sh). fb.*.txt # values are immutable once an alignment completes so no TTL is needed; # featureBitsPct() falls back to an NFS read on a snapshot miss. 
FB_SNAPSHOT_PATH = '/data/apache/trash/ottoRequestFeatureBitsPct.json'

# keep these liftStatus and asmStatus lists at the same length so that
# the verification check in doResetStatus() will function on just liftStatus
liftStatus = {
    0: 'received by API',
    1: 'acknowledged, email sent',
    2: 'galaxy job started',
    3: 'galaxy done, download started',
    4: 'downloaded, track files made',
    5: 'symlinks ready, awaiting push',
    6: 'push complete',
    7: 'ERROR',
    8: 'COMPLETE (final email sent)',
}

asmStatus = {
    0: 'received by API',
    1: 'acknowledged, email sent',
    2: 'assembly build started',
    3: 'assembly build done',
    4: 'assembly available on hgwdev',
    5: 'assembly available on hgwbeta',
    6: 'assembly available on hgw2 - done',
    7: 'ERROR',
    8: 'COMPLETE (final email sent)',
}

COLS = ['id', 'requestType', 'fromDb', 'toDb', 'email', 'comment',
        'requestTime', 'status', 'buildDir', 'completeTime']

# featureBits coverage lookup roots
HIVE_GENOMES = '/hive/data/genomes'
ASMHUB_ROOT = HIVE_GENOMES + '/asmHubs'

# in-process caches; one CGI invocation only, but rows reuse same accessions
_buildDirCache = {}
_fbPctCache = {}
_genarkAsmName = {}     # populated up-front by loadGenarkNames()
_fbSnapshot = {}        # populated up-front by loadFeatureBitsSnapshot()

# `hgsql -B` escape sequences and what they decode to.
_MYSQL_UNESCAPES = {'n': '\n', 't': '\t', '\\': '\\', '0': '\0'}
# A backslash followed by any single character (newline included).
_MYSQL_ESCAPE_RE = re.compile(r'\\(.)', re.DOTALL)

# Only values of this shape are ever quoted into the genark SQL text.
_GENARK_ACC_RE = re.compile(r'GC[AF]_[0-9]+(?:\.[0-9]+)?')

# NOTE(review): the stylesheet and script of the reviewed copy were lost
# (markup was stripped during extraction).  The two constants below are a
# minimal reconstruction driven by the row classes renderPage() emits
# (s7, s8, assembly, liftover) - confirm against the repository version.
_PAGE_STYLE = (
    '<style>\n'
    'table { border-collapse: collapse; }\n'
    'th, td { border: 1px solid #999; padding: 2px 6px; }\n'
    'tr.s7 { background: #fdd; }\n'
    'tr.s8 { background: #dfd; }\n'
    'tr.hide-s8, tr.hide-assembly, tr.hide-liftover { display: none; }\n'
    '.error { color: #a00; }\n'
    '.info { color: #060; }\n'
    '</style>\n</head><body>\n'
)

_PAGE_SCRIPT = (
    '<script>\n'
    'document.querySelectorAll("input.toggle").forEach(function (btn) {\n'
    '  btn.addEventListener("click", function () {\n'
    '    var cls = btn.getAttribute("data-cls");\n'
    '    document.querySelectorAll("tr." + cls).forEach(function (tr) {\n'
    '      tr.classList.toggle("hide-" + cls);\n'
    '    });\n'
    '  });\n'
    '});\n'
    '</script>\n'
)


def forbidden(msg):
    """Emit a 403 plain-text response carrying msg, then exit the CGI."""
    sys.stdout.write("Status: 403 Forbidden\r\n")
    sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n")
    sys.stdout.write(msg + "\n")
    sys.exit(0)


def setDbConfig(use_otto=False):
    """Set database configuration globals based on config parameter.

    use_otto=True selects the otto conf file with database 'hgcentral' and
    no hgsql profile; otherwise the cgi-bin hg.conf with 'hgcentraltest'
    and the 'central' profile."""
    global HGDB_CONF, DB, USE_PROFILE
    if use_otto:
        HGDB_CONF = '/data/apache/cgi-bin/otto/.otto.conf'
        DB = 'hgcentral'
        USE_PROFILE = False
    else:
        HGDB_CONF = '/usr/local/apache/cgi-bin/hg.conf'
        DB = 'hgcentraltest'
        USE_PROFILE = True


def unescapeMysql(s):
    r"""Reverse `hgsql -B` escaping (\n, \t, \\, \0).

    One left-to-right pass, so an escaped backslash followed by 'n' stays
    a literal backslash + 'n'.  Unknown escapes and a trailing lone
    backslash are passed through unchanged."""
    return _MYSQL_ESCAPE_RE.sub(
        lambda m: _MYSQL_UNESCAPES.get(m.group(1), m.group(0)), s)


def getStatusDict(requestType):
    """Return appropriate status dictionary based on request type."""
    return asmStatus if requestType == 'assembly' else liftStatus


def hgsqlRun(sql):
    """Run sql via hgsql against DB.  Returns (ok, stdout, stderr).

    Running under Apache the process has no ~/.hg.conf, so point hgsql
    at the cgi-bin hg.conf via HGDB_CONF."""
    env = dict(os.environ)
    env['HGDB_CONF'] = HGDB_CONF
    env['HOME'] = TRASH
    cmd = ['/cluster/bin/x86_64/hgsql']
    if USE_PROFILE:
        cmd.append('-profile=central')
    cmd.extend([DB, '-N', '-B', '-e', sql])
    r = subprocess.run(cmd, capture_output=True, text=True, env=env)
    return (r.returncode == 0, r.stdout, r.stderr)


def fetchRows():
    """Return every row of TABLE, newest id first, as lists of strings
    in COLS order.  Raises RuntimeError when hgsql fails."""
    sql = f"SELECT {','.join(COLS)} FROM {TABLE} ORDER BY id DESC"
    ok, out, err = hgsqlRun(sql)
    if not ok:
        raise RuntimeError(err.strip() or 'hgsql failed')
    if not out.strip():
        return []
    # split on '\n' only: embedded newlines/tabs arrive escaped from -B
    return [[unescapeMysql(field) for field in line.split('\t')]
            for line in out.rstrip('\n').split('\n')]


def _parseUint(text):
    """Return text as a non-negative int, or None if it is not made of
    plain ASCII digits.

    str.isdigit() alone also accepts characters such as superscript two
    that int() rejects, and int() can refuse very long digit strings, so
    both checks are needed to keep ValueError from escaping."""
    if not (text.isascii() and text.isdigit()):
        return None
    try:
        return int(text)
    except ValueError:
        return None


# the verification of 'stat' here against liftStatus works for both
# the liftStatus and asmStatus lists because they are kept at the same length
def doResetStatus(form):
    """Apply a 'reset status' request.

    Reads 'id' and 'status' from form (anything with a getfirst() method).
    Returns (info, None) on success or (None, error) on bad input or a
    failed UPDATE; never raises on malformed field values."""
    rid = form.getfirst('id', '')
    stat = form.getfirst('status', '')
    idNum = _parseUint(rid)
    if idNum is None:
        return None, f"bad id: {rid!r}"
    statNum = _parseUint(stat)
    if statNum is None or statNum not in liftStatus:
        return None, f"bad status: {stat!r}"
    sql = f"UPDATE {TABLE} SET status = {statNum} WHERE id = {idNum}"
    ok, _out, err = hgsqlRun(sql)
    if not ok:
        return None, err.strip() or 'hgsql update failed'
    # The request type of the row is not known here, so when the two
    # tables disagree on the meaning of this status show both labels.
    liftLabel = liftStatus[statNum]
    asmLabel = asmStatus.get(statNum, liftLabel)
    if asmLabel == liftLabel:
        label = liftLabel
    else:
        label = f"liftOver: {liftLabel} / assembly: {asmLabel}"
    return f"id={idNum} status set to {statNum} ({label})", None


def loadGenarkNames(accessions):
    """Populate _genarkAsmName: {gcAccession: asmName} for the given
    accessions in one bulk hgsql call against the genark table.

    Lets hubBuildDir() construct paths directly instead of listing
    directories under the asmHubs tree to discover the asmName suffix.
    Values that do not look like a GCA_/GCF_ accession are skipped rather
    than quoted into the SQL text."""
    wanted = sorted(a for a in (accessions or ())
                    if _GENARK_ACC_RE.fullmatch(a))
    if not wanted:
        return
    quoted = ",".join(f"'{a}'" for a in wanted)
    sql = (f"SELECT gcAccession, asmName FROM genark "
           f"WHERE gcAccession IN ({quoted});")
    ok, out, _err = hgsqlRun(sql)
    if not ok or not out.strip():
        return
    for line in out.rstrip('\n').split('\n'):
        parts = line.split('\t')
        if len(parts) >= 2:
            _genarkAsmName[parts[0]] = parts[1]


def hubBuildDir(acc):
    """Locate the hive build directory for a fromDb/toDb value.

    GenArk accession (GCA_/GCF_) -> a path under ASMHUB_ROOT built from
        the accession digits and the asmName in _genarkAsmName, which is
        populated up-front by loadGenarkNames() from the genark table.
    UCSC native db (e.g. hg38)   -> /hive/data/genomes/<db>, only if that
        directory exists.
    Returns absolute path or None.  Results are memoized per accession."""
    if not acc:
        return None
    if acc in _buildDirCache:
        return _buildDirCache[acc]
    result = None
    if acc.startswith(('GCF_', 'GCA_')) and len(acc) >= 13:
        asmName = _genarkAsmName.get(acc)
        if asmName:
            src = acc[:3]
            sub = 'refseqBuild' if src == 'GCF' else 'genbankBuild'
            digits = acc[4:].split('.', 1)[0]
            if len(digits) >= 9:
                result = (f'{ASMHUB_ROOT}/{sub}/{src}/'
                          f'{digits[0:3]}/{digits[3:6]}/{digits[6:9]}/'
                          f'{acc}_{asmName}')
    else:
        candidate = f'{HIVE_GENOMES}/{acc}'
        if os.path.isdir(candidate):
            result = candidate
    _buildDirCache[acc] = result
    return result


def loadFeatureBitsSnapshot():
    """Populate _fbSnapshot from the JSON file written by
    featureBitsSnapshot.py via cron.

    Silent no-op if the file is missing or malformed - including valid
    JSON that is not an object with a 'pct' object inside.
    featureBitsPct() falls back to an NFS read on a snapshot miss, so the
    page still renders correctly."""
    try:
        with open(FB_SNAPSHOT_PATH) as f:
            data = json.load(f)
    except (OSError, ValueError):
        return
    pct = data.get('pct') if isinstance(data, dict) else None
    if isinstance(pct, dict):
        _fbSnapshot.update(pct)


def featureBitsPct(srcAcc, qryAcc):
    """Return percentage from the fb.*.chain*Link.txt file (% of srcAcc
    covered by chains to qryAcc), or '' if unavailable.

    Two-tier lookup: the precomputed cron snapshot first (pure dict
    lookup, no I/O); on miss falls back to the NFS file read so
    freshly-completed rows still show a value before the next cron tick
    promotes them."""
    if not srcAcc or not qryAcc:
        return ''
    key = (srcAcc, qryAcc)
    if key in _fbPctCache:
        return _fbPctCache[key]
    snapKey = f'{srcAcc}\t{qryAcc}'
    if snapKey in _fbSnapshot:
        pct = _fbSnapshot[snapKey]
        _fbPctCache[key] = pct
        return pct
    bdir = hubBuildDir(srcAcc)
    pct = ''
    if bdir:
        # GenArk builds keep lastz under trackData/, UCSC native under bed/
        sub = 'trackData' if '/asmHubs/' in bdir else 'bed'
        # chainLink.txt: first letter of query is capitalized
        # (matches the ${dstDb^} convention in installLinks).  No-op for
        # GCA_*/GCF_* accessions; converts hg38 -> Hg38 for native dbs.
        QryAcc = qryAcc[:1].upper() + qryAcc[1:]
        path = (f'{bdir}/{sub}/lastz.{qryAcc}/'
                f'fb.{srcAcc}.chain{QryAcc}Link.txt')
        try:
            with open(path) as f:
                txt = f.read()
            m = re.search(r'\(([\d.]+)%\)', txt)
            if m:
                pct = m.group(1) + '%'
        except OSError:
            pass
    _fbPctCache[key] = pct
    return pct


def elapsedStr(reqTime, doneTime):
    """Human-readable elapsed time between two MySQL datetimes.

    Empty string if either side is missing/NULL/unparseable, or if
    doneTime is earlier than reqTime."""
    if not reqTime or not doneTime or reqTime == 'NULL' or doneTime == 'NULL':
        return ''
    try:
        t0 = datetime.strptime(reqTime, '%Y-%m-%d %H:%M:%S')
        t1 = datetime.strptime(doneTime, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return ''
    secs = int((t1 - t0).total_seconds())
    if secs < 0:
        return ''
    d, secs = divmod(secs, 86400)
    h, secs = divmod(secs, 3600)
    m, s = divmod(secs, 60)
    if d:
        return f'{d}d {h}h {m}m'
    if h:
        return f'{h}h {m}m'
    if m:
        return f'{m}m {s}s'
    return f'{s}s'


def loadGalaxyStatus():
    """Return the Galaxy queue snapshot written by ottoRequestWatch.sh
    (which calls galaxyStatus.py from cron).

    Returns the parsed dict with an added 'stale' flag when the file is
    older than CACHE_TTL, or None if the file is missing/unreadable or
    does not hold a JSON object."""
    try:
        mtime = os.path.getmtime(CACHE_PATH)
        with open(CACHE_PATH) as f:
            data = json.load(f)
    except (OSError, ValueError):
        return None
    if not isinstance(data, dict):
        return None
    data['stale'] = (time.time() - mtime) > CACHE_TTL
    return data


def renderPage(rows, info=None, error=None, galaxyStatus=None,
               use_otto=False):
    """Write the complete HTML response (headers included) to stdout.

    rows          list of row lists in COLS order, as from fetchRows()
    info, error   optional banner messages; HTML-escaped before output
    galaxyStatus  dict from loadGalaxyStatus(), or None
    use_otto      selects the 'RR' labels and the link back to hgwdev

    NOTE(review): the copy of this function under review had all HTML
    tags and attributes stripped.  The element structure below is a
    reconstruction that keeps every surviving text fragment and Python
    expression; tag, attribute and class names, the toggle button labels
    and the status input need confirming against the repository version
    before merging."""
    out = sys.stdout.write
    out("Content-Type: text/html; charset=utf-8\r\n\r\n")

    db_label = 'RR' if use_otto else 'hgwdev'
    out('<!DOCTYPE html>\n<html><head>\n')
    out(f'<title>{TABLE} ({db_label})</title>\n')
    out(_PAGE_STYLE)
    out(f'<h1>{DB}.{TABLE} ({db_label})</h1>\n')

    if galaxyStatus:
        staleNote = ' [stale]' if galaxyStatus.get('stale') else ''
        # snapshot values come from a JSON file: stringify and escape them
        running = html.escape(str(galaxyStatus.get("running", "?")))
        queued = html.escape(str(galaxyStatus.get("queued", "?")))
        new = html.escape(str(galaxyStatus.get("new", "?")))
        ts = html.escape(str(galaxyStatus.get("ts", "")))
        out('<div class="galaxy">Galaxy queue: '
            f'{running} running · '
            f'{queued} queued · '
            f'{new} new '
            f'(as of {ts})'
            f'{staleNote}</div>\n')
    else:
        out('<div class="galaxy">Galaxy queue: '
            'status unavailable</div>\n')

    if info:
        out(f'<div class="info">{html.escape(info)}</div>\n')
    if error:
        out(f'<div class="error">{html.escape(error)}</div>\n')

    statusIdx = COLS.index('status')
    typeIdx = COLS.index('requestType')
    reqIdx = COLS.index('requestTime')
    doneIdx = COLS.index('completeTime')
    fromIdx = COLS.index('fromDb')
    toIdx = COLS.index('toDb')

    out('<div class="legend">status: ')
    # Show both status types
    out('liftOver: ')
    out(' · '.join(f'{k}={html.escape(v)}' for k, v in liftStatus.items()))
    out('<br>assembly: ')
    out(' · '.join(f'{k}={html.escape(v)}' for k, v in asmStatus.items()))

    # Count rows by type for toggle button labels
    completed_count = sum(
        1 for r in rows if len(r) > statusIdx and r[statusIdx] == '8')
    assembly_count = sum(
        1 for r in rows if len(r) > typeIdx and r[typeIdx] == 'assembly')
    liftover_count = sum(
        1 for r in rows if len(r) > typeIdx and r[typeIdx] == 'liftOver')

    # Config toggle link - switches between test and production databases
    switch_config = 'test' if use_otto else 'otto'
    switch_label = 'Switch to hgwdev' if use_otto else 'Switch to RR'
    config_url = f'?config={switch_config}'
    out(f' · {len(rows)} row(s)'
        f' <a href="{config_url}">{switch_label}</a>'
        '<span class="toggles">'
        f'<input type="button" class="toggle" data-cls="s8"'
        f' value="toggle completed ({completed_count})">'
        f'<input type="button" class="toggle" data-cls="assembly"'
        f' value="toggle assembly ({assembly_count})">'
        f'<input type="button" class="toggle" data-cls="liftover"'
        f' value="toggle liftOver ({liftover_count})"></span></div>\n')
    out('<div class="cron">cron times: 9,20,31,42,53 for '
        'ottoRequestWatch.sh, and 4,26,46 for ottoRequestPush and '
        '1,8,15,22,29,36,43,50,57 for the first acknowledgement</div>\n')

    out('<table>\n<tr>')
    for c in COLS:
        out(f'<th>{c}</th>')
    out('<th>coverage<br>from / to</th>'
        '<th>elapsed</th><th>set status</th></tr>\n')

    for r in rows:
        rid = r[0]
        try:
            stnum = int(r[statusIdx])
        except (ValueError, IndexError):
            stnum = -1
        cls_parts = []
        if stnum in (7, 8):
            cls_parts.append(f's{stnum}')
        # Add requestType class for toggle filtering
        if typeIdx < len(r):
            req_type = r[typeIdx].lower()
            if req_type in ('assembly', 'liftover'):
                cls_parts.append(req_type)
        cls = ' '.join(cls_parts)
        out(f'<tr class="{cls}">')
        for i, c in enumerate(COLS):
            cell = r[i] if i < len(r) else ''
            if c == 'comment':
                out(f'<td class="comment">{html.escape(cell)}</td>')
            elif c == 'status':
                reqType = r[typeIdx] if typeIdx < len(r) else 'liftOver'
                statusDict = getStatusDict(reqType)
                label = statusDict.get(stnum, '?')
                out(f'<td>{html.escape(cell)} '
                    f'<span class="label">{html.escape(label)}</span></td>')
            elif c in ('fromDb', 'toDb') and cell:
                href = ('https://genome-test.gi.ucsc.edu/cgi-bin/hgTracks?db='
                        + urllib.parse.quote(cell, safe=''))
                out(f'<td><a href="{html.escape(href)}">'
                    f'{html.escape(cell)}</a></td>')
            elif c == 'email' and '@' in cell:
                # show only the mailbox name; full address on hover
                user = cell.split('@', 1)[0]
                out(f'<td title="{html.escape(cell)}">'
                    f'{html.escape(user)}</td>')
            else:
                out(f'<td>{html.escape(cell)}</td>')
        fromAcc = r[fromIdx] if fromIdx < len(r) else ''
        toAcc = r[toIdx] if toIdx < len(r) else ''
        fwd = featureBitsPct(fromAcc, toAcc)
        rev = featureBitsPct(toAcc, fromAcc)
        if fwd or rev:
            out(f'<td>{html.escape(fwd or "-")} / '
                f'{html.escape(rev or "-")}</td>')
        else:
            out('<td></td>')
        elapsed = elapsedStr(r[reqIdx] if reqIdx < len(r) else '',
                             r[doneIdx] if doneIdx < len(r) else '')
        out(f'<td>{html.escape(elapsed)}</td>')
        # reset form: no action attribute, so it posts back to the
        # current URL and any ?config= in the query string rides along
        out('<td><form method="post">'
            '<input type="hidden" name="action" value="resetStatus">'
            f'<input type="hidden" name="id" value="{html.escape(rid)}">'
            '<input type="text" name="status" size="2">'
            '<input type="submit" value="set"></form></td>')
        out('</tr>\n')
    out('</table>\n')
    out(_PAGE_SCRIPT)
    out('</body></html>\n')


def main():
    """CGI entry point: run a POSTed reset and redirect, or render the
    table for a GET."""
    # Create FieldStorage once - it consumes stdin and can't be read twice
    form = cgi.FieldStorage()

    # Detect configuration from URL parameter
    use_otto = form.getfirst('config') == 'otto'
    setDbConfig(use_otto)

    # POST/Redirect/GET: handle the write, then 303 to a GET of the same URL
    # so a browser reload doesn't re-submit the form and re-run the UPDATE.
    if os.environ.get('REQUEST_METHOD', 'GET') == 'POST':
        action = form.getfirst('action', '')
        if action == 'resetStatus':
            info, error = doResetStatus(form)
        else:
            info, error = None, f"unknown action: {action!r}"
        params = {}
        if use_otto:
            params['config'] = 'otto'  # preserve config in redirect
        if info:
            params['info'] = info
        if error:
            params['error'] = error
        qs = ('?' + urllib.parse.urlencode(params)) if params else ''
        sys.stdout.write(f"Status: 303 See Other\r\n"
                         f"Location: {os.environ.get('SCRIPT_NAME','')}{qs}"
                         f"\r\n\r\n")
        return

    # GET: pick up banner messages left by the PRG redirect, if any
    info = form.getfirst('info') or None
    error = form.getfirst('error') or None
    try:
        rows = fetchRows()
    except RuntimeError as e:
        rows = []
        error = (error + ' / ' if error else '') + f"fetch failed: {e}"

    # one bulk lookup of GenArk asmNames so hubBuildDir() avoids NFS readdir
    fromIdx = COLS.index('fromDb')
    toIdx = COLS.index('toDb')
    gcAccs = {r[idx]
              for r in rows
              for idx in (fromIdx, toIdx)
              if idx < len(r) and r[idx].startswith(('GCA_', 'GCF_'))}
    loadGenarkNames(gcAccs)

    loadFeatureBitsSnapshot()
    galaxyStatus = loadGalaxyStatus()
    renderPage(rows, info=info, error=error, galaxyStatus=galaxyStatus,
               use_otto=use_otto)


if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Last-resort boundary: report the failure as a server error
        # rather than an implicit 200.
        sys.stdout.write("Status: 500 Internal Server Error\r\n")
        sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n")
        sys.stdout.write(f"ottoRequestView.cgi error: {e}\n")