46f246dc334a47ded8a439de133f6d94ef9dae81 hiram Thu May 7 14:40:00 2026 -0700 now working correctly with UCSC database assemblies for pushing the lift.over file out to hgdownload refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestView.cgi src/hg/utils/otto/userRequests/ottoRequestView.cgi index add167a7d1e..f42bc9d62b7 100644 --- src/hg/utils/otto/userRequests/ottoRequestView.cgi +++ src/hg/utils/otto/userRequests/ottoRequestView.cgi @@ -1,272 +1,392 @@ #!/usr/bin/env python3 """ottoRequestView.cgi - web view of hgcentraltest.ottoRequest. Read-only display of every row in the table, plus a per-row 'reset status' control that is the only write path exposed. Access is restricted to a single IP (UCSC VPN, 128.114.198.5). Any other REMOTE_ADDR gets a 403. """ import cgi import html import json import os +import re import subprocess import sys import time import urllib.parse +from datetime import datetime ALLOWED_IP = '128.114.198.5' HGDB_CONF = '/usr/local/apache/cgi-bin/hg.conf' TRASH = '/data/apache/trash' DB = 'hgcentraltest' TABLE = 'ottoRequest' # Galaxy queue status panel - snapshot is refreshed by ottoRequestWatch.sh # (cron, every 11 minutes), CGI just reads it. CACHE_PATH = '/data/apache/trash/ottoRequestGalaxyStatus.json' CACHE_TTL = 1800 # seconds; older than this -> show "stale" instead # from README.txt in this directory STATUS_NAMES = { 0: 'received by API', 1: 'acknowledged, email sent', 2: 'galaxy job started', 3: 'galaxy done, download started', 4: 'downloaded, track files made', 5: 'symlinks ready, awaiting push', 6: 'push complete', 7: 'ERROR', 8: 'COMPLETE (final email sent)', } COLS = ['id', 'requestType', 'fromDb', 'toDb', 'email', 'comment', 'requestTime', 'status', 'buildDir', 'completeTime'] +# featureBits coverage lookup roots +HIVE_GENOMES = '/hive/data/genomes' +ASMHUB_ROOT = HIVE_GENOMES + '/asmHubs' + +# in-process caches; one CGI invocation only, but rows reuse same accessions +_buildDirCache = {} +_fbPctCache = {} + def forbidden(msg): sys.stdout.write("Status: 403 Forbidden\r\n") sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(msg + "\n") sys.exit(0) def checkIp(): remote = os.environ.get('REMOTE_ADDR', '') if remote != ALLOWED_IP: forbidden(f"Access denied for {remote!r}; this page is restricted.") def unescapeMysql(s): """Reverse `hgsql -B` escaping (\\n, \\t, \\\\, \\0). One pass so \\\\n stays a literal backslash + 'n'.""" out, i, n = [], 0, len(s) while i < n: if s[i] == '\\' and i + 1 < n: c = s[i+1] if c == 'n': out.append('\n') elif c == 't': out.append('\t') elif c == '\\': out.append('\\') elif c == '0': out.append('\0') else: out.append(s[i:i+2]) i += 2 else: out.append(s[i]); i += 1 return ''.join(out) def hgsqlRun(sql): """Run sql via hgsql against DB. Returns (ok, stdout, stderr). Running under Apache the process has no ~/.hg.conf, so point hgsql at the cgi-bin hg.conf via HGDB_CONF.""" env = dict(os.environ) env['HGDB_CONF'] = HGDB_CONF env['HOME'] = TRASH cmd = ['/cluster/bin/x86_64/hgsql', '-profile=central', DB, '-N', '-B', '-e', sql] r = subprocess.run(cmd, capture_output=True, text=True, env=env) return (r.returncode == 0, r.stdout, r.stderr) def fetchRows(): sql = f"SELECT {','.join(COLS)} FROM {TABLE} ORDER BY id DESC" ok, out, err = hgsqlRun(sql) if not ok: raise RuntimeError(err.strip() or 'hgsql failed') rows = [] if out.strip(): for line in out.rstrip('\n').split('\n'): rows.append([unescapeMysql(f) for f in line.split('\t')]) return rows def doResetStatus(form): rid = form.getfirst('id', '') stat = form.getfirst('status', '') if not rid.isdigit(): return None, f"bad id: {rid!r}" if not stat.isdigit() or int(stat) not in STATUS_NAMES: return None, f"bad status: {stat!r}" sql = (f"UPDATE {TABLE} SET status = {int(stat)} " f"WHERE id = {int(rid)}") ok, _out, err = hgsqlRun(sql) if not ok: return None, err.strip() or 'hgsql update failed' return (f"id={rid} status set to {stat} " f"({STATUS_NAMES[int(stat)]})"), None +def hubBuildDir(acc): + """Locate the hive build directory for a fromDb/toDb value. + GenArk accession (GCA_/GCF_) -> asmHubs/{genbank,refseq}Build////_ + UCSC native db (e.g. hg38) -> /hive/data/genomes/ + Returns absolute path or None.""" + if not acc: + return None + if acc in _buildDirCache: + return _buildDirCache[acc] + result = None + if (acc.startswith('GCF_') or acc.startswith('GCA_')) and len(acc) >= 13: + src = acc[:3] + sub = 'refseqBuild' if src == 'GCF' else 'genbankBuild' + digits = acc[4:].split('.', 1)[0] + if len(digits) >= 9: + parent = (f'{ASMHUB_ROOT}/{sub}/{src}/' + f'{digits[0:3]}/{digits[3:6]}/{digits[6:9]}') + try: + for entry in os.listdir(parent): + if entry.startswith(acc + '_'): + result = f'{parent}/{entry}' + break + except OSError: + pass + else: + candidate = f'{HIVE_GENOMES}/{acc}' + if os.path.isdir(candidate): + result = candidate + _buildDirCache[acc] = result + return result + + +def featureBitsPct(srcAcc, qryAcc): + """Return percentage from fb..chainLink.txt (% of srcAcc + covered by chains to qryAcc), or '' if unavailable.""" + if not srcAcc or not qryAcc: + return '' + key = (srcAcc, qryAcc) + if key in _fbPctCache: + return _fbPctCache[key] + bdir = hubBuildDir(srcAcc) + pct = '' + if bdir: + # GenArk builds keep lastz under trackData/, UCSC native under bed/ + sub = 'trackData' if '/asmHubs/' in bdir else 'bed' + # chainLink.txt: first letter of query is capitalized + # (matches the ${dstDb^} convention in installLinks). No-op for + # GCA_*/GCF_* accessions; converts hg38 -> Hg38 for native dbs. + QryAcc = qryAcc[:1].upper() + qryAcc[1:] + path = (f'{bdir}/{sub}/lastz.{qryAcc}/' + f'fb.{srcAcc}.chain{QryAcc}Link.txt') + try: + with open(path) as f: + txt = f.read() + m = re.search(r'\(([\d.]+)%\)', txt) + if m: + pct = m.group(1) + '%' + except OSError: + pass + _fbPctCache[key] = pct + return pct + + +def elapsedStr(reqTime, doneTime): + """Human-readable elapsed time between two MySQL datetimes. + Empty string if either side is missing/NULL/unparseable.""" + if not reqTime or not doneTime or reqTime == 'NULL' or doneTime == 'NULL': + return '' + try: + t0 = datetime.strptime(reqTime, '%Y-%m-%d %H:%M:%S') + t1 = datetime.strptime(doneTime, '%Y-%m-%d %H:%M:%S') + except ValueError: + return '' + secs = int((t1 - t0).total_seconds()) + if secs < 0: + return '' + d, secs = divmod(secs, 86400) + h, secs = divmod(secs, 3600) + m, s = divmod(secs, 60) + if d: return f'{d}d {h}h {m}m' + if h: return f'{h}h {m}m' + if m: return f'{m}m {s}s' + return f'{s}s' + + def loadGalaxyStatus(): """Return the Galaxy queue snapshot written by ottoRequestWatch.sh (which calls galaxyStatus.py from cron). Returns the parsed dict with an added 'stale' flag when the file is older than CACHE_TTL, or None if the file is missing/unreadable.""" try: mtime = os.path.getmtime(CACHE_PATH) with open(CACHE_PATH) as f: data = json.load(f) except (OSError, ValueError): return None data['stale'] = (time.time() - mtime) > CACHE_TTL return data def renderPage(rows, info=None, error=None, galaxyStatus=None): sys.stdout.write("Content-Type: text/html; charset=utf-8\r\n\r\n") out = sys.stdout.write out('\n\n') out(f'{TABLE}\n') out('\n') out(f'

{DB}.{TABLE}

\n') if galaxyStatus: staleNote = ' [stale]' if galaxyStatus.get('stale') else '' out('
Galaxy queue: ' f'{galaxyStatus.get("running", "?")} running · ' f'{galaxyStatus.get("queued", "?")} queued · ' f'{galaxyStatus.get("new", "?")} new ' f'(as of {html.escape(galaxyStatus.get("ts", ""))})' f'{staleNote}
\n') else: out('
Galaxy queue: ' 'status unavailable
\n') if info: out(f'\n') if error: out(f'\n') out('
status: ') out(' · '.join(f'{k}={html.escape(v)}' for k, v in STATUS_NAMES.items())) out(f' · {len(rows)} row(s)' '
\n') out('\n') for c in COLS: out(f'') - out('\n') + out('' + '\n') + + reqIdx = COLS.index('requestTime') + doneIdx = COLS.index('completeTime') + fromIdx = COLS.index('fromDb') + toIdx = COLS.index('toDb') for r in rows: rid = r[0] try: stnum = int(r[7]) except (ValueError, IndexError): stnum = -1 cls = f's{stnum}' if stnum in (7, 8) else '' out(f'') for i, c in enumerate(COLS): cell = r[i] if i < len(r) else '' if c == 'comment': out(f'') elif c == 'status': label = STATUS_NAMES.get(stnum, '?') out(f'') elif c in ('fromDb', 'toDb') and cell: href = ('https://genome-test.gi.ucsc.edu/cgi-bin/hgTracks?db=' + urllib.parse.quote(cell, safe='')) out(f'') + elif c == 'email' and '@' in cell: + user = cell.split('@', 1)[0] + out(f'') else: out(f'') + fromAcc = r[fromIdx] if fromIdx < len(r) else '' + toAcc = r[toIdx] if toIdx < len(r) else '' + fwd = featureBitsPct(fromAcc, toAcc) + rev = featureBitsPct(toAcc, fromAcc) + if fwd or rev: + out(f'') + else: + out('') + + elapsed = elapsedStr(r[reqIdx] if reqIdx < len(r) else '', + r[doneIdx] if doneIdx < len(r) else '') + out(f'') # reset form out('') out('\n') out('
{c}set status
' + 'coverage
from / to
elapsedset status
{html.escape(cell)}{html.escape(cell)} ' f'{html.escape(label)}' f'{html.escape(cell)}' + f'{html.escape(user)}{html.escape(cell)}{html.escape(fwd or "-")} / ' + f'{html.escape(rev or "-")}{html.escape(elapsed)}
' '' f'' '
\n') out('\n') out('\n') def main(): checkIp() # POST/Redirect/GET: handle the write, then 303 to a GET of the same URL # so a browser reload doesn't re-submit the form and re-run the UPDATE. if os.environ.get('REQUEST_METHOD', 'GET') == 'POST': form = cgi.FieldStorage() action = form.getfirst('action', '') if action == 'resetStatus': info, error = doResetStatus(form) else: info, error = None, f"unknown action: {action!r}" params = {} if info: params['info'] = info if error: params['error'] = error qs = ('?' + urllib.parse.urlencode(params)) if params else '' sys.stdout.write(f"Status: 303 See Other\r\n" f"Location: {os.environ.get('SCRIPT_NAME','')}{qs}" f"\r\n\r\n") return # GET: pick up banner messages left by the PRG redirect, if any qs = cgi.FieldStorage() info = qs.getfirst('info') or None error = qs.getfirst('error') or None try: rows = fetchRows() except RuntimeError as e: rows = [] error = (error + ' / ' if error else '') + f"fetch failed: {e}" galaxyStatus = loadGalaxyStatus() renderPage(rows, info=info, error=error, galaxyStatus=galaxyStatus) if __name__ == '__main__': try: main() except Exception as e: sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(f"ottoRequestView.cgi error: {e}\n")