5648ce80fa4b2461fd6ef1d2192370896472cf37 hiram Mon May 11 15:11:22 2026 -0700 correctly recognize previous lastz/chain/net completed by in-house process and go directly to the hgdownload push and improve the efficiency of the web page ottoRequestView CGI refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestView.cgi src/hg/utils/otto/userRequests/ottoRequestView.cgi index f42bc9d62b7..8c880859803 100644 --- src/hg/utils/otto/userRequests/ottoRequestView.cgi +++ src/hg/utils/otto/userRequests/ottoRequestView.cgi @@ -41,30 +41,31 @@ 6: 'push complete', 7: 'ERROR', 8: 'COMPLETE (final email sent)', } COLS = ['id', 'requestType', 'fromDb', 'toDb', 'email', 'comment', 'requestTime', 'status', 'buildDir', 'completeTime'] # featureBits coverage lookup roots HIVE_GENOMES = '/hive/data/genomes' ASMHUB_ROOT = HIVE_GENOMES + '/asmHubs' # in-process caches; one CGI invocation only, but rows reuse same accessions _buildDirCache = {} _fbPctCache = {} +_genarkAsmName = {} # populated up-front by loadGenarkNames() def forbidden(msg): sys.stdout.write("Status: 403 Forbidden\r\n") sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(msg + "\n") sys.exit(0) def checkIp(): remote = os.environ.get('REMOTE_ADDR', '') if remote != ALLOWED_IP: forbidden(f"Access denied for {remote!r}; this page is restricted.") @@ -114,54 +115,72 @@ rid = form.getfirst('id', '') stat = form.getfirst('status', '') if not rid.isdigit(): return None, f"bad id: {rid!r}" if not stat.isdigit() or int(stat) not in STATUS_NAMES: return None, f"bad status: {stat!r}" sql = (f"UPDATE {TABLE} SET status = {int(stat)} " f"WHERE id = {int(rid)}") ok, _out, err = hgsqlRun(sql) if not ok: return None, err.strip() or 'hgsql update failed' return (f"id={rid} status set to {stat} " f"({STATUS_NAMES[int(stat)]})"), None +def loadGenarkNames(accessions): + """Populate _genarkAsmName: {gcAccession: asmName} for the given + accessions in one bulk hgsql call against the genark table. Lets + hubBuildDir() construct paths directly instead of NFS-listdir'ing + /hive/data/genomes/asmHubs/...//// to discover the + asmName suffix on each accession.""" + if not accessions: + return + quoted = ",".join("'%s'" % a for a in sorted(accessions)) + sql = (f"SELECT gcAccession, asmName FROM genark " + f"WHERE gcAccession IN ({quoted});") + ok, out, _err = hgsqlRun(sql) + if not ok or not out.strip(): + return + for line in out.rstrip('\n').split('\n'): + parts = line.split('\t') + if len(parts) >= 2: + _genarkAsmName[parts[0]] = parts[1] + + def hubBuildDir(acc): """Locate the hive build directory for a fromDb/toDb value. GenArk accession (GCA_/GCF_) -> asmHubs/{genbank,refseq}Build////_ + asmName comes from _genarkAsmName, populated up-front by + loadGenarkNames() from the genark table. UCSC native db (e.g. hg38) -> /hive/data/genomes/ Returns absolute path or None.""" if not acc: return None if acc in _buildDirCache: return _buildDirCache[acc] result = None if (acc.startswith('GCF_') or acc.startswith('GCA_')) and len(acc) >= 13: + asmName = _genarkAsmName.get(acc) + if asmName: src = acc[:3] sub = 'refseqBuild' if src == 'GCF' else 'genbankBuild' digits = acc[4:].split('.', 1)[0] if len(digits) >= 9: - parent = (f'{ASMHUB_ROOT}/{sub}/{src}/' - f'{digits[0:3]}/{digits[3:6]}/{digits[6:9]}') - try: - for entry in os.listdir(parent): - if entry.startswith(acc + '_'): - result = f'{parent}/{entry}' - break - except OSError: - pass + result = (f'{ASMHUB_ROOT}/{sub}/{src}/' + f'{digits[0:3]}/{digits[3:6]}/{digits[6:9]}/' + f'{acc}_{asmName}') else: candidate = f'{HIVE_GENOMES}/{acc}' if os.path.isdir(candidate): result = candidate _buildDirCache[acc] = result return result def featureBitsPct(srcAcc, qryAcc): """Return percentage from fb..chainLink.txt (% of srcAcc covered by chains to qryAcc), or '' if unavailable.""" if not srcAcc or not qryAcc: return '' key = (srcAcc, qryAcc) if key in _fbPctCache: @@ -367,26 +386,38 @@ f"Location: {os.environ.get('SCRIPT_NAME','')}{qs}" f"\r\n\r\n") return # GET: pick up banner messages left by the PRG redirect, if any qs = cgi.FieldStorage() info = qs.getfirst('info') or None error = qs.getfirst('error') or None try: rows = fetchRows() except RuntimeError as e: rows = [] error = (error + ' / ' if error else '') + f"fetch failed: {e}" + # one bulk lookup of GenArk asmNames so hubBuildDir() avoids NFS readdir + fromIdx = COLS.index('fromDb') + toIdx = COLS.index('toDb') + gcAccs = set() + for r in rows: + for idx in (fromIdx, toIdx): + if idx < len(r): + v = r[idx] + if v.startswith('GCA_') or v.startswith('GCF_'): + gcAccs.add(v) + loadGenarkNames(gcAccs) + galaxyStatus = loadGalaxyStatus() renderPage(rows, info=info, error=error, galaxyStatus=galaxyStatus) if __name__ == '__main__': try: main() except Exception as e: sys.stdout.write("Content-Type: text/plain; charset=utf-8\r\n\r\n") sys.stdout.write(f"ottoRequestView.cgi error: {e}\n")