d95edf23464d1e0768072f9ed96d6d237ff9da83
hiram
  Thu May 14 10:41:48 2026 -0700
push script for any GenArk hub update refs #31811

diff --git src/hg/utils/otto/userRequests/ottoBuildGenArkHub.py src/hg/utils/otto/userRequests/ottoBuildGenArkHub.py
new file mode 100755
index 00000000000..8d0d5e540f7
--- /dev/null
+++ src/hg/utils/otto/userRequests/ottoBuildGenArkHub.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+"""
+ottoBuildGenArkHub.py - given a list of GenArk accession identifiers,
+look each one up in the genark table, group them by clade, run each
+asm's doTrackDb.bash (the way ottoRequestWatch.sh does), write the
+survivors to tsv.otto in the matching cladeAsmHub source directory
+(~/kent/src/hg/makeDb/doc/<clade>AsmHub), and run the genArkMakeCommands
+sequence there to (re)construct the assembly hub files.
+
+Usage:
+  ottoBuildGenArkHub.py [-f FILE] [accession ...]
+
+Accessions may be supplied as positional args and/or via -f FILE
+(one or more per line, whitespace-separated; '#' comments allowed).
+Each entry may be either the bare 'GCA_000001405.15' or the full
+'GCA_000001405.15_GRCh38' - any trailing _<asmName> is stripped and
+the asmName is looked up in the hgcentraltest.genark table along
+with the clade.
+
+This script shares the ottoRequestPush.lock with the live cron, so a
+manual run will exit non-zero if the cron is currently building.
+"""
+
+import argparse
+import os
+import sys
+
+import ottoLib
+
+scriptDir = os.path.dirname(os.path.abspath(__file__))
+lockPath = os.path.join(scriptDir, "ottoRequestPush.lock")
+# match ottoRequestWatch.sh's log location for doTrackDb output
+doTdbLog = os.path.join(scriptDir, "doTdb.log")
+
+
+def parseAccession(arg):
+    """Strip an optional _<asmName> suffix and return the bare GenArk
+    accession, or None if arg doesn't look like one.  Accepts both
+    'GCA_000001405.15' and 'GCA_000001405.15_GRCh38'."""
+    if not ottoLib.gcPattern.match(arg):  # pattern is defined in ottoLib
+        return None
+    parts = arg.split("_", 2)  # at most 3 pieces; text after the 2nd "_" stays in parts[2]
+    return "_".join(parts[:2])  # first two pieces only, e.g. 'GCA_000001405.15'
+
+
+def readAccessions(args, parser):
+    """Collect accessions from positional args + -f FILE.  De-dupes
+    while preserving first-seen order.  Calls parser.error() if no
+    valid accessions were supplied."""
+    rawInputs = list(args.accession)  # positional args come first in the order
+    if args.file:
+        with open(args.file) as fh:
+            for line in fh:
+                line = line.split("#", 1)[0]  # drop the comment part, if any
+                rawInputs.extend(line.split())  # whitespace-separated tokens
+    seen = set()  # bare accessions already collected
+    accessions = []
+    for arg in rawInputs:
+        bare = parseAccession(arg)
+        if bare is None:
+            print("WARNING: not a GenArk accession: %s" % arg, file=sys.stderr)
+            continue
+        if bare in seen:  # duplicates are dropped silently
+            continue
+        seen.add(bare)
+        accessions.append(bare)
+    if not accessions:
+        parser.error("no GenArk accessions supplied")  # argparse prints usage and exits
+    return accessions
+
+
+def main():  # parse args, take the lock, run doTrackDb + make per clade; exit 1 on any failure
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "accession", nargs="*",
+        help="GenArk accession (GC[AF]_<digits.version>), optionally "
+             "with a trailing _<asmName> that will be stripped",
+    )
+    parser.add_argument(
+        "-f", "--file",
+        help="file with one or more accessions "
+             "(whitespace-separated; '#' starts a comment)",
+    )
+    args = parser.parse_args()
+
+    accessions = readAccessions(args, parser)  # exits via parser.error() if none are valid
+
+    lockFh = ottoLib.acquireSingletonLock(lockPath, exitOnLocked=False)
+    if lockFh is None:  # treated here as: lock is held by another run
+        print("ERROR: another otto build is already running (%s held)"
+              % lockPath, file=sys.stderr)
+        sys.exit(1)
+    # keep ref alive for the lifetime of the process
+    _ = lockFh
+
+    genarkInfo = ottoLib.lookupGenark(accessions)  # used below as a mapping keyed by accession
+    for acc in accessions:
+        if acc not in genarkInfo:  # warn only; a missing accession does not fail the run
+            print("WARNING: %s not in genark table - skipping"
+                  % acc, file=sys.stderr)
+
+    grouped = ottoLib.groupByClade(set(genarkInfo.keys()), {}, genarkInfo)
+    if not grouped:  # grouped is used below as clade -> iterable of asmIds
+        print("ERROR: nothing to do - no accessions resolved to a clade",
+              file=sys.stderr)
+        sys.exit(1)
+
+    # doTrackDb runs first: it refreshes per-asm trackDb stanzas that the
+    # downstream make chain then bakes into the hub files.  An asm whose
+    # doTrackDb fails is dropped from this build pass (the make chain
+    # would otherwise bake stale or broken trackDb into the hub files);
+    # a clade whose every asm fails doTrackDb is dropped entirely.
+    doTdbFailures = []
+    for clade in sorted(grouped):  # sorted() copies the keys, so reassigning values is safe
+        survivors = []
+        for asmId in grouped[clade]:
+            if ottoLib.runDoTrackDb(asmId, logPath=doTdbLog):  # truthy result == success
+                survivors.append(asmId)
+            else:
+                doTdbFailures.append(asmId)
+                print("# WARNING: doTrackDb failed for %s - dropping "
+                      "from this build pass" % asmId, file=sys.stderr)
+        grouped[clade] = survivors  # keep only the asms that passed doTrackDb
+    grouped = {c: ids for c, ids in grouped.items() if ids}  # drop clades left empty
+
+    failedClades = set()
+    builtClades = set()  # NOTE(review): filled below but never read afterwards
+    for clade in sorted(grouped):
+        cladeDir = ottoLib.writeCladeTsv(clade, grouped[clade])
+        if cladeDir is None:  # NOTE(review): clade is skipped without being counted as a failure
+            continue
+        if ottoLib.runGenArkMake(cladeDir):  # truthy result == make chain succeeded
+            builtClades.add(clade)
+        else:
+            failedClades.add(clade)
+
+    if failedClades:
+        print("# build failures in clade(s): %s"
+              % ", ".join(sorted(failedClades)), file=sys.stderr)
+    if doTdbFailures:
+        print("# doTrackDb.bash failed for: %s"
+              % ", ".join(doTdbFailures), file=sys.stderr)
+
+    if failedClades or doTdbFailures:  # otherwise main() returns normally (exit status 0)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()