6ddaedca96e314bcd1607e69bcfa8fb228d22a3c
hiram
  Thu May 21 21:53:56 2026 -0700
common functions moved to ottoLib.py and shared with ottoBuildGenArkHub.py and fixup the legacy clade situation refs #31811

diff --git src/hg/utils/otto/userRequests/ottoLib.py src/hg/utils/otto/userRequests/ottoLib.py
index 2ceb5599b06..25e369027d2 100644
--- src/hg/utils/otto/userRequests/ottoLib.py
+++ src/hg/utils/otto/userRequests/ottoLib.py
@@ -144,55 +144,88 @@
             if clade is None:
                 print("WARNING: %s not in %s" % (db, cladeTsv), file=sys.stderr)
                 continue
             grouped[clade].add(db)
     return {clade: sorted(ids) for clade, ids in grouped.items()}
 
 
 def writeCladeTsv(clade, asmIds):
     """Filter <clade>.orderList.tsv down to lines matching any asmId and
     write the result to tsv.otto in the same directory.  Mirrors:
         cd ~/kent/src/hg/makeDb/doc/<clade>AsmHub
         egrep '<id1>|<id2>|...' <clade>.orderList.tsv > tsv.otto
     Only GenArk identifiers are used; UCSC native dbs are not in the
     AsmHub orderList files.
 
+    If the clade orderList is missing or yields no match, falls back to
+    legacyAsmHub/legacy.orderList.tsv and writes/returns that dir instead.
+
     Returns cladeDir on success (so the caller can chain the make
     sequence), or None if there is nothing to do for this clade.
     """
     genarkIds = [a for a in asmIds if gcPattern.match(a)]
     if not genarkIds:
         return None
+
+    # Primary location: <clade>AsmHub/<clade>.orderList.tsv
     cladeDir = os.path.join(
         kentTree, "src/hg/makeDb/doc/%sAsmHub" % clade)
     orderList = os.path.join(cladeDir, "%s.orderList.tsv" % clade)
     outPath = os.path.join(cladeDir, "tsv.otto")
-    if not os.path.isfile(orderList):
-        print("WARNING: missing %s" % orderList, file=sys.stderr)
-        return None
+
     # orderList.tsv files occasionally contain Latin-1 bytes (e.g. in
     # Scandinavian fish names) that aren't valid UTF-8.  surrogateescape
     # round-trips those bytes through read+write byte-for-byte instead of
     # raising UnicodeDecodeError.
     matched = []
+
+    if os.path.isfile(orderList):
         with open(orderList, encoding="utf-8", errors="surrogateescape") as fh:
             for line in fh:
                 if any(asmId in line for asmId in genarkIds):
                     matched.append(line)
+
+    # Legacy fallback is all-or-nothing: tried only when nothing matched above
     if not matched:
-        print("WARNING: no matches in %s" % orderList, file=sys.stderr)
+        legacyDir = os.path.join(
+            kentTree, "src/hg/makeDb/doc/legacyAsmHub")
+        legacyOrderList = os.path.join(legacyDir, "legacy.orderList.tsv")
+        legacyOutPath = os.path.join(legacyDir, "tsv.otto")
+
+        if os.path.isfile(legacyOrderList):
+            with open(legacyOrderList, encoding="utf-8", errors="surrogateescape") as fh:
+                for line in fh:
+                    if any(asmId in line for asmId in genarkIds):
+                        matched.append(line)
+
+            if matched:
+                # Legacy matches: write tsv.otto in legacyDir and return it
+                with open(legacyOutPath, "w", encoding="utf-8", errors="surrogateescape") as fh:
+                    fh.writelines(matched)
+                return legacyDir
+
+        # Nothing matched: warn about missing files and/or the empty search
+        if not os.path.isfile(orderList):
+            print("WARNING: missing %s" % orderList, file=sys.stderr)
+        if not os.path.isfile(legacyOrderList):
+            print("WARNING: missing %s" % legacyOrderList, file=sys.stderr)
+        if os.path.isfile(orderList) or os.path.isfile(legacyOrderList):
+            print("WARNING: no matches for %s in %s or legacy.orderList.tsv" %
+                  (genarkIds, clade), file=sys.stderr)
         return None
+
+    # Clade matches: write tsv.otto next to the clade orderList
     with open(outPath, "w", encoding="utf-8", errors="surrogateescape") as fh:
         fh.writelines(matched)
     return cladeDir
 
 
 # Sequence of make commands run in the clade AsmHub directory after
 # tsv.otto is written.  Stops on the first failure.
 genArkMakeCommands = [
     "time (make symLinks orderList=tsv.otto) >> dbg 2>&1",
     "time (make mkGenomes orderList=tsv.otto) >> dbg 2>&1",
     "time (make symLinks orderList=tsv.otto) >> dbg 2>&1",
     "time (make verifyTestDownload orderList=tsv.otto) >> test.down.log 2>&1",
     "time (make sendDownload orderList=tsv.otto) >> send.down.log 2>&1",
     "time (make verifyDownload orderList=tsv.otto) >> verify.down.log 2>&1",
 ]