30701e4c2c2ad71f0d597ad9629da75c6a35dfba
hiram
  Wed Jun 3 14:31:24 2026 -0700
fix the legacy construction procedure refs #31811

diff --git src/hg/utils/otto/userRequests/ottoLib.py src/hg/utils/otto/userRequests/ottoLib.py
index 7f658bde93b..bed95f72d69 100644
--- src/hg/utils/otto/userRequests/ottoLib.py
+++ src/hg/utils/otto/userRequests/ottoLib.py
@@ -165,71 +165,94 @@
     genarkIds = [a for a in asmIds if gcPattern.match(a)]
     if not genarkIds:
         return None
 
     # First try the expected clade directory
     cladeDir = os.path.join(
         kentTree, "src/hg/makeDb/doc/%sAsmHub" % clade)
     orderList = os.path.join(cladeDir, "%s.orderList.tsv" % clade)
     outPath = os.path.join(cladeDir, "tsv.otto")
 
     # orderList.tsv files occasionally contain Latin-1 bytes (e.g. in
     # Scandinavian fish names) that aren't valid UTF-8.  surrogateescape
     # round-trips those bytes through read+write byte-for-byte instead of
     # raising UnicodeDecodeError.
     matched = []
+    foundIds = set()
 
     if os.path.isfile(orderList):
         with open(orderList, encoding="utf-8", errors="surrogateescape") as fh:
             for line in fh:
-                if any(asmId in line for asmId in genarkIds):
+                for asmId in genarkIds:
+                    if asmId in line:
                         matched.append(line)
+                        foundIds.add(asmId)
+                        break  # Don't match the same line multiple times
 
-    # If no matches found in expected clade, try legacy directory
-    if not matched:
+    # Look for IDs not found in main clade file
+    notMatched = [asmId for asmId in genarkIds if asmId not in foundIds]
+    if notMatched:
         legacyDir = os.path.join(
             kentTree, "src/hg/makeDb/doc/legacyAsmHub")
         legacyOrderList = os.path.join(legacyDir, "legacy.orderList.tsv")
         legacyOutPath = os.path.join(legacyDir, "tsv.otto")
 
+        legacyMatched = []
         if os.path.isfile(legacyOrderList):
             with open(legacyOrderList, encoding="utf-8", errors="surrogateescape") as fh:
                 for line in fh:
-                    if any(asmId in line for asmId in genarkIds):
-                        matched.append(line)
+                    for asmId in notMatched:
+                        if asmId in line:
+                            legacyMatched.append(line)
+                            foundIds.add(asmId)
+                            break  # Don't match the same line multiple times
+
+            if legacyMatched:
+                # Write matches to legacy directory
+                with open(legacyOutPath, "w", encoding="utf-8", errors="surrogateescape") as fh:
+                    fh.writelines(legacyMatched)
 
+                # If we have matches from both main and legacy, handle legacy completely here
                 if matched:
-                # Found matches in legacy - work there instead
-                with open(legacyOutPath, "w", encoding="utf-8", errors="surrogateescape") as fh:
-                    fh.writelines(matched)
+                    if not runGenArkMake(legacyDir):
+                        print(f"# WARNING: make commands failed in legacy directory", file=sys.stderr)
+                    # Main directory will be handled by normal return path below
+                    # This allows both directories to be processed independently
+                else:
+                    # Found matches only in legacy
                     return legacyDir
 
-        # No matches found anywhere
+    # Check for any IDs that still weren't found anywhere
+    stillNotFound = [asmId for asmId in genarkIds if asmId not in foundIds]
+    if stillNotFound:
         if not os.path.isfile(orderList):
             print("WARNING: missing %s" % orderList, file=sys.stderr)
+        legacyOrderList = os.path.join(kentTree, "src/hg/makeDb/doc/legacyAsmHub/legacy.orderList.tsv")
         if not os.path.isfile(legacyOrderList):
             print("WARNING: missing %s" % legacyOrderList, file=sys.stderr)
-        if os.path.isfile(orderList) or os.path.isfile(legacyOrderList):
         print("WARNING: no matches for %s in %s or legacy.orderList.tsv" %
-                  (genarkIds, clade), file=sys.stderr)
-        return None
+              (stillNotFound, clade), file=sys.stderr)
 
-    # Found matches in expected clade directory
+    # If we have matches from main clade, write them and return main directory
+    if matched:
         with open(outPath, "w", encoding="utf-8", errors="surrogateescape") as fh:
             fh.writelines(matched)
         return cladeDir
 
+    # No matches found anywhere
+    return None
+
 
 # Sequence of make commands run in the clade AsmHub directory after
 # tsv.otto is written.  Stops on the first failure.
 #
 # Every entry is a shell command string that passes orderList=tsv.otto
 # to make, is wrapped in `time ( ... )`, and appends (>>) the make
 # output, stdout and stderr together (2>&1), to a log file given by a
 # relative path: dbg for the symLinks/mkGenomes steps, and a separate
 # *.down.log file for each of the three download steps.
 # NOTE(review): symLinks is listed twice, once before and once after
 # mkGenomes; presumably intentional -- confirm before deduplicating.
 genArkMakeCommands = [
     "time (make symLinks orderList=tsv.otto) >> dbg 2>&1",
     "time (make mkGenomes orderList=tsv.otto) >> dbg 2>&1",
     "time (make symLinks orderList=tsv.otto) >> dbg 2>&1",
     "time (make verifyTestDownload orderList=tsv.otto) >> test.down.log 2>&1",
     "time (make sendDownload orderList=tsv.otto) >> send.down.log 2>&1",
     "time (make verifyDownload orderList=tsv.otto) >> verify.down.log 2>&1",
 ]
 
 
 def runGenArkMake(cladeDir):
     """Run the genArkMakeCommands sequence in cladeDir.  Uses bash so