30701e4c2c2ad71f0d597ad9629da75c6a35dfba hiram Wed Jun 3 14:31:24 2026 -0700 fix the legacy construction procedure refs #31811 diff --git src/hg/utils/otto/userRequests/ottoLib.py src/hg/utils/otto/userRequests/ottoLib.py index 7f658bde93b..bed95f72d69 100644 --- src/hg/utils/otto/userRequests/ottoLib.py +++ src/hg/utils/otto/userRequests/ottoLib.py @@ -165,71 +165,94 @@ genarkIds = [a for a in asmIds if gcPattern.match(a)] if not genarkIds: return None # First try the expected clade directory cladeDir = os.path.join( kentTree, "src/hg/makeDb/doc/%sAsmHub" % clade) orderList = os.path.join(cladeDir, "%s.orderList.tsv" % clade) outPath = os.path.join(cladeDir, "tsv.otto") # orderList.tsv files occasionally contain Latin-1 bytes (e.g. in # Scandinavian fish names) that aren't valid UTF-8. surrogateescape # round-trips those bytes through read+write byte-for-byte instead of # raising UnicodeDecodeError. matched = [] + foundIds = set() if os.path.isfile(orderList): with open(orderList, encoding="utf-8", errors="surrogateescape") as fh: for line in fh: - if any(asmId in line for asmId in genarkIds): + for asmId in genarkIds: + if asmId in line: matched.append(line) + foundIds.add(asmId) + break # Don't match the same line multiple times - # If no matches found in expected clade, try legacy directory - if not matched: + # Look for IDs not found in main clade file + notMatched = [asmId for asmId in genarkIds if asmId not in foundIds] + if notMatched: legacyDir = os.path.join( kentTree, "src/hg/makeDb/doc/legacyAsmHub") legacyOrderList = os.path.join(legacyDir, "legacy.orderList.tsv") legacyOutPath = os.path.join(legacyDir, "tsv.otto") + legacyMatched = [] if os.path.isfile(legacyOrderList): with open(legacyOrderList, encoding="utf-8", errors="surrogateescape") as fh: for line in fh: - if any(asmId in line for asmId in genarkIds): - matched.append(line) + for asmId in notMatched: + if asmId in line: + legacyMatched.append(line) + foundIds.add(asmId) + break # Don't match the same line multiple times + + if legacyMatched: + # Write matches to legacy directory + with open(legacyOutPath, "w", encoding="utf-8", errors="surrogateescape") as fh: + fh.writelines(legacyMatched) + # If we have matches from both main and legacy, handle legacy completely here if matched: - # Found matches in legacy - work there instead - with open(legacyOutPath, "w", encoding="utf-8", errors="surrogateescape") as fh: - fh.writelines(matched) + if not runGenArkMake(legacyDir): + print(f"# WARNING: make commands failed in legacy directory", file=sys.stderr) + # Main directory will be handled by normal return path below + # This allows both directories to be processed independently + else: + # Found matches only in legacy return legacyDir - # No matches found anywhere + # Check for any IDs that still weren't found anywhere + stillNotFound = [asmId for asmId in genarkIds if asmId not in foundIds] + if stillNotFound: if not os.path.isfile(orderList): print("WARNING: missing %s" % orderList, file=sys.stderr) + legacyOrderList = os.path.join(kentTree, "src/hg/makeDb/doc/legacyAsmHub/legacy.orderList.tsv") if not os.path.isfile(legacyOrderList): print("WARNING: missing %s" % legacyOrderList, file=sys.stderr) - if os.path.isfile(orderList) or os.path.isfile(legacyOrderList): print("WARNING: no matches for %s in %s or legacy.orderList.tsv" % - (genarkIds, clade), file=sys.stderr) - return None + (stillNotFound, clade), file=sys.stderr) - # Found matches in expected clade directory + # If we have matches from main clade, write them and return main directory + if matched: with open(outPath, "w", encoding="utf-8", errors="surrogateescape") as fh: fh.writelines(matched) return cladeDir + # No matches found anywhere + return None + # Sequence of make commands run in the clade AsmHub directory after # tsv.otto is written. Stops on the first failure. genArkMakeCommands = [ "time (make symLinks orderList=tsv.otto) >> dbg 2>&1", "time (make mkGenomes orderList=tsv.otto) >> dbg 2>&1", "time (make symLinks orderList=tsv.otto) >> dbg 2>&1", "time (make verifyTestDownload orderList=tsv.otto) >> test.down.log 2>&1", "time (make sendDownload orderList=tsv.otto) >> send.down.log 2>&1", "time (make verifyDownload orderList=tsv.otto) >> verify.down.log 2>&1", ] def runGenArkMake(cladeDir): """Run the genArkMakeCommands sequence in cladeDir. Uses bash so