3976a8ec6b27464bd64e93916821dbcb52cd7671 hiram Thu Apr 30 23:01:52 2026 -0700 this actually might be working all the way through now refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestPush.py src/hg/utils/otto/userRequests/ottoRequestPush.py index 934536270c6..be46468bc0f 100755 --- src/hg/utils/otto/userRequests/ottoRequestPush.py +++ src/hg/utils/otto/userRequests/ottoRequestPush.py @@ -15,36 +15,41 @@ import sys from collections import defaultdict scriptDir = os.path.dirname(os.path.abspath(__file__)) cladeTsv = os.path.join(scriptDir, "dbDb.name.clade.tsv") lockPath = os.path.join(scriptDir, "ottoRequestPush.lock") gcPattern = re.compile(r"^GC[AF]_") def acquireSingletonLock(): """Ensure only one instance of this script runs at a time. Holds an exclusive flock on lockPath for the lifetime of the process; the kernel releases it on exit (including crash / kill -9), so no stale lock cleanup is needed. Returns the open file handle, which the caller must keep alive.""" - fh = open(lockPath, "w") + # "a+" opens read+write without truncating (and creates if missing), + # so a second instance that fails to lock doesn't wipe the running + # instance's PID from the file before exiting. + fh = open(lockPath, "a+") try: fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB) except BlockingIOError: sys.exit(0) - # indicate PID in the lock file, merely for information, not relevant + # we own the lock truncate and write our PID for information + fh.seek(0) + fh.truncate() fh.write("%d\n" % os.getpid()) fh.flush() return fh ### FYI: can also see the locking process via: lsof ottoRequestPush.lock def hgsql(query, db="hgcentraltest"): """Run hgsql -N -B and return rows as list of tuples (tab-split).""" out = subprocess.run( ["hgsql", "-N", "-B", "-e", query, db], check=True, capture_output=True, text=True, ).stdout return [tuple(line.split("\t")) for line in out.splitlines() if line] def loadDbDbClades(): @@ -118,39 +123,43 @@ AsmHub orderList files. Returns cladeDir on success (so the caller can chain the make sequence), or None if there is nothing to do for this clade. """ genarkIds = [a for a in asmIds if gcPattern.match(a)] if not genarkIds: return None cladeDir = os.path.expanduser( "~/kent/src/hg/makeDb/doc/%sAsmHub" % clade) orderList = os.path.join(cladeDir, "%s.orderList.tsv" % clade) outPath = os.path.join(cladeDir, "tsv.otto") if not os.path.isfile(orderList): print("WARNING: missing %s" % orderList, file=sys.stderr) return None + # orderList.tsv files occasionally contain Latin-1 bytes (e.g. xxx in + # Scandinavian fish names) that aren't valid UTF-8. surrogateescape + # round-trips those bytes through read+write byte-for-byte instead of + # raising UnicodeDecodeError. matched = [] - with open(orderList) as fh: + with open(orderList, encoding="utf-8", errors="surrogateescape") as fh: for line in fh: if any(asmId in line for asmId in genarkIds): matched.append(line) if not matched: print("WARNING: no matches in %s" % orderList, file=sys.stderr) return None - with open(outPath, "w") as fh: + with open(outPath, "w", encoding="utf-8", errors="surrogateescape") as fh: fh.writelines(matched) print("# wrote %d line(s) to %s" % (len(matched), outPath), file=sys.stderr) return cladeDir # Sequence of make commands run in the clade AsmHub directory after # tsv.otto is written. Stops on the first failure. makeChainCommands = [ "time (make symLinks orderList=tsv.otto) >> dbg 2>&1", "time (make mkGenomes orderList=tsv.otto) >> dbg 2>&1", "time (make symLinks orderList=tsv.otto) >> dbg 2>&1", "time (make verifyTestDownload orderList=tsv.otto) >> test.down.log 2>&1", "time (make sendDownload orderList=tsv.otto) >> send.down.log 2>&1", "time (make verifyDownload orderList=tsv.otto) >> verify.down.log 2>&1",