65a21f5c7cdda8de27c2054ddffdd391284c3aa1 hiram Mon Jun 1 09:03:06 2026 -0700 switching entirely to genome-centdb hgcentral refs #31811 diff --git src/hg/utils/otto/userRequests/ottoRequestWatch.sh src/hg/utils/otto/userRequests/ottoRequestWatch.sh index 9b21be7fbe1..7805a3d67a1 100755 --- src/hg/utils/otto/userRequests/ottoRequestWatch.sh +++ src/hg/utils/otto/userRequests/ottoRequestWatch.sh @@ -9,61 +9,63 @@ # # Phase 1: new requests needing alignment setup - status=1 AND buildDir='' # run ottoRequestAlign.sh to set up and launch the workflow # Phase 2: in-progress requests needing workflow monitoring # run workflowMonitor.sh to poll Galaxy and install results # 0 pending, 1 notified, 2 in progress, 3 galaxy done, 4 tracks complete, # 5 ready to push, 6 push is done, 7 problems, # 8 final notification has been sent == process is complete ### cron job entry: #9,20,31,42,53 * * * * ~/kent/src/hg/utils/otto/userRequests/ottoRequestWatch.sh set -eEu -o pipefail umask 002 export scriptDir=$(cd "$(dirname "$0")" && pwd) +export centDb="hgcentral" +export hgSql="hgsql -hgenome-centdb" ############################################################################## ### singleton lock - only one instance at a time ### Open lockPath on FD 9 for the lifetime of the shell, then take a ### non-blocking exclusive lock. Kernel releases the lock on exit ### (normal, error, or kill -9), so no stale lock cleanup is needed. ### Exit 0 silently if another instance holds the lock so cron doesn't ### email on every overlapping tick. PID is written to the file for ### information only see the holder via: ### cat ottoRequestWatch.lock (the PID) ### lsof ottoRequestWatch.lock (the locking process) ############################################################################## export lockPath="${scriptDir}/ottoRequestWatch.lock" # 9<> opens read+write without truncating, so a second instance that # comes along while we're running won't wipe our PID from the file # before its flock attempt fails. exec 9<>"${lockPath}" flock -n 9 || exit 0 # we own the lock now safe to truncate and write our PID. ': >file' # truncates via a separate FD; FD 9 keeps its position 0 from <>, so # the printf below starts writing at the beginning of the empty file. : >"${lockPath}" printf "%d\n" "$$" >&9 ############################################################################## ############################################################################## ### errors - set error status in the table function setErrorStatus() { id="${1}" - /cluster/bin/x86_64/hgsql -N -e \ - "UPDATE ottoRequest SET status=7 WHERE id=${id};" hgcentraltest + /cluster/bin/x86_64/${hgSql} -N -e \ + "UPDATE ottoRequest SET status=7 WHERE id=${id};" "${centDb}" } ############################################################################## ############################################################################## ### getFeatureBitsPct - get percentage coverage from featureBits file ### args: srcDb dstDb buildDir ### returns percentage string (e.g., "45.2%") or empty string if not found ### mimics the featureBitsPct() function from ottoRequestView.cgi ############################################################################## function getFeatureBitsPct() { local srcDb="${1}" local dstDb="${2}" local buildDir="${3}" local DstDb="${dstDb^}" # first letter capitalized @@ -131,63 +133,63 @@ ############################################################################## ### sendNotification - email the requesting user that their alignment is done ### args: reqId subject ### message body is read from stdin ### recipient: email column of ottoRequest table for that reqId ### bcc: chain-file-request-group@ucsc.edu ### envelope sender / Return-Path / bounce: genome-www@soe.ucsc.edu ### returns 0 on success, non-zero on failure ############################################################################## function sendNotification() { local reqId="${1}" local subject="${2}" local msgBody="${3}" local toAddr - toAddr="$(/cluster/bin/x86_64/hgsql -N -B -e \ - "SELECT email FROM ottoRequest WHERE id = ${reqId};" hgcentraltest)" + toAddr="$(/cluster/bin/x86_64/${hgSql} -N -B -e \ + "SELECT email FROM ottoRequest WHERE id = ${reqId};" ${centDb})" if [ -z "${toAddr}" ]; then printf "ERROR: sendNotification: no email for request %s\n" "${reqId}" 1>&2 return 1 fi local bcc="chain-file-request-group@ucsc.edu" local from="genome-www@soe.ucsc.edu" local bounce="gb" bounce+="aut" bounce+="o" bounce+="@" bounce+="uc" bounce+="sc." bounce+="ed" bounce+="u" # -f sets the envelope sender (becomes Return-Path at delivery and the # bounce address); -t reads recipients from To:/Cc:/Bcc: headers; # -oi prevents a lone "." in body from ending the message { printf "From: %s\n" "${from}" printf "To: %s\n" "${toAddr}" printf "Bcc: %s\n" "${bcc}" printf "Reply-To: %s\n" "${from}" printf "Subject: %s\n" "${subject}" printf "\n" printf "%s\n" "${msgBody}" } | /usr/sbin/sendmail -f "${bounce}" -t -oi } ############################################################################## ############################################################################## -### installLinks - drop chain/quickLift symlinks and register in hgcentraltest +### installLinks - drop chain/quickLift symlinks and register in ${centDb} ### args: tDb qDb buildDir ### detects GenArk (tDb starts with "GC") vs UCSC db and chooses the ### matching branch from doBlastzChainNet.pl loadUp(). ### returns 0 on success, non-zero on failure. ############################################################################## function installLinks() { local tDb="${1}" local qDb="${2}" local localBuildDir="${3}" local QDb="${qDb^}" local over="${tDb}To${QDb}.over.chain.gz" local quick="${qDb}" local axtDir="${localBuildDir}/axtChain" local overChain="${axtDir}/${tDb}.${qDb}.over.chain.gz" local quickBb="${axtDir}/${tDb}.${qDb}.quick.bb" @@ -256,31 +258,31 @@ rm -f "${liftOverDir}/${over}" ln -s "${overChain}" "${liftOverDir}/${over}" mkdir -p "${gbdbLiftOverDir}" "${gbdbQuickLiftDir}" rm -f "${gbdbLiftOverDir}/${over}" rm -f "${gbdbQuickLiftDir}/${quick}.bb" rm -f "${gbdbQuickLiftDir}/${quick}.link.bb" ln -s "${quickBb}" "${gbdbQuickLiftDir}/${quick}.bb" ln -s "${quickLinkBb}" "${gbdbQuickLiftDir}/${quick}.link.bb" ln -s "${liftOverDir}/${over}" "${gbdbLiftOverDir}/${over}" chainPath="${gbdbLiftOverDir}/${over}" quickPath="${gbdbQuickLiftDir}/${quick}.bb" fi - # register both rows in hgcentraltest + # register both rows in ${centDb} if ! /cluster/bin/x86_64/hgAddLiftOverChain -minMatch=0.1 -multiple \ -path="${chainPath}" "${tDb}" "${qDb}" > /dev/null 2>&1; then printf "ERROR: installLinks: hgAddLiftOverChain failed for %s -> %s\n" \ "${tDb}" "${qDb}" 1>&2 return 1 fi if ! "${HOME}/kent/src/hg/utils/automation/addQuickLift.py" \ "${tDb}" "${qDb}" "${quickPath}" > /dev/null 2>&1; then printf "ERROR: installLinks: addQuickLift.py failed for %s -> %s\n" \ "${tDb}" "${qDb}" 1>&2 return 1 fi return 0 } ############################################################################## @@ -307,112 +309,112 @@ ### state. Has its own singleton lock, writes its own snapshot file ### atomically, exits 0 silently when nothing to do. ############################################################################## if ! timeout 45 "${scriptDir}/featureBitsSnapshot.py" 2>/dev/null; then : # non-zero exit ignored: leave stale snapshot, next tick will retry fi ############################################################################## ############################################################################ # phase 0: pre-flight existing-work detection. If the alignment has # already been built in-house (legacy lastz/chain/net or an # earlier kegAlign run), the only step left is the hgdownload # push. Signals (all must hold): # /hive/data/genomes/${fromDb}/bed/lastz.${toDb} symlink exists # /hive/data/genomes/${toDb}/bed/lastz.${fromDb} symlink exists -# hgcentraltest.liftOverChain has both directions -# hgcentraltest.quickLiftChain has both directions +# ${centDb}.liftOverChain has both directions +# ${centDb}.quickLiftChain has both directions # When all four hold, fill in buildDir with the resolved # fromDb-side build dir and bump status=5 so # ottoRequestPush.py picks it up. Anything that doesn't match # stays at status=1 and falls through to phase 1. ############################################################################ while IFS=$'\t' read -r reqId fromDb toDb; do fromSym="/hive/data/genomes/${fromDb}/bed/lastz.${toDb}" toSym="/hive/data/genomes/${toDb}/bed/lastz.${fromDb}" if [ ! -L "${fromSym}" ] || [ ! -L "${toSym}" ]; then continue fi fromBuild="$(readlink -f "${fromSym}")" toBuild="$(readlink -f "${toSym}")" if [ ! -d "${fromBuild}" ] || [ ! -d "${toBuild}" ]; then continue fi - loCount=$(/cluster/bin/x86_64/hgsql -N -B -e \ + loCount=$(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT COUNT(*) FROM liftOverChain WHERE \ (fromDb='${fromDb}' AND toDb='${toDb}') OR \ - (fromDb='${toDb}' AND toDb='${fromDb}');" hgcentraltest) - qlCount=$(/cluster/bin/x86_64/hgsql -N -B -e \ + (fromDb='${toDb}' AND toDb='${fromDb}');" ${centDb}) + qlCount=$(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT COUNT(*) FROM quickLiftChain WHERE \ (fromDb='${fromDb}' AND toDb='${toDb}') OR \ - (fromDb='${toDb}' AND toDb='${fromDb}');" hgcentraltest) + (fromDb='${toDb}' AND toDb='${fromDb}');" ${centDb}) if [ "${loCount}" -lt 2 ] || [ "${qlCount}" -lt 2 ]; then continue fi printf "# request %s: prior work detected at %s, jumping to push\n" \ "${reqId}" "${fromBuild}" 1>&2 - /cluster/bin/x86_64/hgsql -N -e \ + /cluster/bin/x86_64/${hgSql} -N -e \ "UPDATE ottoRequest SET status=5, buildDir='${fromBuild}' \ - WHERE id=${reqId};" hgcentraltest -done < <(/cluster/bin/x86_64/hgsql -N -B -e \ + WHERE id=${reqId};" ${centDb} +done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT id, fromDb, toDb FROM ottoRequest \ - WHERE status = 1 AND buildDir = '' AND requestType = 'liftOver';" hgcentraltest) + WHERE status = 1 AND buildDir = '' AND requestType = 'liftOver';" ${centDb}) ############################################################################ # phase 1: new requests needing alignment setup - status=1 AND buildDir='' ############################################################################ while read -r reqId; do # printf "# starting alignment for request %s\n" "${reqId}" 1>&2 if ! "${scriptDir}/ottoRequestAlign.sh" "${reqId}"; then printf "# alignment setup FAILED for request %s\n" "${reqId}" 1>&2 setErrorStatus "${reqId}" fi -done < <(/cluster/bin/x86_64/hgsql -N -B -e \ +done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT id FROM ottoRequest WHERE status = 1 AND buildDir = '' AND requestType = 'liftOver';" \ - hgcentraltest) + ${centDb}) ############################################################################ # phase 2: in-progress requests needing workflow monitoring ############################################################################ while IFS=$'\t' read -r reqId buildDir; do if [ ! -d "${buildDir}" ]; then printf "# WARNING: buildDir not found for request %s: %s\n" \ "${reqId}" "${buildDir}" 1>&2 continue fi # takes a while for the galaxy WF to start up, wait for this file to appear if [ ! -s "${buildDir}/pendingInvocationId.txt" ]; then continue fi # printf "# monitoring request %s: %s\n" "${reqId}" "${buildDir}" 1>&2 if "${scriptDir}/workflowMonitor.sh" "${reqId}" "${buildDir}"; then # workflowMonitor.sh exits 0 both when still running and when complete; # check for the success marker to distinguish if [ -s "${buildDir}/successInvocationId.txt" ]; then - /cluster/bin/x86_64/hgsql -N -e \ + /cluster/bin/x86_64/${hgSql} -N -e \ "UPDATE ottoRequest SET status = 4, completeTime = NOW() \ - WHERE id = ${reqId};" hgcentraltest + WHERE id = ${reqId};" ${centDb} # printf "# request %s completed successfully\n" "${reqId}" 1>&2 fi # else: still running, will check again next invocation else printf "# workflow error for request %s\n" "${reqId}" 1>&2 setErrorStatus "${reqId}" fi -done < <(/cluster/bin/x86_64/hgsql -N -B -e \ +done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT id, buildDir FROM ottoRequest \ - WHERE status = 2 AND buildDir != '' AND requestType = 'liftOver';" hgcentraltest) + WHERE status = 2 AND buildDir != '' AND requestType = 'liftOver';" ${centDb}) ############################################################################ # phase 3: check for tracks done, setup symlinks set status=5 to indicate # ready to push ############################################################################ while IFS=$'\t' read -r reqId buildDir; do if [ ! -d "${buildDir}" ]; then printf "# WARNING: buildDir not found for request %s: %s\n" \ "${reqId}" "${buildDir}" 1>&2 continue fi source <(grep -E '^export (swapDir|targetDb|queryDb)=' "${buildDir}/kegAlign.sh") export trackData="$(dirname "${buildDir}")" export swapData="$(dirname "${swapDir}")" export workDir="$(basename "${buildDir}")" @@ -473,46 +475,46 @@ fi rm -f /dev/shm/swapTdb.$$.log ;; *) if ! ( cd "${swapDir}" \ && "${scriptDir}/chainNetTrackDb.pl" \ "${queryDb}" "${targetDb}" ); then printf "ERROR: chainNetTrackDb.pl failed for %s/%s\n" \ "${queryDb}" "${targetDb}" 1>&2 setErrorStatus "${reqId}" continue fi ;; esac - # install liftOver and quickLift symlinks + register in hgcentraltest, + # install liftOver and quickLift symlinks + register in ${centDb}, # for both directions if ! installLinks "${targetDb}" "${queryDb}" "${buildDir}"; then setErrorStatus "${reqId}" continue fi if ! installLinks "${queryDb}" "${targetDb}" "${swapDir}"; then setErrorStatus "${reqId}" continue fi - /cluster/bin/x86_64/hgsql -N -e \ - "UPDATE ottoRequest SET status = 5 WHERE id=${reqId};" hgcentraltest -done < <(/cluster/bin/x86_64/hgsql -N -B -e \ + /cluster/bin/x86_64/${hgSql} -N -e \ + "UPDATE ottoRequest SET status = 5 WHERE id=${reqId};" ${centDb} +done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT id, buildDir FROM ottoRequest \ - WHERE status = 4 AND buildDir != '' AND requestType = 'liftOver';" hgcentraltest) + WHERE status = 4 AND buildDir != '' AND requestType = 'liftOver';" ${centDb}) ############################################################################ # phase 4: check for push files is complete, send final notification # clean up galaxy workflow ############################################################################ while IFS=$'\t' read -r reqId fromDb toDb comment requestTime buildDir; do # time to clean up the galaxy history and workflow to release the space if [ -s "${buildDir}/successInvocationId.txt" ]; then invocationId=$(cut -f2 "${buildDir}/successInvocationId.txt") if ! "${scriptDir}/galaxyCleanup.py" "${profileJson}" "${invocationId}"; then printf "# WARNING: galaxy cleanup failed for request %s\n" "${reqId}" 1>&2 fi fi @@ -560,22 +562,22 @@ sendNotification "${reqId}" \ "from UCSC: liftOverRequest complete: ${fromDb}<->${toDb}" \ "Your lift over request is complete: From: ${fromDb} To: ${toDb} comment: ${comment} submitted: ${requestTime} ${coverageInfo} The lift.over files are available at these links: ${fromUrl} ${toUrl} " - /cluster/bin/x86_64/hgsql -N -e \ - "UPDATE ottoRequest SET status=8, completeTime=now() WHERE id=${reqId};" hgcentraltest + /cluster/bin/x86_64/${hgSql} -N -e \ + "UPDATE ottoRequest SET status=8, completeTime=now() WHERE id=${reqId};" ${centDb} -done < <(/cluster/bin/x86_64/hgsql -N -B -e \ +done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \ "SELECT id, fromDb, toDb, comment, requestTime, buildDir FROM ottoRequest \ - WHERE status = 6 AND requestType = 'liftOver';" hgcentraltest) + WHERE status = 6 AND requestType = 'liftOver';" ${centDb})