#!/bin/bash

# Provenance (this text was recovered from a commit diff):
#   commit 9ffda9e5b601497b1ddcda75d5bbf377c1156713
#   hiram Mon Jun 8 14:32:29 2026 -0700
#   watch for a build to have completed refs #31811
#   diff --git src/hg/utils/otto/userRequests/asmRequestWatch.sh
#              src/hg/utils/otto/userRequests/asmRequestWatch.sh
#   index e1e7161256d..f80129f6a69 100755   @@ -1,177 +1,205 @@

# asmRequestWatch.sh - watch for assembly requests in the ottoRequest table
#
# Initial function is just to watch the status, and when it
# reaches status 6 'push complete' (which currently is set manually)
# then send email and mark finished
#
# The 'otto' user cron job 'ottoRequest.py' will be watching for status 0
# for a new entry.  It will set status 1 and send notification email
#
# Different meanings from the liftOver status settings:
#   0 pending, 1 notified, 2 in progress (trackData/ directory seen),
#   3 build complete (trackDb.txt file seen),
#   6 push is done and is available on the RR,
#   7 for problems,
#   8 final notification has been sent == process is complete

### cron job entry: (1 minute later than ottoRequestWatch.sh)
# 10,21,32,43,54 * * * * ~/kent/src/hg/utils/otto/userRequests/asmRequestWatch.sh

set -eEu -o pipefail
umask 002

# Assign first, export second: 'export var=$(cmd)' always returns 0, which
# would hide a failing 'cd' from 'set -e' and leave scriptDir empty.
scriptDir=$(cd "$(dirname "$0")" && pwd)
export scriptDir
export centDb="hgcentral"
# command name plus host option; expanded unquoted by callers so that it
# word-splits into two arguments
export hgSql="hgsql -hgenome-centdb"

##############################################################################
### singleton lock - only one instance at a time
###   Open lockPath on FD 9 for the lifetime of the shell, then take a
###   non-blocking exclusive lock.  Kernel releases the lock on exit
###   (normal, error, or kill -9), so no stale lock cleanup is needed.
###   Exit 0 silently if another instance holds the lock so cron doesn't
###   email on every overlapping tick.
###   The holder's PID is written to the lock file for information only -
###   see the holder via:
###     cat  asmRequestWatch.lock     (the PID)
###     lsof asmRequestWatch.lock     (the locking process)
##############################################################################
export lockPath="${scriptDir}/asmRequestWatch.lock"
# 9<> opens read+write without truncating, so a second instance that
# comes along while we're running won't wipe our PID from the file
# before its flock attempt fails.
exec 9<>"${lockPath}"
flock -n 9 || exit 0
# we own the lock now - safe to truncate and write our PID.  ': >file'
# truncates via a separate FD; FD 9 keeps its position 0 from <>, so
# the printf below starts writing at the beginning of the empty file.
: >"${lockPath}"
printf "%d\n" "$$" >&9
##############################################################################

##############################################################################
### setErrorStatus - set error status (7) in the ottoRequest table
###   args: reqId
##############################################################################
function setErrorStatus() {
  # 'local' keeps this from leaking a global named 'id' into the script
  local id="${1}"
  # ${hgSql} is deliberately unquoted: it holds "command -hostOption" and
  # must word-split into two arguments
  /cluster/bin/x86_64/${hgSql} -N -e \
    "UPDATE ottoRequest SET status=7 WHERE id=${id};" "${centDb}"
}
##############################################################################

##############################################################################
### sendNotification - email the requesting user that their assembly request is done
###   args: reqId subject msgBody
###     the message body is the third argument (it is NOT read from stdin)
###   recipient: email column of ottoRequest table for that reqId
###   bcc:       genark-request-group@ucsc.edu
###   From: / Reply-To: headers: genome-www@soe.ucsc.edu
###   envelope sender / Return-Path / bounce: the 'bounce' address that is
###     assembled piecewise in the function body (passed to sendmail -f)
###   returns 0 on success, non-zero on failure
##############################################################################
function sendNotification() {
  local reqId="${1}"
  local subject="${2}"
  local msgBody="${3}"
  local toAddr
  # declared separately above so a failing query is not masked by 'local'
  toAddr="$(/cluster/bin/x86_64/${hgSql} -N -B -e \
    "SELECT email FROM ottoRequest WHERE id = ${reqId};" "${centDb}")"
  if [ -z "${toAddr}" ]; then
    printf "ERROR: sendNotification: no email for request %s\n" "${reqId}" 1>&2
    return 1
  fi
  local bcc="genark-request-group@ucsc.edu"
  local from="genome-www@soe.ucsc.edu"
  # the bounce address is built up in fragments; keep it that way
  local bounce="gb"
  bounce+="aut"
  bounce+="o"
  bounce+="@"
  bounce+="uc"
  bounce+="sc."
  bounce+="ed"
  bounce+="u"
  # -f sets the envelope sender (becomes Return-Path at delivery and the
  # bounce address); -t reads recipients from To:/Cc:/Bcc: headers;
  # -oi prevents a lone "." in body from ending the message
  {
    printf "From: %s\n" "${from}"
    printf "To: %s\n" "${toAddr}"
    printf "Bcc: %s\n" "${bcc}"
    printf "Reply-To: %s\n" "${from}"
    printf "Subject: %s\n" "${subject}"
    printf "\n"
    printf "%s\n" "${msgBody}"
  } | /usr/sbin/sendmail -f "${bounce}" -t -oi
}	# function sendNotification()

##############################################################################
### accessionToPath - given an accession - expand to path name
###   args: accession
###   prints (no trailing newline) the first three characters followed by
###   three 3-character groups taken from offsets 4, 7 and 10, joined by '/'
##############################################################################
function accessionToPath() {
  local acc="${1}"
  local gcX="${acc:0:3}"
  local d0="${acc:4:3}"
  local d1="${acc:7:3}"
  local d2="${acc:10:3}"
  local pathName="${gcX}/${d0}/${d1}/${d2}"
  printf "%s" "${pathName}"
}	# function accessionToPath()

############################################################################
# phase 1: watch for new requests, detect when assembly build has started
############################################################################
while IFS=$'\t' read -r reqId fromDb; do
  accPath=$(accessionToPath "${fromDb}")
  # if the trackData/ directory is present, the build is running
  shopt -s nullglob	# make globs expand to nothing if no matches
  trackDataDirs=("/hive/data/genomes/asmHubs/allBuild/${accPath}/${fromDb}"_*/trackData)
  shopt -u nullglob	# restore default behavior
  # Check the results
  case ${#trackDataDirs[@]} in
    0)	# no directory seen yet, nothing happening
      ;;
    1)	# single directory seen - build has started - set the buildDir
      buildDir=$(dirname "$(realpath "${trackDataDirs[0]}")")
      /cluster/bin/x86_64/${hgSql} -N -e \
        "UPDATE ottoRequest SET status=2, buildDir='${buildDir}' \
           WHERE id=${reqId};" "${centDb}"
      ;;
    *)
      scriptName=$(basename "$0")
      printf "ERROR: %s: Multiple trackData directories found for %s:\n" "${scriptName}" "${accPath}" 1>&2
      printf " %s\n" "${trackDataDirs[@]}" 1>&2
      setErrorStatus "${reqId}"
      ;;
  esac
done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \
  "SELECT id, fromDb FROM ottoRequest WHERE status = 1 AND buildDir = '' AND requestType = 'assembly';" \
     "${centDb}")

############################################################################
# phase 2: watch for a build to have completed
#   a request in status 2 moves to status 3 once exactly one
#   <fromDb>_*.trackDb.txt file exists in its build directory
############################################################################
while IFS=$'\t' read -r reqId fromDb; do
  accPath=$(accessionToPath "${fromDb}")
  # if the trackDb.txt file is present, the build is finished
  shopt -s nullglob	# make globs expand to nothing if no matches
  trackDbFile=("/hive/data/genomes/asmHubs/allBuild/${accPath}/${fromDb}"_*/"${fromDb}"_*.trackDb.txt)
  shopt -u nullglob	# restore default behavior
  # Check the results
  case ${#trackDbFile[@]} in
    0)	# no trackDb.txt file seen yet, not done
      ;;
    1)	# single file seen - build is complete
      /cluster/bin/x86_64/${hgSql} -N -e \
        "UPDATE ottoRequest SET status=3 WHERE id=${reqId};" "${centDb}"
      ;;
    *)
      scriptName=$(basename "$0")
      printf "ERROR: %s: Multiple trackDb.txt files found for %s:\n" "${scriptName}" "${accPath}" 1>&2
      printf " %s\n" "${trackDbFile[@]}" 1>&2
      setErrorStatus "${reqId}"
      ;;
  esac

done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \
  "SELECT id, fromDb FROM ottoRequest WHERE status = 2 AND requestType = 'assembly';" \
     "${centDb}")

############################################################################
# check for phase 6: the assembly is complete and available on the RR
# this checking and setting status 6 is currently done manually,
#    eventually this will become automatic.
############################################################################

############################################################################
### tsvToUnitSep - filter: replace every tab on stdin with the ASCII unit
###   separator (octal 037) on stdout.
###   Tab is an "IFS whitespace" character, so 'IFS=$'\t' read' collapses
###   consecutive tabs and silently drops empty columns, shifting later
###   columns left.  A non-whitespace delimiter keeps empty columns in place.
############################################################################
function tsvToUnitSep() {
  tr '\t' '\037'
}

# For every request in status 6: email the requester, then mark status 8.
# sendNotification is not guarded: when the script runs with 'set -e', a
# failed send stops the script before the UPDATE, so the row stays at 6.
while IFS=$'\037' read -r reqId fromDb comment requestTime; do
  # same 3/3/3/3 split of the accession as used for the build directories
  export gcX="${fromDb:0:3}"
  export d0="${fromDb:4:3}"
  export d1="${fromDb:7:3}"
  export d2="${fromDb:10:3}"
  export gbDbPath="/gbdb/genark/${gcX}/${d0}/${d1}/${d2}/${fromDb}/hub.txt"
  export hubTxt="https://genome.ucsc.edu/cgi-bin/hgTracks?genome=${fromDb}&hubUrl=${gbDbPath}"
  # NOTE(review): the line breaks inside this message body were lost in the
  # text this block was recovered from and are reconstructed here - confirm
  # against the committed script.
  sendNotification "${reqId}" \
    "from UCSC: assembly request complete: ${fromDb}" \
    "from UCSC: Your assembly request is complete:

assembly: ${fromDb}
comment: ${comment}
submitted: ${requestTime}

The assembly is available in the browser at the following URL:

${hubTxt}
"
  /cluster/bin/x86_64/${hgSql} -N -e \
    "UPDATE ottoRequest SET status=8, completeTime=now() WHERE id=${reqId};" "${centDb}"
done < <(/cluster/bin/x86_64/${hgSql} -N -B -e \
  "SELECT id, fromDb, comment, requestTime FROM ottoRequest \
     WHERE status = 6 AND requestType = 'assembly';" "${centDb}" | tsvToUnitSep)