f5cece60b38bd8c2cfd57acf2abb972d66edb6d9 hiram Fri Apr 24 11:43:36 2026 -0700 rename doneStatus to just status and correctly set status codes refs #31811 diff --git src/hg/utils/otto/userRequests/workflowMonitor.sh src/hg/utils/otto/userRequests/workflowMonitor.sh index dd53ed9338e..3d51b27cf55 100755 --- src/hg/utils/otto/userRequests/workflowMonitor.sh +++ src/hg/utils/otto/userRequests/workflowMonitor.sh @@ -1,43 +1,56 @@ #!/bin/bash # workflowMonitor.sh - check a pending Galaxy kegAlign workflow invocation # and install results when complete # -# usage: workflowMonitor.sh <buildDir> +# usage: workflowMonitor.sh <reqId> <buildDir> # # Called periodically (e.g. from cron) to poll the Galaxy API for # invocation status. Exits 0 silently when still running so cron # can simply re-invoke on schedule. When complete, downloads results # via planemo and installs chain/liftOver bigBed files into the build # and swap directories. # # Expects in <buildDir>: # pendingInvocationId.txt - written by kegAlign.sh # (3 tab-separated fields: DS invocationId logJsonPath) # kegAlign.sh - generated by kegAlignLastz.sh, # variable definitions sourced for buildDir, # swapDir, PM, targetDb, queryDb, etc. +# status indicators: +# 0 pending, 1 notified, 2 in progress, 3 Galaxy done, 4 tracks complete, +# 5 finish notification, 6 complete, 7 problems set -beEu -o pipefail -if [ $# != 1 ]; then - printf "usage: workflowMonitor.sh <buildDir>\n" 1>&2 +if [ $# != 2 ]; then + printf "usage: workflowMonitor.sh <reqId> <buildDir>\n" 1>&2 exit 255 fi -export buildDir="$1" +export reqId="$1" +export buildDir="$2" + +############################################################################## +### errors - set error status in the table +function setErrorStatus() { + id="${1}" + hgsql -N -e \ + "UPDATE ottoRequest SET status=7 WHERE id=${id};" hgcentraltest +} +############################################################################## if [ ! 
-d "${buildDir}" ]; then printf "ERROR: buildDir not found: %s\n" "${buildDir}" 1>&2 exit 255 fi cd "${buildDir}" # already finished? if [ -s successInvocationId.txt ]; then printf "# already completed: %s\n" "${buildDir}" 1>&2 exit 0 fi # no pending invocation? @@ -80,30 +93,31 @@ # strip trailing slash galaxyUrl="${galaxyUrl%/}" ############################################################################ # query invocation state from the Galaxy API ############################################################################ stateJson=$(curl -s -H "x-api-key: ${galaxyApiKey}" \ "${galaxyUrl}/api/invocations/${invocationId}") state=$(printf "%s" "${stateJson}" | jq -r '.state // "unknown"') printf "# invocation state: %s\n" "${state}" 1>&2 case "${state}" in "cancelled"|"failed") printf "ERROR: workflow %s -- invocation %s\n" "${state}" "${invocationId}" 1>&2 + setErrorStatus ${reqId} exit 1 ;; "new"|"ready") printf "# workflow still starting up, will check again later\n" 1>&2 exit 0 ;; "scheduled") # all steps dispatched -- fall through to check individual jobs ;; "completed") # all steps dispatched -- fall through to check individual jobs ;; *) printf "# unexpected state '%s', will check again later\n" "${state}" 1>&2 exit 0 @@ -128,30 +142,31 @@ '[.states | to_entries[] | select(.key != "ok" and .key != "error" and .key != "deleted" and .key != "skipped" and .key != "paused") | "\(.value) \(.key)"] | join(", ")') printf "# %d jobs still active (%s), will check again later\n" \ "${nonTerminalCount}" "${activeStates}" 1>&2 exit 0 fi # check for errored jobs -- a "completed" invocation can still contain # individual jobs that failed errorCount=$(printf "%s" "${summaryJson}" | jq '.states.error // 0') if [ "${errorCount}" -gt 0 ]; then printf "ERROR: %d job(s) had errors in invocation %s\n" \ "${errorCount}" "${invocationId}" 1>&2 + setErrorStatus ${reqId} # the invocation detail (stateJson) embeds steps with job_ids but not # job states; query each job individually 
to find which step(s) failed printf "%s" "${stateJson}" | jq -r ' .steps[] | select(.job_id != null) | "\(.order_index)\t\(.workflow_step_label // "unlabeled")\t\(.job_id)" ' | while IFS=$'\t' read -r stepIdx stepLabel jobId; do jobState=$(curl -s -H "x-api-key: ${galaxyApiKey}" \ "${galaxyUrl}/api/jobs/${jobId}" | jq -r '.state // "unknown"') if [ "${jobState}" = "error" ]; then printf " FAILED step %s: %s (job %s)\n" \ "${stepIdx}" "${stepLabel}" "${jobId}" 1>&2 fi done @@ -161,30 +176,32 @@ "\(.order_index)\t\(.workflow_step_label // "unlabeled")\t\(.subworkflow_invocation_id)" ' | while IFS=$'\t' read -r stepIdx stepLabel subInvId; do subErrors=$(curl -s -H "x-api-key: ${galaxyApiKey}" \ "${galaxyUrl}/api/invocations/${subInvId}/jobs_summary" \ | jq '.states.error // 0') if [ "${subErrors}" -gt 0 ]; then printf " FAILED step %s: %s (sub-workflow %s, %d error(s))\n" \ "${stepIdx}" "${stepLabel}" "${subInvId}" "${subErrors}" 1>&2 fi done exit 1 fi printf "# all jobs complete, downloading results\n" 1>&2 +hgsql -N -e \ + "UPDATE ottoRequest SET status = 3 WHERE id = ${reqId};" hgcentraltest ############################################################################ # download results via planemo ############################################################################ mkdir -p "result/${DS}" ${PM} invocation_download "${invocationId}" --profile vgp \ --output_directory "result/${DS}" ############################################################################ # chainBigBedFb - convert chain to bigBed and compute featureBits # args: db chainName chainGz sizesFile fbFile ############################################################################ function chainBigBedFb() { local db=$1 local chainName=$2 @@ -278,15 +295,18 @@ chainBigBedFb ${queryDb} chainLiftOver${Target} \ "${queryDb}.${targetDb}.over.chain.gz" ${qSizes} \ "${swapDir}/fb.${queryDb}.chainLiftOver${Target}Link.txt" fi cat "${swapDir}/fb.${queryDb}.chainLiftOver${Target}Link.txt" 1>&2 
############################################################################ # mark complete ############################################################################ printf "%s\tinvocation ID: %s\t%s\n" "${DS}" "${invocationId}" "${logJson}" \ > successInvocationId.txt rm -f pendingInvocationId.txt printf "### workflow monitor complete: %s %s -> %s\n" \ "${buildDir}" "${targetDb}" "${queryDb}" 1>&2 + +hgsql -N -e \ + "UPDATE ottoRequest SET status = 4 WHERE id = ${reqId};" hgcentraltest