src/hg/utils/otto/userRequests/ottoRequestAlign.sh f5cece60b38bd8c2cfd57acf2abb972d66edb6d9

f5cece60b38bd8c2cfd57acf2abb972d66edb6d9
hiram
  Fri Apr 24 11:43:36 2026 -0700
rename doneStatus to just status and correctly set status codes refs #31811

diff --git src/hg/utils/otto/userRequests/ottoRequestAlign.sh src/hg/utils/otto/userRequests/ottoRequestAlign.sh
index 7b9559b659b..18d42c128ea 100755
--- src/hg/utils/otto/userRequests/ottoRequestAlign.sh
+++ src/hg/utils/otto/userRequests/ottoRequestAlign.sh
@@ -1,79 +1,70 @@
 #!/bin/bash
 
 # ottoRequestAlign.sh - look up an ottoRequest row by id and construct
 #   the kegAlignLastz.sh command line from genark metadata
 #
 # usage: ottoRequestAlign.sh <id>
 #
 # Queries hgcentraltest.ottoRequest for fromDb/toDb, then looks up
 # each accession in hgcentraltest.genark for asmName and clade.
-# Prints and executes the resulting kegAlignLastz.sh command.
+# Updates ottoRequest (buildDir, status) and prints the kegAlignLastz.sh command.
 
 set -beEu -o pipefail
 
+############################################################################
+### verify arguments
+############################################################################
 if [ $# != 1 ]; then
   printf "usage: ottoRequestAlign.sh <id>\n" 1>&2
   printf "  where <id> is a row id from hgcentraltest.ottoRequest\n" 1>&2
   exit 255
 fi
 
 export requestId="$1"
 
 # validate id is a positive integer
 case "${requestId}" in
   ''|*[!0-9]*)
     printf "ERROR: id must be a positive integer, got: '%s'\n" "${requestId}" 1>&2
     exit 255
     ;;
 esac
 
 ############################################################################
-# step 1: look up fromDb and toDb from ottoRequest
+### function definitions
 ############################################################################
-export ottoResult=$(hgsql -N -e \
-  "select fromDb,toDb from ottoRequest where id=${requestId};" hgcentraltest)
-
-if [ -z "${ottoResult}" ]; then
-  printf "ERROR: no ottoRequest row found for id=%s\n" "${requestId}" 1>&2
-  exit 255
-fi
-
-export fromDb=$(printf "%s" "${ottoResult}" | cut -f1)
-export toDb=$(printf "%s" "${ottoResult}" | cut -f2)
-
-if [ -z "${fromDb}" -o -z "${toDb}" ]; then
-  printf "ERROR: empty fromDb or toDb for ottoRequest id=%s\n" "${requestId}" 1>&2
-  printf "  got: fromDb='%s' toDb='%s'\n" "${fromDb}" "${toDb}" 1>&2
-  exit 255
-fi
-
-printf "# ottoRequest id=%s: fromDb='%s' toDb='%s'\n" \
-  "${requestId}" "${fromDb}" "${toDb}" 1>&2
+### setErrorStatus - set status=7 on an ottoRequest row after an error
+###   arg: id - row id in hgcentraltest.ottoRequest
+function setErrorStatus() {
+  local id="${1}"
+  hgsql -N -e "UPDATE ottoRequest SET status=7 WHERE id=${id};" hgcentraltest
+}
 
 ############################################################################
 # genarkLookup - query genark table for accession, asmName, clade
 #   arg: gcAccession (e.g. GCF_000002285.3)
 #   sets: _acc, _asmName, _clade
 ############################################################################
 function genarkLookup() {
   local acc=$1
   local result=$(hgsql -N -e \
-    "select gcAccession,asmName,clade from genark where gcAccession='${acc}';" \
+    "SELECT gcAccession,asmName,clade from genark WHERE gcAccession='${acc}';" \
     hgcentraltest)
   if [ -z "${result}" ]; then
     printf "ERROR: accession '%s' not found in hgcentraltest.genark\n" "${acc}" 1>&2
+    setErrorStatus ${requestId}
     return 1
   fi
   _acc=$(printf "%s" "${result}" | cut -f1)
   _asmName=$(printf "%s" "${result}" | cut -f2)
   _clade=$(printf "%s" "${result}" | cut -f3)
 }
 
 ############################################################################
 # dbDbCladeLookup - look up clade for a UCSC database name
 #   from dbDb.name.clade.tsv (in same directory as this script)
 #   arg: dbName (e.g. hg38, rn7)
 #   sets: _clade
 ############################################################################
 function dbDbCladeLookup() {
   local dbName=$1
@@ -91,30 +82,93 @@
 }
 
 ############################################################################
 # cladeMap - convert genark/dbDb plural clade to kegAlignLastz singular form
 #   primates -> primate, mammals -> mammal, everything else -> other
 ############################################################################
 function cladeMap() {
   local genarkClade=$1
   case "${genarkClade}" in
     primates) printf "primate" ;;
     mammals)  printf "mammal"  ;;
     *)        printf "other"   ;;
   esac
 }
 
+############################################################################
+# twoBitPath - print the path to the 2bit file for an assembly name
+#   arg: GenArk accession (GC[AF]_*) or UCSC database name
+#   GC[AF]_* names map to asmHubs/GCx/ddd/ddd/ddd/<name>/<name>.2bit,
+#   anything else to /hive/data/genomes/<name>/<name>.2bit
+############################################################################
+function twoBitPath() {
+  local asmName=$1
+  case "${asmName}" in
+    GC[AF]_*)
+      # characters 1-3, then the three 3-character groups after the '_'
+      printf "/hive/data/genomes/asmHubs/%s/%s/%s/%s/%s/%s.2bit" \
+        "${asmName:0:3}" "${asmName:4:3}" "${asmName:7:3}" \
+        "${asmName:10:3}" "${asmName}" "${asmName}"
+      ;;
+    *)
+      printf "/hive/data/genomes/%s/%s.2bit" "${asmName}" "${asmName}"
+      ;;
+  esac
+}
+
+############################################################################
+# asmN50 - print the N50 of a 2bit file: the last field of the line that
+#   follows the "one half size" line when twoBitInfo output is run via n50.pl
+############################################################################
+function asmN50() {
+  local twoBit=$1
+  twoBitInfo "${twoBit}" stdout \
+    | n50.pl stdin 2>&1 \
+    | grep -A1 "^[0-9].*one half size" \
+    | tail -1 \
+    | awk '{print $NF}'
+}
+
+############################################################################
+### main() scripting begins here
+############################################################################
+# step 1: look up fromDb and toDb from ottoRequest
+#   only a row with this id AND status = 1 qualifies; otherwise the row is
+#   shown on stderr and the script exits 255 without changing its status
+############################################################################
+export ottoResult=$(hgsql -N -e \
+  "SELECT fromDb,toDb from ottoRequest WHERE id=${requestId} AND status = 1;" hgcentraltest)
+
+if [ -z "${ottoResult}" ]; then
+  printf "ERROR: no ottoRequest row found for id=%s AND status = 1\n" "${requestId}" 1>&2
+  hgsql -e "SELECT fromDb,toDb,status from ottoRequest WHERE id=${requestId};" hgcentraltest 1>&2
+  exit 255
+fi
+
+export fromDb=$(printf "%s" "${ottoResult}" | cut -f1)
+export toDb=$(printf "%s" "${ottoResult}" | cut -f2)
+
+if [ -z "${fromDb}" -o -z "${toDb}" ]; then
+  printf "ERROR: empty fromDb or toDb for ottoRequest id=%s\n" "${requestId}" 1>&2
+  printf "  got: fromDb='%s' toDb='%s'\n" "${fromDb}" "${toDb}" 1>&2
+  setErrorStatus ${requestId}
+  exit 255
+fi
+
+printf "# ottoRequest id=%s: fromDb='%s' toDb='%s'\n" \
+  "${requestId}" "${fromDb}" "${toDb}" 1>&2
+
 ############################################################################
 # step 2: look up both identifiers -- GenArk accession or UCSC db name
 ############################################################################
 case "${fromDb}" in
   GC[AF]_*)
     genarkLookup "${fromDb}" || exit 255
     export fromId="${_acc}_${_asmName}"
     export fromClade="${_clade}"
     ;;
   *)
-    dbDbCladeLookup "${fromDb}" || exit 255
+    dbDbCladeLookup "${fromDb}" || { setErrorStatus "${requestId}"; exit 255; }
     export fromId="${fromDb}"
     export fromClade="${_clade}"
     ;;
 esac
@@ -124,98 +178,65 @@
     genarkLookup "${toDb}" || exit 255
     export toId="${_acc}_${_asmName}"
     export toClade="${_clade}"
     ;;
   *)
     dbDbCladeLookup "${toDb}" || exit 255
     export toId="${toDb}"
     export toClade="${_clade}"
     ;;
 esac
 
 printf "# from: %s  clade=%s\n" "${fromId}" "${fromClade}" 1>&2
 printf "#   to: %s  clade=%s\n" "${toId}" "${toClade}" 1>&2
 
 ############################################################################
-# step 2b: compare N50 -- better N50 assembly becomes the alignment target
+# step 3: determine N50 for each to decide target vs. query
 ############################################################################
-function twoBitPath() {
-  local asmName=$1
-  case ${asmName} in
-    GC[AF]_*)
-      local gcX=$(printf "%s" "${asmName}" | cut -c1-3)
-      local d0=$(printf "%s" "${asmName}" | cut -c5-7)
-      local d1=$(printf "%s" "${asmName}" | cut -c8-10)
-      local d2=$(printf "%s" "${asmName}" | cut -c11-13)
-      printf "/hive/data/genomes/asmHubs/%s/%s/%s/%s/%s/%s.2bit" \
-        "${gcX}" "${d0}" "${d1}" "${d2}" "${asmName}" "${asmName}"
-      ;;
-    *)
-      printf "/hive/data/genomes/%s/%s.2bit" "${asmName}" "${asmName}"
-      ;;
-  esac
-}
-
-function asmN50() {
-  local twoBit=$1
-  twoBitInfo "${twoBit}" stdout \
-    | n50.pl stdin 2>&1 \
-    | grep -A1 "^[0-9].*one half size" \
-    | tail -1 \
-    | awk '{print $NF}'
-}
 
 export from2bit=$(twoBitPath "${fromDb}")
 export to2bit=$(twoBitPath "${toDb}")
 
 if [ ! -s "${from2bit}" ]; then
   printf "ERROR: 2bit file not found: %s\n" "${from2bit}" 1>&2
+  setErrorStatus ${requestId}
   exit 255
 fi
 
 if [ ! -s "${to2bit}" ]; then
   printf "ERROR: 2bit file not found: %s\n" "${to2bit}" 1>&2
+  setErrorStatus ${requestId}
   exit 255
 fi
 
 export fromN50=$(asmN50 "${from2bit}")
 export toN50=$(asmN50 "${to2bit}")
 
 printf "# from N50: %s (%s)\n" "${fromN50}" "${fromDb}" 1>&2
 printf "#   to N50: %s (%s)\n" "${toN50}" "${toDb}" 1>&2
 
 if [ -n "${fromN50}" -a -n "${toN50}" ]; then
   if [ "${toN50}" -gt "${fromN50}" ]; then
     printf "# swapping: %s (N50=%s) becomes target over %s (N50=%s)\n" \
       "${toId}" "${toN50}" "${fromId}" "${fromN50}" 1>&2
     tmpId="${fromId}"; export fromId="${toId}"; export toId="${tmpId}"
     tmpClade="${fromClade}"; export fromClade="${toClade}"; export toClade="${tmpClade}"
   fi
 else
   printf "WARNING: could not determine N50, keeping original target/query order\n" 1>&2
 fi
 
-############################################################################
-# step 3: map clades and build the command
-############################################################################
-export fromCladeArg=$(cladeMap "${fromClade}")
-export toCladeArg=$(cladeMap "${toClade}")
-
-export cmd="kegAlignLastz.sh ${fromId} ${toId} ${fromCladeArg} ${toCladeArg}"
-
-printf "# %s\n" "${cmd}" 1>&2
-
 ############################################################################
 # step 4: compute buildDir and update ottoRequest table
 ############################################################################
 
 # derive accession ID and Query label the same way kegAlignLastz.sh does
 export tAccId=$(printf "%s" "${fromId}" | cut -d'_' -f1-2)
 export qAccId=$(printf "%s" "${toId}" | cut -d'_' -f1-2)
 export Query="${qAccId^}"
 export DS=$(date "+%F")
 
 # compute buildDir -- UCSC db default, then GenArk override
 export buildDir="/hive/data/genomes/${fromId}/bed/lastz${Query}.${DS}"
 export targetExists="/hive/data/genomes/${fromId}/bed"
 
 case ${fromId} in
@@ -229,17 +250,28 @@
     targetExists="/hive/data/genomes/asmHubs/allBuild/${tGcPath}/${fromId}/trackData"
     ;;
 esac
 
 # reuse existing build directory if one is already in progress
 working=$(ls -d ${targetExists}/lastz${Query}.* 2> /dev/null | wc -l)
 if [ "${working}" -gt 0 ]; then
   buildDir=$(ls -d ${targetExists}/lastz${Query}.* | tail -1)
   printf "# existing buildDir: %s\n" "${buildDir}" 1>&2
 fi
 
 printf "# buildDir: %s\n" "${buildDir}" 1>&2
 
 # store buildDir in ottoRequest table for workflowMonitor.sh
 hgsql -N -e \
-  "UPDATE ottoRequest SET buildDir='${buildDir}' WHERE id=${requestId};" \
+  "UPDATE ottoRequest SET buildDir='${buildDir}', status=2 WHERE id=${requestId};" \
   hgcentraltest
+
+############################################################################
+# step 5: map clades, build the kegAlignLastz.sh command, print it on stderr
+############################################################################
+export fromCladeArg=$(cladeMap "${fromClade}")
+export toCladeArg=$(cladeMap "${toClade}")
+# fromId/toId are already in target/query order after the step 3 N50 swap
+export cmd="kegAlignLastz.sh ${fromId} ${toId} ${fromCladeArg} ${toCladeArg}"
+
+printf "####### kegAlignLastz.sh script would be:\n" 1>&2
+printf "# %s\n" "${cmd}" 1>&2