d57e386b7f6f2da8649984c0add4fd31810e78af
lrnassar
  Tue Jun 2 14:08:33 2026 -0700
Add weekly otto notifier for new NCBI RefSeq historical releases. refs #35766

Silent check script (cron emails otto-group only when NCBI publishes a GRCh38
RefSeq historical release newer than lastHandledRelease.txt). Includes retry on
the NCBI fetch to avoid false positives from sporadic FTP failures. Added a
weekly Wednesday crontab entry next to the mitoMap check.

diff --git src/hg/utils/otto/refSeqHistorical/checkRefSeqHistoricalUpdate.sh src/hg/utils/otto/refSeqHistorical/checkRefSeqHistoricalUpdate.sh
new file mode 100755
index 00000000000..355140674f7
--- /dev/null
+++ src/hg/utils/otto/refSeqHistorical/checkRefSeqHistoricalUpdate.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# checkRefSeqHistoricalUpdate.sh -- weekly notifier (RM #35766).
+# Emails the otto MAILTO ONLY if NCBI has published a GRCh38 RefSeq historical
+# release newer than the one we've already handled. Silent (no output, so cron
+# sends no mail) when there is nothing new.
+#
+# Exit status: 0 when the check completed (new release or not); 1 when it could
+# not be carried out (working directory missing, malformed handled tag, listing
+# unreachable, or no release tags in the listing).
+#
+# NCBI names each release dir GCF_000001405.40-RS_YYYY_MM_historical/, and the
+# RS_YYYY_MM tag sorts lexically = chronologically.
+#
+# To silence after updating the track: set the new tag in lastHandledRelease.txt.
+# Build steps live in the RS_YYYY_MM sections of:
+#   kent/src/hg/makeDb/doc/hg38/ncbiRefSeq.txt
+set -o pipefail
+# Compare and sort tags byte-wise, whatever locale cron hands us.
+export LC_ALL=C
+cd /hive/data/outside/otto/refSeqHistorical || exit 1
+
+baseUrl="https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/historical/GRCh38/"
+tagPattern='RS_[0-9]{4}_[0-9]{2}'
+maxAttempts=5
+retryDelay=30   # seconds between attempts
+
+# A missing lastHandledRelease.txt means "nothing handled yet" and must stay
+# quiet. 2>/dev/null comes BEFORE the input redirection because bash applies
+# redirections left to right: placed after, the shell's own "No such file"
+# complaint would still reach stderr and trigger a cron mail.
+handled=$(tr -d '[:space:]' 2>/dev/null < lastHandledRelease.txt)
+
+# A tag in any other shape makes the string comparison below meaningless (it
+# could nag every week or never fire), so report it instead of guessing.
+if [[ -n "$handled" && ! "$handled" =~ ^${tagPattern}$ ]]; then
+    echo "refSeqHistorical check: lastHandledRelease.txt contains '$handled', expected RS_YYYY_MM"
+    exit 1
+fi
+
+# NCBI's FTP/HTTPS server fails sporadically; retry a few times before giving
+# up, so a transient blip does not turn into a false "couldn't reach" mail.
+# The pause is skipped after the last attempt, where nothing follows it.
+listing=""
+for ((attempt = 1; attempt <= maxAttempts; attempt++)); do
+    listing=$(wget -q -O - "$baseUrl") && [ -n "$listing" ] && break
+    listing=""
+    if ((attempt < maxAttempts)); then
+        sleep "$retryDelay"
+    fi
+done
+
+if [ -z "$listing" ]; then
+    echo "refSeqHistorical check: could not reach $baseUrl after $maxAttempts attempts"
+    exit 1
+fi
+
+# Newest release = last entry of the de-duplicated, sorted tag list.
+latest=$(printf '%s\n' "$listing" | grep -oE "$tagPattern" | sort -u | tail -1)
+if [ -z "$latest" ]; then
+    echo "refSeqHistorical check: no RS_* release tags found at $baseUrl (page format changed?)"
+    exit 1
+fi
+
+if [[ "$latest" > "$handled" ]]; then
+    echo "New RefSeq historical release available for hg38: $latest (we have: ${handled:-none})"
+    echo "Source: $baseUrl"
+    echo "Build steps: RS_YYYY_MM section in kent/src/hg/makeDb/doc/hg38/ncbiRefSeq.txt"
+    echo "To silence: set $latest in /hive/data/outside/otto/refSeqHistorical/lastHandledRelease.txt. refs #35766"
+fi
+exit 0