f2ef97844c0973a30cbbc1afcdd01b4096143a69
lrnassar
  Wed Jun 3 10:56:07 2026 -0700
Add date-based dataVersion to composite/superTrack otto containers. refs #36455

Composite and superTrack container pages do not display 'Data last updated at
UCSC' (printUpdateTime returns early for them), so a dataVersion file is the
only freshness signal a user sees there. Add one to the dbVar (dbVarSv),
panelApp, clinGen (clinGenComp) and decipher (decipherContainer) containers.

Each otto build script writes a per-assembly 'Last updated <date>' file when it
actually updates the data, and the container stanza points to it via dataVersion.
clinGen's container date is written by its three displayed feeds (makeDosage,
makeGeneValidity, makeClinGenCspec); decipher writes hg38 only (hg19 is frozen).

diff --git src/hg/utils/otto/dbVar/checkDbVar.sh src/hg/utils/otto/dbVar/checkDbVar.sh
index 2e5d250d999..21e899a001e 100755
--- src/hg/utils/otto/dbVar/checkDbVar.sh
+++ src/hg/utils/otto/dbVar/checkDbVar.sh
@@ -1,165 +1,168 @@
 #!/bin/bash
 
 #	Do not modify this script, modify the source tree copy:
 #	src/hg/utils/otto/dbVar/checkDbVar.sh
 #	This script is used via a cron job and kept in $HOME/bin/scripts/
 #	The source tree copy is installed to $WORKDIR via the makefile
 #	in the same directory (make install).
 
 set -eEu -o pipefail
 WORKDIR=$1
 
 cleanUpOnError () {
     echo "dbVar build failed"
 }
 
 trap cleanUpOnError ERR
 trap "cleanUpOnError; exit 1" SIGINT SIGTERM
 umask 002
 
 #	cron jobs need to ensure this is true
 
 #	this is where we are going to work
 if [ ! -d "${WORKDIR}" ]; then
     printf "ERROR in dbVar build, can not find the directory: %s\n" "${WORKDIR}"
     exit 255
 fi
 
 # the release directory, where gbdb symlinks will point
 if [ ! -d release ]; then
     mkdir -p ${WORKDIR}/release/{hg19,hg38}
 fi
 
 cd "${WORKDIR}"
 
 # check if genome in a bottle variants have updated:
 ./checkNstd175.sh ${WORKDIR}
 
 # Maximum fractional change in itemCount before a .bb file is flagged as
 # suspicious and we refuse to promote it into release/. Matches the tooMuch=0.10
 # convention used by validateDecipher.sh / validateGwas.sh / validateISCA.sh.
 tooMuch=0.10
 
 # Minimum plausible .bb file size (bytes). Anything smaller likely means a
 # truncated / failed download. Smallest real file in current release is
 # ~90 KB (common_east_asian_only.bb), so 10 KB is a safe floor.
 minBytes=10240
 
 #	see if anything is changing, if so, download, build, and email notify
 wget -q https://ftp.ncbi.nlm.nih.gov/pub/dbVar/sandbox/dbvarhub/hub.txt -O tempUpdate
 if [[ ! -e lastUpdate || tempUpdate -nt lastUpdate ]]; then
     today=`date +%F`
     mkdir -p $today
     cd $today
     hubClone -download https://ftp.ncbi.nlm.nih.gov/pub/dbVar/sandbox/dbvarhub/hub.txt
 
     # Stage the .bb files we expose via trackDb into release/${db}.new/ rather
     # than overwriting release/${db}/ in place. Validation (below) then decides
     # whether to promote .new/ to live via an atomic directory swap. This keeps
     # in-flight readers on the live files consistent and gives us a one-cycle
     # rollback copy in release/${db}.prev/.
     #
     # We mirror common_*.bb, conflict_*.bb, somatic_*.bb, and normal_*.bb.
     # clinvar_*.bb are intentionally skipped -- those are redundant with our
     # clinvarCnv track. Any new filename NCBI adds will trigger a notification
     # below (see knownFiles.txt diff) so we can decide whether to expose it.
     for db in hg19 hg38; do
         rm -rf ../release/${db}.new
         mkdir -p ../release/${db}.new
         cp dbVar/${db}/common_*.bb    ../release/${db}.new/
         cp dbVar/${db}/conflict_*.bb  ../release/${db}.new/
         cp dbVar/${db}/somatic_*.bb   ../release/${db}.new/
         cp dbVar/${db}/normal_*.bb    ../release/${db}.new/
     done
     cd ..
 
     # Validate each staged .bb file against the current live copy. Two gates:
     #   1) byte-size floor: catches truncated downloads.
     #   2) item-count delta: catches the case where NCBI ships a well-formed
     #      but mostly-empty file, or a 10x inflation from a build bug. The
     #      0.10 threshold matches the other UCSC otto validators.
     # On any failure we leave release/${db}.new/ in place for human inspection
     # and exit 1 so the wrapper emails the diagnostic.
     validationErrors=""
     for db in hg19 hg38; do
         for newFile in release/${db}.new/*.bb; do
             base=$(basename "$newFile")
             liveFile="release/${db}/${base}"
 
             newSize=$(stat -c%s "$newFile")
             if [ "$newSize" -lt "$minBytes" ]; then
                 validationErrors+="  ${db}/${base}: new file only ${newSize} bytes (< ${minBytes}) -- likely truncated download\n"
                 continue
             fi
 
             # First-time file (no prior live copy): accept without delta check.
             if [ ! -e "$liveFile" ]; then
                 continue
             fi
 
             newCount=$(bigBedInfo "$newFile"  | awk '/^itemCount:/ {gsub(",","",$2); print $2}')
             oldCount=$(bigBedInfo "$liveFile" | awk '/^itemCount:/ {gsub(",","",$2); print $2}')
             if [ "$oldCount" -eq 0 ]; then continue; fi
 
             # |newCount - oldCount| / oldCount  > tooMuch  => fail
             tooMany=$(awk -v n="$newCount" -v o="$oldCount" -v t="$tooMuch" \
                       'BEGIN { d = n > o ? n - o : o - n; print (d / o > t) ? 1 : 0 }')
             if [ "$tooMany" = "1" ]; then
                 validationErrors+="  ${db}/${base}: itemCount changed old=${oldCount} new=${newCount} (> ${tooMuch})\n"
             fi
         done
     done
 
     if [ -n "$validationErrors" ]; then
         printf "dbVar hub update: %s\n" "$(date)"
         printf "Source: https://ftp.ncbi.nlm.nih.gov/pub/dbVar/sandbox/dbvarhub/\n"
         printf "\n*** VALIDATION FAILED -- manual intervention required ***\n"
         printf "%b" "$validationErrors"
         printf "\nNew files are staged at ${WORKDIR}/release/{hg19,hg38}.new/.\n"
         printf "Inspect vs the live release/{hg19,hg38}/ copies, then either:\n"
         printf "  - rsync each .new/ over its live dir to accept, or\n"
         printf "  - rm -rf release/{hg19,hg38}.new to reject (next cron re-downloads).\n"
         printf "lastUpdate was NOT bumped; the next cron run will retry the fetch.\n"
         exit 1
     fi
 
     # Validation passed -- promote .new/ to live via directory rename. Per-file
     # mv would give smaller inconsistency windows but per-directory mv on the
     # same filesystem is already effectively atomic, and keeping the rollback
     # as a whole-directory snapshot is simpler to reason about.
     for db in hg19 hg38; do
+        # dbVar is a superTrack whose container page shows no "Data last updated";
+        # stage its dataVersion file in .new/ so it goes live with the same rename.
+        printf 'Last updated %s\n' "$today" > "release/${db}.new/version.txt"
         rm -rf release/${db}.prev
         [ -d release/${db} ] && mv release/${db} release/${db}.prev
         mv release/${db}.new release/${db}
     done
 
     # Detect new .bb files that NCBI has added to the hub since the last
     # acknowledged state (stored in knownFiles.txt). Union both assemblies
     # so a file added to only one assembly still gets flagged. If any are
     # found, the email should include the list so a human can decide whether
     # to add a trackDb stanza + gbdb symlink.
     # Use find rather than `ls *.bb` so a partial hub state (one assembly
     # missing files) doesn't blow up the script via set -e after release/
     # has already been promoted.
     currentFiles=$(find ${today}/dbVar/hg19 ${today}/dbVar/hg38 -maxdepth 1 -name '*.bb' -printf '%f\n' | sort -u)
     newFiles=$(comm -23 <(echo "$currentFiles") <(grep -v '^#' ${WORKDIR}/knownFiles.txt | grep -v '^$' | sort -u))
 
     # Print the email body: what ran, any new files, and a reminder of next
     # steps if there are additions. Everything below this point goes into
     # the email via the wrapper.
     printf "dbVar hub update: %s\n" "$(date)"
     printf "Source: https://ftp.ncbi.nlm.nih.gov/pub/dbVar/sandbox/dbvarhub/\n"
     printf "Validation: all files passed size + 10%% itemCount delta checks.\n"
     if [ -n "$newFiles" ]; then
         printf "\n*** NEW FILES in NCBI hub (not in knownFiles.txt) ***\n"
         printf "%s\n" "$newFiles"
         printf "\nUpdate trackDb + /gbdb symlinks if these should be exposed,\n"
         printf "then update src/hg/utils/otto/dbVar/knownFiles.txt and run\n"
         printf "'make install' in that directory to acknowledge them.\n"
     fi
 
     mv tempUpdate lastUpdate
 else
     # No hub update -- stay silent so the wrapper doesn't email.
     rm tempUpdate
 fi