9169db807b49113b231d35f2beec4e04c2fb1756
lrnassar
  Wed Jun 3 08:29:02 2026 -0700
Add dataVersion to Orphanet and ClinGen dosage tracks to show upstream release. refs #36455

Orphanet: parse the JDBOR release version (e.g. 1.3.42 / 4.1.8 [2025-03-03]) from
the downloaded XML and write it to a per-assembly version.txt that the orphadata
track points to via dataVersion.

ClinGen: write the ClinGen dosage 'results as of' date (e.g. 28 Apr,2026) from the
curation list header to a version file shared by the clinGenHaplo and clinGenTriplo
subtracks. The other ClinGen feeds publish no dataset-level date, so they are left
unversioned.

Each assembly's version file is written only after that assembly's bigBed has been
installed. The file surfaces the source's own release, which can differ from our
otto update timestamp.

diff --git src/hg/utils/otto/orphanet/checkOrphanet.sh src/hg/utils/otto/orphanet/checkOrphanet.sh
index 31acb138d60..8c5f78b2884 100755
--- src/hg/utils/otto/orphanet/checkOrphanet.sh
+++ src/hg/utils/otto/orphanet/checkOrphanet.sh
@@ -45,17 +45,24 @@
 sort -k1,1 -k2,2n orphadata.hg19.bed > sortedOrphadata.hg19.bed
 sort -k1,1 -k2,2n orphadata.hg38.bed > sortedOrphadata.hg38.bed
 
 # Check not too much has changed
 oldHg19Count=$(bigBedInfo /gbdb/hg19/bbi/orphanet/orphadata.bb | grep itemCount | cut -d' ' -f2 | sed 's/,//g')
 newHg19Count=$(wc -l sortedOrphadata.hg19.bed | cut -d' ' -f1)
 oldHg38Count=$(bigBedInfo /gbdb/hg38/bbi/orphanet/orphadata.bb | grep itemCount | cut -d' ' -f2 | sed 's/,//g')
 newHg38Count=$(wc -l sortedOrphadata.hg38.bed | cut -d' ' -f1)
 echo $oldHg19Count $newHg19Count | awk '{diff=(($2-$1)/$1); if (diff > 0.1 || diff < -0.1) {printf "validate on hg19 Orphanet failed: old count: %d, new count: %d, difference: %0.2f\n", $1,$2,diff; exit 1;}}'
 echo $oldHg38Count $newHg38Count | awk '{diff=(($2-$1)/$1); if (diff > 0.1 || diff < -0.1) {printf "validate on hg38 Orphanet failed: old count: %d, new count: %d, difference: %0.2f\n", $1,$2,diff; exit 1;}}'
 
 # Make bigBed files
 bedToBigBed -tab -as=$WORKDIR/orphadata.as -type=bed9+20 sortedOrphadata.hg19.bed /hive/data/genomes/hg19/chrom.sizes orphadata.hg19.bb
 bedToBigBed -tab -as=$WORKDIR/orphadata.as -type=bed9+20 sortedOrphadata.hg38.bed /hive/data/genomes/hg38/chrom.sizes orphadata.hg38.bb
 
+# Grab Orphanet's own release version from the XML header for the trackDb dataVersion
+# file. Match the JDBOR line specifically; a bare version= would hit the line 1 XML
+# declaration version="1.0".
+ver=$(grep -m1 'JDBOR' en_product6.xml | grep -o 'version="[^"]*"' | sed 's/version="//; s/"$//')
+
 cp orphadata.hg19.bb $WORKDIR/release/hg19/orphadata.bb
+printf 'Orphadata version %s\n' "$ver" > $WORKDIR/release/hg19/version.txt
 cp orphadata.hg38.bb $WORKDIR/release/hg38/orphadata.bb
+printf 'Orphadata version %s\n' "$ver" > $WORKDIR/release/hg38/version.txt