9169db807b49113b231d35f2beec4e04c2fb1756 lrnassar Wed Jun 3 08:29:02 2026 -0700 Add dataVersion to Orphanet and ClinGen dosage tracks to show upstream release. refs #36455 Orphanet: parse the JDBOR release version (e.g. 1.3.42 / 4.1.8 [2025-03-03]) from the downloaded XML and write it to a per-assembly version.txt that the orphadata track points to via dataVersion. ClinGen: write the ClinGen dosage 'results as of' date (e.g. 28 Apr,2026) from the curation list header to a version file shared by the clinGenHaplo and clinGenTriplo subtracks. The other ClinGen feeds publish no dataset-level date, so they are left unversioned. The version file is written per assembly only after that assembly's bigBed is installed, surfacing the source's own release, which can differ from our otto update timestamp. diff --git src/hg/utils/otto/orphanet/checkOrphanet.sh src/hg/utils/otto/orphanet/checkOrphanet.sh index 31acb138d60..8c5f78b2884 100755 --- src/hg/utils/otto/orphanet/checkOrphanet.sh +++ src/hg/utils/otto/orphanet/checkOrphanet.sh @@ -45,17 +45,24 @@ sort -k1,1 -k2,2n orphadata.hg19.bed > sortedOrphadata.hg19.bed sort -k1,1 -k2,2n orphadata.hg38.bed > sortedOrphadata.hg38.bed # Check not too much has changed oldHg19Count=$(bigBedInfo /gbdb/hg19/bbi/orphanet/orphadata.bb | grep itemCount | cut -d' ' -f2 | sed 's/,//g') newHg19Count=$(wc -l sortedOrphadata.hg19.bed | cut -d' ' -f1) oldHg38Count=$(bigBedInfo /gbdb/hg38/bbi/orphanet/orphadata.bb | grep itemCount | cut -d' ' -f2 | sed 's/,//g') newHg38Count=$(wc -l sortedOrphadata.hg38.bed | cut -d' ' -f1) echo $oldHg19Count $newHg19Count | awk '{diff=(($2-$1)/$1); if (diff > 0.1 || diff < -0.1) {printf "validate on hg19 Orphanet failed: old count: %d, new count: %d, difference: %0.2f\n", $1,$2,diff; exit 1;}}' echo $oldHg38Count $newHg38Count | awk '{diff=(($2-$1)/$1); if (diff > 0.1 || diff < -0.1) {printf "validate on hg38 Orphanet failed: old count: %d, new count: %d, difference: %0.2f\n", $1,$2,diff; exit 1;}}' # Make bigBed files bedToBigBed -tab -as=$WORKDIR/orphadata.as -type=bed9+20 sortedOrphadata.hg19.bed /hive/data/genomes/hg19/chrom.sizes orphadata.hg19.bb bedToBigBed -tab -as=$WORKDIR/orphadata.as -type=bed9+20 sortedOrphadata.hg38.bed /hive/data/genomes/hg38/chrom.sizes orphadata.hg38.bb +# Grab Orphanet's own release version from the XML header for the trackDb dataVersion +# file. Match the JDBOR line specifically; a bare version= would hit the line 1 XML +# declaration version="1.0". +ver=$(grep -m1 'JDBOR' en_product6.xml | grep -o 'version="[^"]*"' | sed 's/version="//; s/"$//') + cp orphadata.hg19.bb $WORKDIR/release/hg19/orphadata.bb +printf 'Orphadata version %s\n' "$ver" > $WORKDIR/release/hg19/version.txt cp orphadata.hg38.bb $WORKDIR/release/hg38/orphadata.bb +printf 'Orphadata version %s\n' "$ver" > $WORKDIR/release/hg38/version.txt