9169db807b49113b231d35f2beec4e04c2fb1756 lrnassar Wed Jun 3 08:29:02 2026 -0700 Add dataVersion to Orphanet and ClinGen dosage tracks to show upstream release. refs #36455 Orphanet: parse the JDBOR release version (e.g. 1.3.42 / 4.1.8 [2025-03-03]) from the downloaded XML and write it to a per-assembly version.txt that the orphadata track points to via dataVersion. ClinGen: write the ClinGen dosage 'results as of' date (e.g. 28 Apr,2026) from the curation list header to a version file shared by the clinGenHaplo and clinGenTriplo subtracks. The other ClinGen feeds publish no dataset-level date, so they are left unversioned. The version file is written per assembly only after that assembly's bigBed is installed, surfacing the source's own release, which can differ from our otto update timestamp. diff --git src/hg/utils/otto/clinGen/makeDosage.sh src/hg/utils/otto/clinGen/makeDosage.sh index 45cc160bd57..e1f8e377167 100755 --- src/hg/utils/otto/clinGen/makeDosage.sh +++ src/hg/utils/otto/clinGen/makeDosage.sh @@ -83,20 +83,27 @@ if [[ $shouldError -eq 0 ]]; then printf "validate on %s ClinGen Triplo failed: old count: %d, new count: %d, difference: %0.2f\n" $db $oldTriploLc $newTriploLc $diffPerc exit 1 fi diffPerc=$(echo $newTriploLc $diffCountTriplo | awk '{diff=$2/$1; printf "%0.2f", diff}') shouldError=$(echo $diffPerc | awk '{if ($1 > 0.1 || $1 < -0.1) {print 0} else {print 1}}') if [[ $shouldError -eq 0 ]]; then printf "validate on %s ClinGen Triplo failed with too many differences to old version: %d lines changed, new count: %d, difference: %0.2f\n" $db $diffCountTriplo $newTriploLc $diffPerc exit 1 fi bedToBigBed -type=bed9+17 -as=../../clinGenDosageHaplo.as -tab ../output/${db}.clinGenHaplo.bed /hive/data/genomes/${db}/chrom.sizes ../output/${db}.clinGenHaplo.bb bedToBigBed -type=bed9+17 -as=../../clinGenDosageTriplo.as -tab ../output/${db}.clinGenTriplo.bed /hive/data/genomes/${db}/chrom.sizes ../output/${db}.clinGenTriplo.bb cp ../output/${db}.clinGenHaplo.bb ${WORKDIR}/release/${db}/clinGenHaplo.bb cp ../output/${db}.clinGenTriplo.bb ${WORKDIR}/release/${db}/clinGenTriplo.bb + + # ClinGen stamps the curation list with its own "results as of" date (line 2, + # e.g. "#28 Apr,2026"). Expose it via dataVersion (shared by the Haplo and Triplo + # subtracks) so users can tie the browser data to a specific ClinGen dosage + # release rather than our otto run date. + dosageDate=$(sed -n 2p ClinGen_region_curation_list_${grc}.tsv | tr -d '#\r') + printf 'ClinGen Dosage Sensitivity Map, %s\n' "$dosageDate" > ${WORKDIR}/release/${db}/clinGenDosageVersion.txt done cd ../.. else echo "No ClinGen CNV update" fi