17b7d3c37be41135afaf8e91e365e3847af96ca5 lrnassar Mon Jun 22 10:56:56 2026 -0700 Add TAD (topologically associating domains) track set on hg19, hg38, mm10, mm39. refs #21599 New "tads" superTrack collecting published TAD calls, alpha-gated via include tad.ra alpha in each assembly's trackDb.ra. hg38 (all five sources): Dixon 2012 domains, Schmitt 2016 boundaries, McArthur & Capra 2021 boundary stability, ENCODE contact domains (faceted composite over 117 biosamples), and 3D Genome Browser 2.0 domains (faceted composite over 464 datasets). hg19: the three sources with hg19-compatible data (Dixon, Schmitt, McArthur). mm10/mm39 (domains only; the boundary sources have no mouse data): Dixon, ENCODE (faceted, 16 biosamples), and 3D Genome Browser (faceted, 30 datasets); mm39 lifted from mm10, lift noted in the long labels. Faceted composites are organ-colored from a TAD-owned organ_colors.json symlinked into /gbdb/<asm>/bbi/tad/. Build scripts and autoSql are version-controlled under makeDb/scripts/tad/ and symlinked into the per-source build dirs. Provenance and fetch for every dataset are documented in the makedocs (doc/hg38/tad.txt, doc/mm10/tad.txt, doc/mm39/tad.txt, and the hg19 TAD section in doc/hg19.txt). diff --git src/hg/makeDb/scripts/tad/liftEncodeMouse.sh src/hg/makeDb/scripts/tad/liftEncodeMouse.sh new file mode 100644 index 00000000000..42f1c61cfe3 --- /dev/null +++ src/hg/makeDb/scripts/tad/liftEncodeMouse.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Lift the native-mm10 ENCODE faceted TAD track to mm39. refs #21599 +# Lifts the finished mm10 bigBeds (bed4+5; -bedPlus=4 -tab carries the 5 Arrowhead score cols), +# copies the (assembly-independent) metadata TSV, and transforms the mm10 stanza -> mm39 +# (gbdb path + "lifted from mm10" in the longLabels). The Calls facet stays "Arrowhead (mm10)" +# to record the native call assembly. 
# Lift every finished mm10 ENCODE TAD bigBed to mm39, then copy the metadata
# TSV and rewrite the mm10 trackDb stanza for mm39.
#
# Per bigBed: bigBedToBed -> liftOver (-bedPlus=4 -tab) -> drop chrom names
# containing "_" -> bedClip against mm39 chrom.sizes -> sort -> bedToBigBed
# (bed4+5, autoSql from $AS).
# Prints one " <name>: <records in> -> <records lifted>" line per file
# (the lifted count is taken before the "_" filter and bedClip).
set -beEu -o pipefail

SRC=/hive/data/outside/tad/encode/build/mm10
DST=/hive/data/outside/tad/encode/build/mm39
CHAIN=/gbdb/mm10/liftOver/mm10ToMm39.over.chain.gz
CS=/hive/data/genomes/mm39/chrom.sizes
AS=/hive/data/outside/tad/tadDomainEncode.as

mkdir -p "$DST/tadsEncode"

tmp=$(mktemp -d)
# Remove the scratch directory on every exit path, including set -e aborts.
trap 'rm -rf -- "$tmp"' EXIT

# With nullglob an empty source directory yields an empty list, so we can
# report it clearly instead of handing a literal "*.bb" to bigBedToBed.
shopt -s nullglob
inputs=("$SRC"/tadsEncode/*.bb)
if (( ${#inputs[@]} == 0 )); then
  echo "ERROR: no bigBeds found in $SRC/tadsEncode" >&2
  exit 1
fi

for bb in "${inputs[@]}"; do
  name=$(basename "$bb")

  # Start from a clean slate: liftOver's exit status is tolerated below, so a
  # lift.bed left over from the previous file must never be picked up here.
  rm -f -- "$tmp/in.bed" "$tmp/lift.bed" "$tmp/unmapped" \
    "$tmp/clip.bed" "$tmp/sort.bed"

  bigBedToBed "$bb" "$tmp/in.bed"

  # Still best-effort on the exit status, but no longer silent about it.
  liftOver -bedPlus=4 -tab "$tmp/in.bed" "$CHAIN" "$tmp/lift.bed" "$tmp/unmapped" \
    || echo "WARNING: liftOver exited non-zero for $name" >&2
  if [[ ! -e "$tmp/lift.bed" ]]; then
    echo "ERROR: liftOver produced no output for $name" >&2
    exit 1
  fi

  nin=$(wc -l < "$tmp/in.bed")
  nout=$(wc -l < "$tmp/lift.bed")

  # Keep only chrom names without "_" and clip to the mm39 chromosome sizes.
  awk '$1 !~ /_/' "$tmp/lift.bed" | bedClip stdin "$CS" "$tmp/clip.bed"

  # Byte-wise collation so the order does not depend on the caller's locale;
  # bedToBigBed requires input sorted by chrom, then start.
  LC_ALL=C sort -k1,1 -k2,2n "$tmp/clip.bed" > "$tmp/sort.bed"

  bedToBigBed -type=bed4+5 -tab -as="$AS" "$tmp/sort.bed" "$CS" "$DST/tadsEncode/$name"
  echo " $name: $nin -> $nout"
done

# The metadata TSV is copied unchanged.
cp "$SRC/tadsEncode_metadata.tsv" "$DST/tadsEncode_metadata.tsv"

# Rewrite the stanza for mm39: gbdb paths, plus a "lifted from mm10" note on
# lines ending in " (Arrowhead/Hi-C)" and on the ENCODE longLabel lines.
# NOTE(review): the leading whitespace in the '^ longLabel' address must match
# the indentation actually used in tadsEncode.ra -- confirm against that file.
sed -e 's#/gbdb/mm10/#/gbdb/mm39/#g' \
  -e 's# (Arrowhead/Hi-C)$#, lifted from mm10#' \
  -e '/^ longLabel ENCODE TADs in /s#)$#, lifted from mm10)#' \
  "$SRC/tadsEncode.ra" > "$DST/tadsEncode.ra"

# Count outputs with a glob rather than parsing ls.
outputs=("$DST"/tadsEncode/*.bb)
echo "DONE -> $DST/ (${#outputs[@]} bigBeds)"