17b7d3c37be41135afaf8e91e365e3847af96ca5 lrnassar Mon Jun 22 10:56:56 2026 -0700 Add TAD (topologically associating domains) track set on hg19, hg38, mm10, mm39. refs #21599 New "tads" superTrack collecting published TAD calls, alpha-gated via include tad.ra alpha in each assembly's trackDb.ra. hg38 (all five sources): Dixon 2012 domains, Schmitt 2016 boundaries, McArthur & Capra 2021 boundary stability, ENCODE contact domains (faceted composite over 117 biosamples), and 3D Genome Browser 2.0 domains (faceted composite over 464 datasets). hg19: the three sources with hg19-compatible data (Dixon, Schmitt, McArthur). mm10/mm39 (domains only; the boundary sources have no mouse data): Dixon, ENCODE (faceted, 16 biosamples), and 3D Genome Browser (faceted, 30 datasets); mm39 lifted from mm10, lift noted in the long labels. Faceted composites are organ-colored from a TAD-owned organ_colors.json symlinked into /gbdb/<asm>/bbi/tad/. Build scripts and autoSql are version-controlled under makeDb/scripts/tad/ and symlinked into the per-source build dirs. Provenance and fetch for every dataset are documented in the makedocs (doc/hg38/tad.txt, doc/mm10/tad.txt, doc/mm39/tad.txt, and the hg19 TAD section in doc/hg19.txt). diff --git src/hg/makeDb/scripts/tad/buildDixonMouse.sh src/hg/makeDb/scripts/tad/buildDixonMouse.sh new file mode 100644 index 00000000000..a22199c47a5 --- /dev/null +++ src/hg/makeDb/scripts/tad/buildDixonMouse.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Build mouse Dixon 2012 TAD domains (tadsDixon) for mm10 + mm39 by liftOver from the +# native mm9 calls. refs #21599 +# Source: Dixon 2012 Nature suppl Table S3 "Combined" sheets, mouse mESC (2200) + cortex +# (1518) domains, mm9, 40 kb (extracted earlier to lifttest/mm9_{mesc,cortex}_domains.bed). 
# Strict mode: abort on errors (-e, inherited by ERR traps via -E), on unset
# variables (-u) and on any failing pipeline stage (pipefail); -b reports
# background job status immediately.
set -beEu -o pipefail

# Directory holding the mm9 source BEDs, the autoSql for the bigBed fields,
# and the root under which per-assembly output directories are created.
SRC=/hive/users/lrnassar/claude/RM21599/lifttest
AS=/hive/data/outside/tad/tadDomain.as
BUILD=/hive/data/outside/tad/dixon2012/build

# Per-target-assembly liftOver chain (from mm9) and chrom.sizes.
declare -A CHAIN=(
  [mm10]=/gbdb/mm9/liftOver/mm9ToMm10.over.chain.gz
  [mm39]=/gbdb/mm9/liftOver/mm9ToMm39.over.chain.gz
)
declare -A CS=(
  [mm10]=/hive/data/genomes/mm10/chrom.sizes
  [mm39]=/hive/data/genomes/mm39/chrom.sizes
)

# cell type -> (source bed, display name, bigBed basename)
SRCBED_mesc=mm9_mesc_domains.bed;     NAME_mesc=mESC;     BB_mesc=tadsDixonMESC
SRCBED_cortex=mm9_cortex_domains.bed; NAME_cortex=Cortex; BB_cortex=tadsDixonCortex

# Scratch space; the EXIT trap removes it on every exit path, including an
# abort triggered by set -e part-way through the loop.
tmp=$(mktemp -d)
trap 'rm -rf -- "${tmp:?}"' EXIT

for asm in mm10 mm39; do
  out=$BUILD/$asm
  mkdir -p "$out"
  for ct in mesc cortex; do
    # Look up the per-cell-type settings by indirect expansion (no eval).
    srcRef=SRCBED_$ct; nameRef=NAME_$ct; bbRef=BB_$ct
    src=${!srcRef}; name=${!nameRef}; bb=${!bbRef}

    # Each assembly/cell-type pair gets its own work directory so that a
    # failed step can never pick up intermediates left by an earlier pair.
    work=$tmp/$asm.$ct
    mkdir "$work"

    # set col4 = cell-type name (matches human Dixon: mouseOver "Cell type: $name")
    awk -v n="$name" 'BEGIN{OFS="\t"}{print $1,$2,$3,n}' "$SRC/$src" > "$work/in.bed"

    # A non-zero liftOver exit is tolerated (as before) but no longer silent;
    # a missing output file is fatal.
    liftOver -bedPlus=4 -tab "$work/in.bed" "${CHAIN[$asm]}" \
        "$work/lift.bed" "$work/unmapped" \
      || echo "WARNING: liftOver exited non-zero for $name -> $asm" >&2
    if [[ ! -f "$work/lift.bed" ]]; then
      echo "ERROR: liftOver produced no output for $name -> $asm" >&2
      exit 1
    fi

    nin=$(wc -l < "$work/in.bed")
    nout=$(wc -l < "$work/lift.bed")
    echo "$name -> $asm: $nin -> $nout ($((nin-nout)) dropped)"

    # primary chroms only (drop any name containing "_", e.g. *_random/_alt),
    # clip to chromosome bounds, then sort by chrom and start.
    awk '$1 !~ /_/' "$work/lift.bed" | bedClip stdin "${CS[$asm]}" "$work/clip.bed"
    # LC_ALL=C gives a byte-wise chromosome order regardless of the caller's
    # locale, which is the ordering bedToBigBed checks for.
    LC_ALL=C sort -k1,1 -k2,2n "$work/clip.bed" > "$work/sort.bed"
    bedToBigBed -type=bed4 -tab -as="$AS" "$work/sort.bed" "${CS[$asm]}" "$out/$bb.bb"
  done
done

echo "DONE: $(ls "$BUILD"/mm10/*.bb "$BUILD"/mm39/*.bb)"