bfa91627a224f0ca4a9e10718297df25f163847a max Tue Mar 24 04:09:48 2026 -0700
adding NMD escape supertrack, refs #33737
diff --git src/hg/makeDb/doc/hg38/nmd.txt src/hg/makeDb/doc/hg38/nmd.txt
new file mode 100644
index 00000000000..064f8ee719c
--- /dev/null
+++ src/hg/makeDb/doc/hg38/nmd.txt
@@ -0,0 +1,59 @@
+#######################################################################
+# NMD escape regions from Gencode (2026-03-24 max/Claude)
+# Two outputs: decorator bigBed (per-transcript) and collapsed bigBed (merged by coordinates)
+# Collapsed version uses gene symbols from input, colors by rule, transcript lists
+# Script accepts -f bigGenePred (gencode .bb) or -f genePredExt (ncbiRefSeq .txt.gz)
+
+cd /hive/data/genomes/hg38/bed/nmd/gencode/
+
+# run the script on gencode bigGenePred - produces decorator + collapsed BED files
+~/kent/src/hg/makeDb/scripts/nmd/genePredNmdEsc -f bigGenePred \
+    /hive/data/genomes/hg38/bed/gencodeV49/build/hg38.gencodeV49.bb \
+    knownGeneNmdProt.bed nmdEscRegions.bed
+
+# build decorator bigBed
+bedSort knownGeneNmdProt.bed knownGeneNmdProt.bed
+bedToBigBed knownGeneNmdProt.bed ../../../chrom.sizes knownGeneNmdProt.bb \
+    -tab -type=bed12+5 -as=${HOME}/kent/src/hg/makeDb/scripts/nmd/nmdEscDecoration.as
+
+# build collapsed bigBed
+bedSort nmdEscRegions.bed nmdEscRegions.bed
+bedToBigBed nmdEscRegions.bed ../../../chrom.sizes nmdEscRegions.bb \
+    -tab -type=bed9+2 -as=${HOME}/kent/src/hg/makeDb/scripts/nmd/nmdEscCollapsed.as
+
+
+#######################################################################
+# NMD escape regions from NCBI RefSeq (2026-03-24 max)
+
+cd /hive/data/genomes/hg38/bed/nmd/ncbiRefSeq/
+
+# run the script on ncbiRefSeq genePredExt - writes decorator + collapsed BED files (only the collapsed one is converted to bigBed below)
+# Using all of RefSeq, not just refseq curated - good idea?
+# This is the file for RefSeq curated: /hive/data/genomes/hg38/bed/ncbiRefSeq.p14.2025-08-13/archive/hg38.ncbiRefSeqCurated.txt.gz
+~/kent/src/hg/makeDb/scripts/nmd/genePredNmdEsc -f genePredExt \
+    /hive/data/genomes/hg38/bed/ncbiRefSeq.p14.2025-08-13/archive/hg38.ncbiRefSeq.txt.gz \
+    nmdNcbiRefSeqDeco.bed nmdEscNcbiRefSeq.bed
+
+# build collapsed bigBed
+bedSort nmdEscNcbiRefSeq.bed nmdEscNcbiRefSeq.bed
+bedToBigBed nmdEscNcbiRefSeq.bed ../../../chrom.sizes nmdEscNcbiRefSeq.bb \
+    -tab -type=bed9+2 -as=${HOME}/kent/src/hg/makeDb/scripts/nmd/nmdEscCollapsed.as
+
+# symlink to gbdb
+ln -sf /hive/data/genomes/hg38/bed/nmd/ncbiRefSeq/nmdEscNcbiRefSeq.bb /gbdb/hg38/nmd/nmdEscNcbiRefSeq.bb
+
+#######################################################################
+# Lindeboom et al. NMDetective scores (2026-03-23 max/Claude)
+# NMD efficiency predictions from Lindeboom et al. 2019, Nat Genet.
+# Four bedGraph custom track files downloaded to:
+# /hive/data/genomes/hg38/bed/nmd/lindeboom/
+# Data downloaded from https://figshare.com/articles/dataset/NMDetective/7803398
+# Custom track data in the session links from that page
+# - NMDetectiveA.ct - Random forest prediction of NMD efficiency
+# - NMDetectiveB.ct - Decision tree prediction of NMD efficiency
+# - nmdDectA-ptc.ct - Random forest, first out-of-frame PTC
+# - nmdDectB-ptc.ct - Decision tree, first out-of-frame PTC
+
+# Convert bedGraph custom tracks to bigWig and symlink from /gbdb:
+cd /hive/data/genomes/hg38/bed/nmd/lindeboom/
+bash ~/kent/src/hg/makeDb/scripts/nmd/lindeboomToBigWig.sh