003337bf766f61f102c61cb924af98ad09372ad0 markd Tue Aug 5 12:41:24 2025 -0700 added size and splice junction filters to recount3

diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt
index d8f561aa21d..0a0fb209d53 100644
--- src/hg/makeDb/doc/mm10.txt
+++ src/hg/makeDb/doc/mm10.txt
@@ -19528,75 +19528,51 @@
     )
 _EOF_
     bedToBigBed -type=bed12+8 -as=encode4.as -tab transcripts.bed /hive/data/genomes/mm10/chrom.sizes encode4.bb
 
 ##############################################################################
 # Recount3 - Jeltje April 2025
 ####################################
 # recount3 intron tracks ticket 34886
 # Jeltje January 2025
 # NOTE: The sra files are so large that the trackDb.ra file needs a maxWindowToDraw limit
 # or else the browser window won't load within the set time
-cat << _TOEND_ > recount3.as
-table recount3
-"Bed 9+6 file for NCBI orthologs"
-    (
-    string chrom;        "Reference sequence chromosome or scaffold"
-    uint chromStart;     "Start position in chromosome"
-    uint chromEnd;       "End position in chromosome"
-    string name;         "Short Name of item"
-    uint score;          "Score from 0-1000"
-    char[1] strand;      "+ or -"
-    uint thickStart;     "Start of where display should be thick"
-    uint thickEnd;       "End of where display should be thick"
-    uint reserved;       "Used as itemRgb as of 2004-11-22"
-    bigint readcount;    "Read count"
-    uint samplecount;    "Sample count"
-    string donor;        "Splice donor"
-    string acceptor;     "Splice acceptor"
-    string url;          "URL"
-    )
-_TOEND_
-
+see kent/src/hg/lib/recount3.as
 
 process_dataset() {
     local dset=$1
-    if [ ! -f "$dset.tsv" ]; then
-        wget -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
-        bgzip -d $dset.tsv.bgz
+    if [ ! -f "$dset.tsv.bgz" ]; then
+        wget -nv -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
     fi
-    ./junctionsToBed.py --junctions $dset.tsv --bed $dset.bed --decorator dec$dset.bed --compilation $dset
+    ~/kent/src/hg/makeDb/outside/recount3/junctionsToBed.py --junctions $dset.tsv.bgz --bed $dset.bed --decorator dec$dset.bed --compilation $dset
 #    bedSort $dset.bed $dset.bed
     bedSort dec$dset.bed dec$dset.bed
 }
 
 dset=srav1m
-#wget https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
-#zcat junctions.bgz > $dset.tsv
-#process_dataset $dset &
-#wait
-
+process_dataset $dset
 
-bedToBigBed -type=bed9+6 -tab -as=recount3.as $dset.bed /hive/data/genomes/mm10/chrom.sizes $dset.bb &
-bedToBigBed -type=bed12+ -as=/cluster/home/jeltje/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/mm10/chrom.sizes dec$dset.bb &
+bedToBigBed -type=bed9+6 -tab -as=${HOME}/kent/src/hg/lib/recount3.as $dset.bed /hive/data/genomes/mm10/chrom.sizes $dset.bb &
+bedToBigBed -type=bed12+ -as=${HOME}/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/mm10/chrom.sizes dec$dset.bb &
 wait
 
 #rm srav1m.bed srav1m.tsv junctions.bgz
+2025-08-04 markd: update to add size column for filter
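
# A hypothetical trackDb.ra sketch (assumptions only, not taken from this commit) of how
# the new size and splice-junction columns can drive bigBed filters, alongside the
# maxWindowToDraw limit mentioned in the NOTE above.  Track name, thresholds, and
# field names below are illustrative; the authoritative field names are in
# kent/src/hg/lib/recount3.as.
#     track recount3Srav1m
#     type bigBed 9 +
#     bigDataUrl /gbdb/mm10/recount3/srav1m.bb
#     maxWindowToDraw 1000000
#     filter.size 70
#     filterLabel.size Minimum intron size
#     filterValues.donor GT,GC,AT
#     filterValues.acceptor AG,AC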

 ##############################################################################
 # ENCODE4 triplets - Jeltje April 2025
 mkdir -p april2025
 #wget -O april2025/mouse_ucsc_transcripts.gtf "https://zenodo.org/records/15116042/files/mouse_ucsc_transcripts.gtf?download=1"
 #wget -O april2025/filt_ab_tpm_mouse.tsv "https://zenodo.org/records/15116042/files/filt_ab_tpm_mouse.tsv?download=1"
 #wget -O april2025/mouse_protein_summary.tsv "https://zenodo.org/records/15116042/files/mouse_protein_summary.tsv?download=1"
 #wget -O april2025/mouse_sample_info.tsv https://zenodo.org/records/15116042/files/lr_mouse_library_data_summary.tsv?download=1
 gtfFile='april2025/mouse_ucsc_transcripts.gtf'
 quantFile='april2025/filt_ab_tpm_mouse.tsv'    # really counts per million since every read is full length
 protFile='april2025/mouse_protein_summary.tsv'
 sampleFile='april2025/mouse_sample_info.tsv'
 # this outputs bed12 + extra ID fields, topval expressions for mouseover and an expression html table