003337bf766f61f102c61cb924af98ad09372ad0 markd Tue Aug 5 12:41:24 2025 -0700 added size and splice junction filters to recount3 diff --git src/hg/makeDb/doc/hg38/recount3.txt src/hg/makeDb/doc/hg38/recount3.txt index daad9e35c06..9878eb2a15a 100644 --- src/hg/makeDb/doc/hg38/recount3.txt +++ src/hg/makeDb/doc/hg38/recount3.txt @@ -1,61 +1,39 @@ #################################### # recount3 intron tracks ticket 34886 # Jeltje January 2025 # NOTE: The sra files are so large that the trackDb.ra file needs a maxWindowToDraw limit # or else the browser window won't load within the set time - -cat << _TOEND_ > recount3.as -table recount3 -"Bed 9+6 file for NCBI orthologs" - ( - string chrom; "Reference sequence chromosome or scaffold" - uint chromStart; "Start position in chromosome" - uint chromEnd; "End position in chromosome" - string name; "Short Name of item" - uint score; "Score from 0-1000" - char[1] strand; "+ or -" - uint thickStart; "Start of where display should be thick" - uint thickEnd; "End of where display should be thick" - uint reserved; "Used as itemRgb as of 2004-11-22" - bigint readcount; "Read count" - uint samplecount; "Sample count" - string donor; "Splice donor" - string acceptor; "Splice acceptor" - string url; "URL" - ) -_TOEND_ +see kent/src/hg/lib/recount3.as process_dataset() { local dset=$1 - if [ ! -f "$dset.tsv" ]; then - wget -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz - bgzip -d $dset.tsv.bgz + if [ ! -f "$dset.tsv.bgz" ]; then + wget -nv -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz fi - ./junctionsToBed.py --junctions $dset.tsv --bed $dset.bed --decorator dec$dset.bed --compilation $dset + ~/kent/src/hg/makeDb/outside/recount3/junctionsToBed.py --junctions $dset.tsv.bgz --bed $dset.bed --decorator dec$dset.bed --compilation $dset bedSort $dset.bed $dset.bed bedSort dec$dset.bed dec$dset.bed } for dset in gtexv2 srav3h tcgav2 ccle; do - wget https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz - zcat junctions.bgz > $dset.tsv process_dataset $dset & done wait toBB() { local dset=$1 - bedToBigBed -type=bed9+6 -tab -as=recount3.as $dset.bed /hive/data/genomes/hg38/chrom.sizes $dset.bb & - bedToBigBed -type=bed12+ -as=/cluster/home/jeltje/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/hg38/chrom.sizes dec$dset.bb & + bedToBigBed -type=bed9+6 -tab -as=${HOME}/kent/src/hg/lib/recount3.as $dset.bed /hive/data/genomes/hg38/chrom.sizes $dset.bb & + bedToBigBed -type=bed12+ -as=${HOME}/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/hg38/chrom.sizes dec$dset.bb & } for dset in gtexv2 srav3h tcgav2 ccle; do toBB $dset & done wait +rm *.bed *.tsv -rm *.bed *tsv junctions.bgz +2025-08-04 markd: update to add size column for filter