003337bf766f61f102c61cb924af98ad09372ad0
markd
  Tue Aug 5 12:41:24 2025 -0700
added size and splice junction filters to recount3

diff --git src/hg/makeDb/doc/hg38/recount3.txt src/hg/makeDb/doc/hg38/recount3.txt
index daad9e35c06..9878eb2a15a 100644
--- src/hg/makeDb/doc/hg38/recount3.txt
+++ src/hg/makeDb/doc/hg38/recount3.txt
@@ -1,61 +1,39 @@
 
 ####################################
 # recount3 intron tracks ticket 34886
 # Jeltje January 2025
 # NOTE: The sra files are so large that the trackDb.ra file needs a maxWindowToDraw limit
 # or else the browser window won't load within the set time
 
-
-cat << _TOEND_ > recount3.as
-table recount3
-"Bed 9+6 file for NCBI orthologs"
-    (
-    string chrom;      "Reference sequence chromosome or scaffold"
-    uint   chromStart; "Start position in chromosome"
-    uint   chromEnd;   "End position in chromosome"
-    string name;       "Short Name of item"
-    uint   score;      "Score from 0-1000"
-    char[1] strand;    "+ or -"
-    uint thickStart;   "Start of where display should be thick"
-    uint thickEnd;     "End of where display should be thick"
-    uint reserved;     "Used as itemRgb as of 2004-11-22"
-    bigint readcount;  "Read count"
-    uint samplecount;  "Sample count"
-    string donor;      "Splice donor"
-    string acceptor;   "Splice acceptor"
-    string url;        "URL"
-    )
-_TOEND_
+see kent/src/hg/lib/recount3.as
 
 
 process_dataset() {
 	local dset=$1
-	if [ ! -f "$dset.tsv" ]; then
-	    wget -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
-	    bgzip -d $dset.tsv.bgz
+	if [ ! -f "$dset.tsv.bgz" ]; then
+	    wget -nv -O $dset.tsv.bgz https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
 	fi
-	./junctionsToBed.py --junctions $dset.tsv --bed $dset.bed --decorator dec$dset.bed --compilation $dset
+        ~/kent/src/hg/makeDb/outside/recount3/junctionsToBed.py --junctions $dset.tsv.bgz --bed $dset.bed --decorator dec$dset.bed --compilation $dset
 	bedSort $dset.bed $dset.bed
 	bedSort dec$dset.bed dec$dset.bed
 }
 
 for dset in gtexv2 srav3h tcgav2 ccle; do
-	wget https://snaptron.cs.jhu.edu/data/$dset/junctions.bgz
-	zcat junctions.bgz > $dset.tsv 
 	process_dataset $dset &
 done
 wait
 
 toBB() {
 	local dset=$1
-	bedToBigBed -type=bed9+6 -tab -as=recount3.as $dset.bed /hive/data/genomes/hg38/chrom.sizes $dset.bb &
-	bedToBigBed -type=bed12+ -as=/cluster/home/jeltje/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/hg38/chrom.sizes dec$dset.bb &
+	bedToBigBed -type=bed9+6 -tab -as=${HOME}/kent/src/hg/lib/recount3.as $dset.bed /hive/data/genomes/hg38/chrom.sizes $dset.bb &
+	bedToBigBed -type=bed12+ -as=${HOME}/kent/src/hg/lib/decoration.as dec$dset.bed /hive/data/genomes/hg38/chrom.sizes dec$dset.bb &
 }
 
 for dset in gtexv2 srav3h tcgav2 ccle; do
 	toBB $dset &
 done
 wait
 
+rm *.bed *.tsv
 
-rm *.bed *tsv junctions.bgz
+2025-08-04 markd: update to add size column for filter