9eb265fc64110d5d873242c3a314253fa4032be2 hiram Thu Feb 18 10:57:02 2021 -0800 updated wgRna track to version 20 2012-10-01 last release from miRNA base for GRCh37/hg19 refs #27017 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 08a2824..059c510 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -34379,15 +34379,91 @@ ############################################################################# # CADD, max Wed Feb 10 06:08:01 PST 2021 cd /hive/data/genomes/hg19/bed/cadd/ wget https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/InDels.tsv.gz wget https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/whole_genome_SNVs.tsv.gz time python ~/kent/src/hg/makeDb/cadd/caddToBed.py InDels.tsv.gz time python ~/kent/src/hg/makeDb/cadd/caddToWig.py wigToBigWig a.wig ../../chrom.sizes a.bw & wigToBigWig c.wig ../../chrom.sizes c.bw & wigToBigWig t.wig ../../chrom.sizes t.bw & wigToBigWig g.wig ../../chrom.sizes g.bw & bedToBigBed ins.bed ../../chrom.sizes ins.bb -type=bed9+ -tab -as=${HOME}/kent/src/hg/makeDb/cadd/cadd.as bedToBigBed del.bed ../../chrom.sizes del.bb -type=bed9+ -tab -as=${HOME}/kent/src/hg/makeDb/cadd/cadd.as rm -f *.wig *.bed + +############################################################################## +# update sno/miRNA TRACK (WORKING - 2021-02-18 - Hiram) + # last release for GRCh37 was version 20: 2013-06-13 + + # The data in this track is out of date so update the track. 
+ mkdir /hive/data/genomes/hg19/bed/wgRna-2013-06-13 + cd /hive/data/genomes/hg19/bed/wgRna-2013-06-13 + + wget --timestamping \ + ftp://mirbase.org/pub/mirbase/20/genomes/hsa.gff3 + + # examine chromosome list: + grep -v "^#" hsa.gff3 | cut -f1 | sort | uniq -c > chr.list + + # Only select the primary transcripts, make coords 0-based so they match + # gencode and refseq tracks, and remove ID and Alias entries in column 9: + grep -v "^#" hsa.gff3 | grep -c "miRNA_primary_transcript" + # 1871 + tawk '{if ($3 == "miRNA_primary_transcript") {$4-=1; print;}}' hsa.gff3 \ + | grep -v '^#' | tr ';' '\t' | tr '=' '\t' | cut -f1-8,14 \ + > hsa.primaryCleaned.gff3 + wc -l hsa.primaryCleaned.gff3 + # 1871 + + # now get into bed format, with type="miRNA" + tawk '{print $1, $4, $5, $9, 0, $7, 0, 0, "miRNA";}' \ + hsa.primaryCleaned.gff3 > miRNA.bed + + # get snoRNA entries from current wgRna table: + hgsql -Ne "select * from wgRna where type != 'miRNA'" hg19 \ + | cut -f2- > wgRna.other.bed + + # combine and load: + cat miRNA.bed wgRna.other.bed > wgRna2021-02-18.bed + hgLoadBed -tab -renameSqlTable -verbose=4 \ + -sqlTable=$HOME/kent/src/hg/lib/wgRna.sql \ + -as=$HOME/kent/src/hg/lib/wgRna.as hg19 wgRnaNew wgRna2021-02-18.bed +# ### kent source version 410 ### +# Reading wgRna2021-02-18.bed +# Read 2273 elements of size 9 from wgRna2021-02-18.bed +# Loading hg19 + + # compare old and new tables: + hgsql -Ne "select type, count(*) from wgRna group by type" hg19 ++---------+-----+ +| CDBox | 269 | +| HAcaBox | 112 | +| miRNA | 939 | +| scaRna | 21 | ++---------+-----+ + hgsql -Ne "select type, count(*) from wgRnaNew group by type" hg19 ++---------+------+ +| CDBox | 269 | +| HAcaBox | 112 | +| miRNA | 1871 | +| scaRna | 21 | ++---------+------+ + # compared to hg38: + hgsql -Ne "select type, count(*) from wgRna group by type" hg38 ++---------+------+ +| CDBox | 269 | +| HAcaBox | 112 | +| miRNA | 1918 | +| scaRna | 21 | ++---------+------+ + + # backup old table and compare to 
new one before rename. Should be only miRNA update: + hgsql -Ne "select * from wgRna" hg19 | cut -f2- > wgRna.backup + comm -23 <(sort -k1 -k2n wgRna2021-02-18.bed) <(sort -k1 -k2n wgRna.backup) | cut -f9 | sort -u +miRNA + + # rename wgRnaNew table: + hgsqlSwapTables -dropTable3 hg19 wgRnaNew wgRna wgRnaOld + +##############################################################################