06d7be056190c14b85e71bc12523f18ea6815b5e markd Mon Dec 7 00:50:29 2020 -0800 BLAT mmap index support merge with master diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index d5ad7a5..36fa16a 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -33769,16 +33769,88 @@ mkdir run seq 4 57 | parallel -j10 'zcat all.baselevel.021620.tsv.bgz | cut -f1-3,{} | gzip -c > run/tissue{}.pext.gz' # overlapping exons in coding regions causes problems, don't output any scores # for those regions seq 4 57 | parallel --joblog run.log -j20 './buildPext.py run/tissue{}.pext.gz -o split' tail -n +2 run.log | cut -f4 | awk '{sum += $1}END{print sum/NR}' 452.034 # Turn into bigWigs: find split/ -name "*.bed" | parallel -j15 'sort -k1,1 -k2,2n {} | cut -f1-3,5 > {.}.bedGraph' find split/ -name "*.bedGraph" | parallel -j15 'bedGraphToBigWig {} /hive/data/genomes/hg19/chrom.sizes {.}.bw' mkdir -p /gbdb/hg19/gnomAD/pext ln -s `pwd`/split/*.bw /gbdb/hg19/gnomAD/pext/ +############################################################################# +# update 2020-10-27 (DONE - Hiram - 2020-10-27) + + mkdir /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-10-27 + cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-10-27 + + time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ + -bigClusterHub=ku -dbHost=hgwdev \ + -fileServer=hgwdev -smallClusterHub=hgwdev -workhorse=hgwdev \ + GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 & + # real 6m47.005s + + cat fb.ncbiRefSeq.hg19.txt + # 93720294 bases of 2991710746 (3.133%) in intersection +############################################################################# +# Covid-19 rare mutations, Max, Fri Oct 30 08:40:34 PDT 2020 +# received table from qzhang02@rockefeller.edu, wrote to UCSC.txt +cd /hive/data/genomes/hg19/bed/covidMuts/ +dos2unix UCSC.txt +cat UCSC.txt | tawk '{$1="chr"$1; chrom=$1; start=$2; rsId=$3; ref=$4; alt=$5; zygo=$6; gene=$7; genotype=$8; inh=$9; end=$2+length(ref); print chrom, 
start, end, ref">"alt, "0", ".", start, end, "0,0,0", "1", length(ref), "0", ref, alt, rsId, zygo, gene, genotype, inh;}' | grep -v chrchr > covidMuts.bed +bedSort covidMuts.bed covidMuts.bed +bedToBigBed -tab covidMuts.bed ../../chrom.sizes covidMuts.bb -as=../../hg19/bed/covidMuts/covidMuts.as -type=bed12+ +############################################################################# + +############################################################################# +# gnomAD v2.1.1 update, ChrisL 12-2-2020 +############################################################################# +# See /hive/data/inside/gnomAD/v2.1.1/run.sh for more information, listed +# here are the important steps: +WORKDIR=/hive/data/inside/gnomAD/v2.1.1/ +cd $WORKDIR +db="hg19" +cd $db + +time parallel -j15 --joblog exomes.run.log --plus "vcfToBed -fields=${fields} {} exomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/exomes/*.bgz +# real 16m42.939s +# user 172m26.966s +# sys 1m41.186s + +# now turn into a single bed +time cat hg19/exomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.exomes.bed +# real 21m44.331s +# user 20m24.018s +# sys 3m5.405s +time bedToBigBed -type=bed9+50 -tab -as=exomes.as gnomad.v2.1.1.exomes.bed /hive/data/genomes/hg19/chrom.sizes exomes.bb +# pass1 - making usageList (24 chroms): 11485 millis +# pass2 - checking and writing primary data (17209972 records, 57 fields): 339555 millis +# +# real 6m45.792s +# user 6m7.880s +# sys 0m11.924s + +# same for genomes +cd $db +time parallel -j15 --joblog genomes.run.log --plus "vcfToBed -fields=${fields} {} genomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/genomes/*.bgz +# real 134m40.184s +# user 1559m44.664s +# sys 12m0.858s +cd .. 
+time cat hg19/genomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.genomes.bed +# real 199m48.619s +# user 186m49.769s +# sys 29m12.841s + +# no South Asian variants in the genomes file, change type: +time bedToBigBed -type=bed9+47 -tab -as=genomes.as gnomad.v2.1.1.genomes.bed /hive/data/genomes/hg19/chrom.sizes genomes.bb +# pass1 - making usageList (23 chroms): 165336 millis +# pass2 - checking and writing primary data (253556152 records, 55 fields): 4909106 millis +# +# real 89m3.165s +# user 86m41.554s +# sys 2m15.722s