b5ea08a66e208b8bc4852fb84bd9929fcdaae241 chmalee Wed Dec 2 10:59:31 2020 -0800 Add makedoc for hg19 gnomAD v2.1.1 update, refs #25010 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index 65a998c..36fa16a 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -33793,15 +33793,64 @@ GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 & # real 6m47.005s cat fb.ncbiRefSeq.hg19.txt # 93720294 bases of 2991710746 (3.133%) in intersection ############################################################################# # Covid-19 rare mutations, Max, Fri Oct 30 08:40:34 PDT 2020 # received table from qzhang02@rockefeller.edu, wrote to UCSC.txt cd /hive/data/genomes/hg19/bed/covidMuts/ dos2unix UCSC.txt cat UCSC.txt | tawk '{$1="chr"$1; chrom=$1; start=$2; rsId=$3; ref=$4; alt=$5; zygo=$6; gene=$7; genotype=$8; inh=$9; end=$2+length(ref); print chrom, start, end, ref">"alt, "0", ".", start, end, "0,0,0", "1", length(ref), "0", ref, alt, rsId, zygo, gene, genotype, inh;}' | grep -v chrchr > covidMuts.bed bedSort covidMuts.bed covidMuts.bed bedToBigBed -tab covidMuts.bed ../../chrom.sizes covidMuts.bb -as=../../hg19/bed/covidMuts/covidMuts.as -type=bed12+ ############################################################################# + +############################################################################# +# gnomAD v2.1.1 update, ChrisL 12-2-2020 +############################################################################# +# See /hive/data/inside/gnomAD/v2.1.1/run.sh for more information, listed +# here are the important steps: +WORKDIR=/hive/data/inside/gnomAD/v2.1.1/ +cd $WORKDIR +db="hg19" +cd $db + +time parallel -j15 --joblog exomes.run.log --plus "vcfToBed -fields=${fields} {} exomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/exomes/*.bgz +# real 16m42.939s +# user 172m26.966s +# sys 1m41.186s + +# now turn into a single bed +time cat hg19/exomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > 
gnomad.v2.1.1.exomes.bed +# real 21m44.331s +# user 20m24.018s +# sys 3m5.405s +time bedToBigBed -type=bed9+50 -tab -as=exomes.as gnomad.v2.1.1.exomes.bed /hive/data/genomes/hg19/chrom.sizes exomes.bb +# pass1 - making usageList (24 chroms): 11485 millis +# pass2 - checking and writing primary data (17209972 records, 57 fields): 339555 millis +# +# real 6m45.792s +# user 6m7.880s +# sys 0m11.924s + +# same for genomes +cd $db +time parallel -j15 --joblog genomes.run.log --plus "vcfToBed -fields=${fields} {} genomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/genomes/*.bgz +# real 134m40.184s +# user 1559m44.664s +# sys 12m0.858s +cd .. +time cat hg19/genomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.genomes.bed +# real 199m48.619s +# user 186m49.769s +# sys 29m12.841s + +# no South Asian variants in the genomes file, so change type (bed9+47 rather than the bed9+50 used for exomes): +time bedToBigBed -type=bed9+47 -tab -as=genomes.as gnomad.v2.1.1.genomes.bed /hive/data/genomes/hg19/chrom.sizes genomes.bb +# pass1 - making usageList (23 chroms): 165336 millis +# pass2 - checking and writing primary data (253556152 records, 55 fields): 4909106 millis +# +# real 89m3.165s +# user 86m41.554s +# sys 2m15.722s