06d7be056190c14b85e71bc12523f18ea6815b5e
markd
  Mon Dec 7 00:50:29 2020 -0800
BLAT mmap index support merge with master

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index d5ad7a5..36fa16a 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -33769,16 +33769,88 @@
 mkdir run
 seq 4 57 | parallel -j10 'zcat all.baselevel.021620.tsv.bgz | cut -f1-3,{} | gzip -c > run/tissue{}.pext.gz'
 
 # overlapping exons in coding regions causes problems, don't output any scores
 # for those regions
 seq 4 57 | parallel --joblog run.log -j20 './buildPext.py run/tissue{}.pext.gz -o split'
 tail -n +2 run.log | cut -f4 | awk '{sum += $1}END{print sum/NR}'
 452.034
 
 # Turn into bigWigs:
 find split/ -name "*.bed" | parallel -j15 'sort -k1,1 -k2,2n {} | cut -f1-3,5 > {.}.bedGraph'
 find split/ -name "*.bedGraph" | parallel -j15 'bedGraphToBigWig {} /hive/data/genomes/hg19/chrom.sizes {.}.bw'
 mkdir -p /gbdb/hg19/gnomAD/pext
 ln -s `pwd`/split/*.bw /gbdb/hg19/gnomAD/pext/
 
+#############################################################################
+# ncbiRefSeq (GRCh37.p13) update 2020-10-27 (DONE - Hiram - 2020-10-27)
+
+  mkdir /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-10-27
+  cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-10-27
+
+  time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
+      -bigClusterHub=ku -dbHost=hgwdev \
+      -fileServer=hgwdev -smallClusterHub=hgwdev -workhorse=hgwdev \
+      GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 &
+  # real    6m47.005s
+
+  cat fb.ncbiRefSeq.hg19.txt
+  # 93720294 bases of 2991710746 (3.133%) in intersection
 
+#############################################################################
+# Covid-19 rare mutations, Max, Fri Oct 30 08:40:34 PDT 2020
+# received table from qzhang02@rockefeller.edu, wrote to UCSC.txt
+cd /hive/data/genomes/hg19/bed/covidMuts/
+dos2unix UCSC.txt
+cat UCSC.txt | tawk '{$1="chr"$1; chrom=$1; start=$2; rsId=$3; ref=$4; alt=$5; zygo=$6; gene=$7; genotype=$8; inh=$9; end=$2+length(ref); print chrom, start, end, ref">"alt, "0", ".", start, end, "0,0,0", "1", length(ref), "0", ref, alt, rsId, zygo, gene, genotype, inh;}' | grep -v chrchr > covidMuts.bed
+bedSort covidMuts.bed covidMuts.bed
+bedToBigBed -tab covidMuts.bed ../../chrom.sizes covidMuts.bb -as=../../hg19/bed/covidMuts/covidMuts.as -type=bed12+
+#############################################################################
+
+#############################################################################
+# gnomAD v2.1.1 update, ChrisL 12-2-2020
+#############################################################################
+# See /hive/data/inside/gnomAD/v2.1.1/run.sh for more information (${fields} used
+# below is not set in these notes; presumably it is defined there). Important steps:
+WORKDIR=/hive/data/inside/gnomAD/v2.1.1/
+cd $WORKDIR
+db="hg19"
+cd $db
+
+time parallel -j15 --joblog exomes.run.log --plus "vcfToBed -fields=${fields} {} exomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/exomes/*.bgz
+# real    16m42.939s
+# user    172m26.966s
+# sys 1m41.186s
+
+# now turn into a single bed (run from $WORKDIR: 'cd ..' first, as in the genomes step below)
+time cat hg19/exomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.exomes.bed
+# real    21m44.331s
+# user    20m24.018s
+# sys 3m5.405s
+time bedToBigBed -type=bed9+50 -tab -as=exomes.as gnomad.v2.1.1.exomes.bed /hive/data/genomes/hg19/chrom.sizes exomes.bb
+# pass1 - making usageList (24 chroms): 11485 millis
+# pass2 - checking and writing primary data (17209972 records, 57 fields): 339555 millis
+#
+# real    6m45.792s
+# user    6m7.880s
+# sys 0m11.924s
+
+# same for genomes
+cd $db
+time parallel -j15 --joblog genomes.run.log --plus "vcfToBed -fields=${fields} {} genomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/genomes/*.bgz
+# real  134m40.184s
+# user    1559m44.664s
+# sys 12m0.858s
+cd ..
+time cat hg19/genomes/*.bed | ./gnomadVcfBedToBigBed stdin stdout | sort -k1,1 -k2,2n > gnomad.v2.1.1.genomes.bed
+# real    199m48.619s
+# user    186m49.769s
+# sys 29m12.841s
+
+# no South Asian variants in the genomes file, so change type:
+time bedToBigBed -type=bed9+47 -tab -as=genomes.as gnomad.v2.1.1.genomes.bed /hive/data/genomes/hg19/chrom.sizes genomes.bb
+# pass1 - making usageList (23 chroms): 165336 millis
+# pass2 - checking and writing primary data (253556152 records, 55 fields): 4909106 millis
+#
+# real    89m3.165s
+# user    86m41.554s
+# sys 2m15.722s