src/hg/makeDb/doc/hg19.txt a7533ba73cff353b993553668f9fb21df243f71b

a7533ba73cff353b993553668f9fb21df243f71b
jcasper
  Thu Mar 18 09:54:14 2021 -0700
Makedoc for DECIPHER haploinsufficiency on hg19, refs #25707

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index d2d8582..ed4e1f1 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -33831,31 +33831,31 @@
       -fileServer=hgwdev -smallClusterHub=hgwdev -workhorse=hgwdev \
       GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 &
   # real    6m47.005s
 
   cat fb.ncbiRefSeq.hg19.txt
   # 93720294 bases of 2991710746 (3.133%) in intersection
 
 #############################################################################
 # Covid-19 rare mutations, Max, Fri Oct 30 08:40:34 PDT 2020
 # received table from qzhang02@rockefeller.edu, wrote to UCSC.txt
 cd /hive/data/genomes/hg19/bed/covidMuts/
 dos2unix UCSC.txt
 cat UCSC.txt | tawk '{$1="chr"$1; chrom=$1; start=$2; rsId=$3; ref=$4; alt=$5; zygo=$6; gene=$7; genotype=$8; inh=$9; end=$2+length(ref); print chrom, start, end, ref">"alt, "0", ".", start, end, "0,0,0", "1", length(ref), "0", ref, alt, rsId, zygo, gene, genotype, inh;}' | grep -v chrchr > covidMuts.bed
 bedSort covidMuts.bed covidMuts.bed
 bedToBigBed -tab covidMuts.bed ../../chrom.sizes covidMuts.bb -as=../../hg19/bed/covidMuts/covidMuts.as -type=bed12+
-#############################################################################
+#############################################################################
 
 #############################################################################
 # gnomAD v2.1.1 update, ChrisL 12-2-2020
 #############################################################################
 # See /hive/data/inside/gnomAD/v2.1.1/run.sh for more information;
 # the important steps are listed here:
 WORKDIR=/hive/data/inside/gnomAD/v2.1.1/
 cd $WORKDIR
 db="hg19"
 cd $db
 
 time parallel -j15 --joblog exomes.run.log --plus "vcfToBed -fields=${fields} {} exomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/exomes/*.bgz
 # real    16m42.939s
 # user    172m26.966s
 # sys 1m41.186s
@@ -34201,31 +34201,65 @@
 bedToBigBed sorted_MGI_Exome_Capture_V4.bed hg19.chrom.sizes MGI_Exome_Capture_V4.bb
 
 --
 
 The following files from Roche had long entries in col4, which made some rows too long for bedToBigBed. Therefore, col4 was cut from all of the input bed files. (Note: col4 held only the Ensembl and CCDS IDs, which did not provide any other substantial information.)
 
 We ran the command
 
 > cut -f1,2,3
 
 for all such files. Here's an example for the Roche - KAPA HyperExome Capture Probe:
 
 Footprint file:
 
 cut -f1,2,3 sorted-KAPA_HyperExome_hg19_capture_targets.bed > sorted-cut-KAPA_HyperExome_hg19_capture_targets.bed
+
+
 #############################################################################
+# haploinsufficiency from DECIPHER - DONE 3/18/2021 Jonathan
+
+# Download latest predictions list from https://decipher.sanger.ac.uk/about/downloads/data
+mkdir -p /hive/data/outside/decipher/haploinsufficiency
+cd /hive/data/outside/decipher/haploinsufficiency
+wget https://decipher.sanger.ac.uk/files/downloads/HI_Predictions_Version3.bed.gz
+filePath=`pwd`/HI_Predictions_Version3.bed.gz
+
+# zcat | head shows the file is nearly ready to go, but could benefit from a bit of reorganization:
+# the parse script below splits column 4 on '|' and sets column 5 to 0 (floating point score values don't work for some bed processors)
+
+mkdir -p /hive/data/genomes/hg19/bed/decipherHaplo
+cd /hive/data/genomes/hg19/bed/decipherHaplo
+
+printf 'chomp;
+@fields = split /\t/;
+($gene, $score, $pct) = split /\|/, $fields[3];
+$fields[3] = $gene;
+$fields[4] = 0;
+push @fields, ($pct, $score);
+push @fields, ("$gene, HI: $pct");
+print join ("\t", @fields) . "\n";
+' > parse.pl
+
+zcat $filePath | tail -n +2 | perl -nf parse.pl | bedSort stdin HI_Predictions.bed
+
+bedToBigBed HI_Predictions.bed -type=bed9+3 -as=$HOME/kent/src/hg/lib/haploinsufficiency.as -tab ../../chrom.sizes haploinsufficiency.bb
+
+mkdir -p /gbdb/hg19/bbi/haploins/
+cd /gbdb/hg19/bbi/haploins/
+ln -s /hive/data/genomes/hg19/bed/decipherHaplo/haploinsufficiency.bb .
+
 
 #############################################################################
 # skinSoleBoldo JimK 01-14-2020
 # This describes how we got the skinSoleBoldo data set into the
 # Genome Browser from the Cell Browser.
 #############################################################################
 
 # Create working directory and go there
 mkdir /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo
 cd /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo
 
 # Create output dir for binaries
 mkdir bbi
 
 # Downloaded files from the UCSC Cell Browser as follows