a7533ba73cff353b993553668f9fb21df243f71b jcasper Thu Mar 18 09:54:14 2021 -0700 Makedoc for DECIPHER haploinsufficiency on hg19, refs #25707 diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt index d2d8582..ed4e1f1 100644 --- src/hg/makeDb/doc/hg19.txt +++ src/hg/makeDb/doc/hg19.txt @@ -33831,31 +33831,31 @@ -fileServer=hgwdev -smallClusterHub=hgwdev -workhorse=hgwdev \ GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 & # real 6m47.005s cat fb.ncbiRefSeq.hg19.txt # 93720294 bases of 2991710746 (3.133%) in intersection ############################################################################# # Covid-19 rare mutations, Max, Fri Oct 30 08:40:34 PDT 2020 # received table from qzhang02@rockefeller.edu, wrote to UCSC.txt cd /hive/data/genomes/hg19/bed/covidMuts/ dos2unix UCSC.txt cat UCSC.txt | tawk '{$1="chr"$1; chrom=$1; start=$2; rsId=$3; ref=$4; alt=$5; zygo=$6; gene=$7; genotype=$8; inh=$9; end=$2+length(ref); print chrom, start, end, ref">"alt, "0", ".", start, end, "0,0,0", "1", length(ref), "0", ref, alt, rsId, zygo, gene, genotype, inh;}' | grep -v chrchr > covidMuts.bed bedSort covidMuts.bed covidMuts.bed bedToBigBed -tab covidMuts.bed ../../chrom.sizes covidMuts.bb -as=../../hg19/bed/covidMuts/covidMuts.as -type=bed12+ -############################################################################# +<<<<<<< Updated upstream ############################################################################# # gnomAD v2.1.1 update, ChrisL 12-2-2020 ############################################################################# # See /hive/data/inside/gnomAD/v2.1.1/run.sh for more information, listed # here are the important steps: WORKDIR=/hive/data/inside/gnomAD/v2.1.1/ cd $WORKDIR db="hg19" cd $db time parallel -j15 --joblog exomes.run.log --plus "vcfToBed -fields=${fields} {} exomes/{/..}.bed" ::: /hive/data/outside/gnomAD.2/v2.1.1/exomes/*.bgz # real 16m42.939s # user 172m26.966s # sys 1m41.186s @@ -34201,31 +34201,65 @@ bedToBigBed sorted_MGI_Exome_Capture_V4.bed hg19.chrom.sizes MGI_Exome_Capture_V4.bb -- The following files from Roche had long entries in col4, causing these files to have rows that were too long for bedToBigBed. Therefore, all the input bed files had col4 cut. (Note: these were just the ensembl and ccds ids, which did not provide any other substantial information.) We ran the command > cut -f1,2,3 for all such files. Here's an example for the Roche - KAPA HyperExome Capture Probe: Footprint file: cut -f1,2,3 sorted-KAPA_HyperExome_hg19_capture_targets.bed > sorted-cut-KAPA_HyperExome_hg19_capture_targets.bed + + ############################################################################# +# haploinsufficiency from DECIPHER - DONE 3/18/2021 Jonathan + +# Download latest predictions list from https://decipher.sanger.ac.uk/about/downloads/data +mkdir -p /hive/data/outside/decipher/haploinsufficiency +cd /hive/data/outside/decipher/haploinsufficiency +wget https://decipher.sanger.ac.uk/files/downloads/HI_Predictions_Version3.bed.gz +filePath=`pwd`/HI_Predictions_Version3.bed.gz + +# zcat | head shows the file is nearly ready to go, but could benefit from a bit of reorganization +# (also floating point score values don't work for some bed processors) + +mkdir -p /hive/data/genomes/hg19/bed/decipherHaplo +cd /hive/data/genomes/hg19/bed/decipherHaplo + +printf 'chomp; +@fields = split /\t/; +($gene, $score, $pct) = split /\|/, $fields[3]; +$fields[3] = $gene; +$fields[4] = 0; +push @fields, ($pct, $score); +push @fields, ("$gene, HI: $pct"); +print join ("\t", @fields) . "\n"; +' > parse.pl + +zcat $filePath | tail -n +2 | perl -nf parse.pl | bedSort stdin HI_Predictions.bed + +bedToBigBed HI_Predictions.bed -type=bed9+3 -as=$HOME/kent/src/hg/lib/haploinsufficiency.as -tab ../../chrom.sizes haploinsufficiency.bb + +mkdir -p /gbdb/hg19/bbi/haploins/ +cd /gbdb/hg19/bbi/haploins/ +ln -s /hive/data/genomes/hg19/bed/decipherHaplo/haploinsufficiency.bb . + ############################################################################# # skinSoleBoldo JimK 01-14-2020 # This describes how we got the skinSoleBoldo data set into the # Genome Browser from the Cell Browser. ############################################################################# # Create working directory and go there mkdir /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo cd /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo # Create output dir for binaries mkdir bbi # Downloaded files from the UCSC cell browser's as so