37befdf6676de0022c60966291e21e5977dba086
chmalee
  Fri Jun 19 13:46:31 2020 -0700
Adding clingen cnv and dosage otto job to source tree now that it's been running for a while, refs #25562

diff --git src/hg/utils/otto/clinGen/makeDosage.sh src/hg/utils/otto/clinGen/makeDosage.sh
new file mode 100755
index 0000000..8704a59
--- /dev/null
+++ src/hg/utils/otto/clinGen/makeDosage.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# Downloads the ClinGen dosage sensitivity files and builds the hg19/hg38
+# haploinsufficiency and triplosensitivity bigBed tracks from them.
+#
+# Usage: makeDosage.sh WORKDIR
+#   WORKDIR  otto build directory, e.g. /hive/data/outside/otto/clinGen/
+#            Must be an absolute path: it is used again after changing into
+#            the dated download directory.
+#
+# Work files live in WORKDIR/clinGenDosage/; finished bigBeds are copied to
+# WORKDIR/release/{hg19,hg38}/, which must already exist.
+set -beEu -o pipefail
+
+if [ $# -lt 1 ]
+then
+    echo "usage: $0 WORKDIR" >&2
+    exit 1
+fi
+
+WORKDIR=$1
+mkdir -p "${WORKDIR}/clinGenDosage"
+cd "${WORKDIR}/clinGenDosage"
+buildDir=$(pwd)
+
+# Puts the previous listing back as release.list.  Called when a run fails so
+# that the next run compares against the last successfully built release and
+# still notices the pending update.
+restoreReleaseList() {
+    if [ -e "${buildDir}/prev.release.list" ]
+    then
+        mv -f "${buildDir}/prev.release.list" "${buildDir}/release.list"
+    fi
+}
+
+echo "user anonymous
+ls ClinGen*
+bye" > ftp.dosage.cmds
+
+if [ -e release.list ]
+then
+    mv release.list prev.release.list
+fi
+touch prev.release.list
+rm -f release.list
+
+# connect and list a directory, result to file: ls.check
+ftp -n -v -i ftp.clinicalgenome.org < ftp.dosage.cmds > ls.check 2>&1
+if ! grep -E 'haplo|triplo|curation' ls.check | sort > release.list
+then
+    # No matching files in the listing most likely means the FTP session
+    # failed, so stop here rather than attempting a build from nothing.
+    echo "Error - no bed files found" >&2
+    restoreReleaseList
+    exit 1
+fi
+
+# Any later failure (set -e aborts the script) must not leave the new
+# release.list behind, or the update would look already built next time.
+trap restoreReleaseList ERR
+
+# see if anything is changing, if so, notify, download, and build
+diff prev.release.list release.list > release.diff || true
+count=$(wc -l < release.diff)
+if [ "${count}" -gt 1 ]
+then
+    echo "New ClinGen Dosage update"
+    today=$(date +%F)
+    mkdir -p "${today}/download" "${today}/output"
+    cd "${today}/download"
+    for grc in GRCh37 GRCh38
+    do
+        # UCSC database name for this GRC assembly
+        case "${grc}" in
+            GRCh37) db=hg19 ;;
+            GRCh38) db=hg38 ;;
+        esac
+        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_haploinsufficiency_gene_${grc}.bed"
+        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_triplosensitivity_gene_${grc}.bed"
+        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_${grc}.tsv"
+        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_gene_curation_list_${grc}.tsv"
+        echo "${grc}"
+        # Paths below are relative to ${today}/download: ../../ is the
+        # clinGenDosage build directory and ../../../ is its parent.
+        ../../../processClinGenDosage.py "ClinGen_haploinsufficiency_gene_${grc}.bed" "ClinGen_gene_curation_list_${grc}.tsv" haplo | sort -k1,1 -k2,2n > "../output/${db}.clinGenHaplo.bed"
+        ../../../processClinGenDosage.py "ClinGen_triplosensitivity_gene_${grc}.bed" "ClinGen_gene_curation_list_${grc}.tsv" triplo | sort -k1,1 -k2,2n > "../output/${db}.clinGenTriplo.bed"
+        bedToBigBed -type=bed9+10 -as=../../clinGenDosageHaplo.as -tab "../output/${db}.clinGenHaplo.bed" "/hive/data/genomes/${db}/chrom.sizes" "../output/${db}.clinGenHaplo.bb"
+        bedToBigBed -type=bed9+10 -as=../../clinGenDosageTriplo.as -tab "../output/${db}.clinGenTriplo.bed" "/hive/data/genomes/${db}/chrom.sizes" "../output/${db}.clinGenTriplo.bb"
+        cp "../output/${db}.clinGenHaplo.bb" "${WORKDIR}/release/${db}/clinGenHaplo.bb"
+        cp "../output/${db}.clinGenTriplo.bb" "${WORKDIR}/release/${db}/clinGenTriplo.bb"
+    done
+    cd ../..
+else
+    echo "No ClinGen Dosage update"
+fi