#!/bin/bash

# Origin: commit 37befdf6676de0022c60966291e21e5977dba086
#   (chmalee, Fri Jun 19 13:46:31 2020 -0700)
#   "Adding clingen cnv and dosage otto job to source tree now that it's been
#   running for a while, refs #25562"
# Path: src/hg/utils/otto/clinGen/makeDosage.sh (new file, mode 100755)
#
# Downloads and builds the curated ClinGen Curated CNVS track.
#
# Usage: makeDosage.sh WORKDIR
#   WORKDIR is the build directory, e.g. /hive/data/outside/otto/clinGen/
#
# Files this script expects to already exist (it does not create them):
#   WORKDIR/processClinGenDosage.py
#   WORKDIR/clinGenDosage/clinGenDosageHaplo.as
#   WORKDIR/clinGenDosage/clinGenDosageTriplo.as
#   WORKDIR/release/hg19/ and WORKDIR/release/hg38/
#
# Flow: list the ClinGen FTP site, compare the listing with the one saved by
# the previous run, and only when it differs download the files for both
# assemblies, convert them to bigBed, and copy the results into
# WORKDIR/release/<db>/.
set -beEu -o pipefail

# Fail with a usage message when the argument is missing or empty, instead of
# the generic "unbound variable" error from set -u.
: "${1:?usage: makeDosage.sh WORKDIR}"

mkdir -p -- "$1/clinGenDosage"
# Resolve to an absolute path: WORKDIR is used again after several cd calls
# (the cp destinations below), where a relative path would point elsewhere.
WORKDIR=$(CDPATH='' cd -- "$1" && pwd)
cd -- "${WORKDIR}/clinGenDosage"

# Build the bigBed files for one assembly and copy them to the release area.
# Arguments: $1 - GRC assembly name as used in the ClinGen file names
#            $2 - database name used for chrom.sizes and the output file names
# Must be called from the <date>/download directory: every path below is
# relative to it (../output, ../../*.as, ../../../processClinGenDosage.py).
build_assembly() {
    local grc=$1
    local db=$2
    local curation="ClinGen_gene_curation_list_${grc}.tsv"

    ../../../processClinGenDosage.py "ClinGen_haploinsufficiency_gene_${grc}.bed" "${curation}" haplo \
        | sort -k1,1 -k2,2n > "../output/${db}.clinGenHaplo.bed"
    ../../../processClinGenDosage.py "ClinGen_triplosensitivity_gene_${grc}.bed" "${curation}" triplo \
        | sort -k1,1 -k2,2n > "../output/${db}.clinGenTriplo.bed"

    bedToBigBed -type=bed9+10 -as=../../clinGenDosageHaplo.as -tab \
        "../output/${db}.clinGenHaplo.bed" "/hive/data/genomes/${db}/chrom.sizes" \
        "../output/${db}.clinGenHaplo.bb"
    bedToBigBed -type=bed9+10 -as=../../clinGenDosageTriplo.as -tab \
        "../output/${db}.clinGenTriplo.bed" "/hive/data/genomes/${db}/chrom.sizes" \
        "../output/${db}.clinGenTriplo.bb"

    # Publish only after both bigBeds for this assembly were built.
    cp "../output/${db}.clinGenHaplo.bb" "${WORKDIR}/release/${db}/clinGenHaplo.bb"
    cp "../output/${db}.clinGenTriplo.bb" "${WORKDIR}/release/${db}/clinGenTriplo.bb"
}

# Command script fed to the ftp client on stdin.
printf '%s\n' 'user anonymous' 'ls ClinGen*' 'bye' > ftp.dosage.cmds

# Rotate the listing from the previous run; on the very first run the touch
# leaves an empty prev.release.list to compare against.
if [[ -e release.list ]]; then
    mv release.list prev.release.list
fi
touch prev.release.list
rm -f release.list

# connect and list a directory, result to file: ls.check
ftp -n -v -i ftp.clinicalgenome.org < ftp.dosage.cmds > ls.check 2>&1
grep "haplo\|triplo\|curation" ls.check | sort > release.list || echo "Error - no bed files found"

# see if anything is changing, if so, notify, download, and build
# diff exits non-zero when the listings differ; that is the expected case
# here, so it must not trip set -e.
diff prev.release.list release.list > release.diff || true
count=$(wc -l < release.diff)

# NOTE(review): release.list has already been replaced at this point, so if
# the build below fails the next run will see no difference and will not
# retry until the remote listing changes again - confirm this is acceptable.
if (( count > 1 )); then
    echo "New ClinGen Dosage update"
    today=$(date +%F)
    mkdir -p "${today}/download" "${today}/output"
    cd "${today}/download"
    for grc in GRCh37 GRCh38; do
        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_haploinsufficiency_gene_${grc}.bed"
        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_triplosensitivity_gene_${grc}.bed"
        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_${grc}.tsv"
        wget -N -q "ftp://ftp.clinicalgenome.org/ClinGen_gene_curation_list_${grc}.tsv"
        case "${grc}" in
            GRCh37)
                echo "${grc}"
                build_assembly "${grc}" hg19
                ;;
            GRCh38)
                echo "${grc}"
                build_assembly "${grc}" hg38
                ;;
        esac
    done
    cd ../..
else
    echo "No ClinGen CNV update"
fi