#!/bin/bash

# Downloads and builds the ClinGen curated CNVs track (dbVar study nstd45)
# for hg19 and hg38.
#
# Usage: makeCnv.sh WORKDIR
#   WORKDIR - the otto build directory, e.g. /hive/data/outside/otto/clinGen/
#
# Layout used under WORKDIR:
#   clinGenCnv/                        working directory for this job
#   clinGenCnv/release.list            FTP listing seen by the latest run
#   clinGenCnv/prev.release.list       FTP listing seen by the run before that
#   clinGenCnv/clinGenCnv.as           autoSql file handed to bedToBigBed
#   clinGenCnv/YYYY-MM-DD/download     lift files and downloaded gvf files
#   clinGenCnv/YYYY-MM-DD/output       bed and bigBed files
#   processClinGenCnv.py               gvf -> bed converter
#   release/{hg19,hg38}/clinGenCnv.bb  released bigBed files
#
# Provenance: src/hg/utils/otto/clinGen/makeCnv.sh, added in commit
# 37befdf6676de0022c60966291e21e5977dba086 (chmalee, Fri Jun 19 13:46:31 2020
# -0700): "Adding clingen cnv and dosage otto job to source tree now that
# it's been running for a while, refs #25562".
set -beEu -o pipefail

# Absolute paths; assigned in main() once the argument has been validated.
WORKDIR=""
BUILD_DIR=""

# Print a usage message to stderr.
usage() {
  printf 'Usage: %s WORKDIR\n' "${0##*/}" >&2
}

# EXIT trap. When the script ends with a non-zero status, put the previous
# listing back in place of release.list so the next run compares against the
# last listing that was handled successfully and retries the update, instead
# of reporting "No ClinGen CNV update" after a failed build.
restore_release_list() {
  local status=$?
  if ((status != 0)) && [[ -e "${BUILD_DIR}/prev.release.list" ]]; then
    cp -f -- "${BUILD_DIR}/prev.release.list" "${BUILD_DIR}/release.list" \
      || true
  fi
}

# Download the gvf file(s) for one assembly, convert them to a sorted and
# clipped bed file, build the bigBed and copy it to the release directory.
# Must be called from the YYYY-MM-DD/download directory, after <db>.lift
# has been written there.
# Arguments:
#   $1 - GRC assembly name as used in the dbVar file names (GRCh37 or GRCh38)
#   $2 - matching UCSC database name (hg19 or hg38)
build_assembly() {
  local grc=$1
  local db=$2
  local sizes="/hive/data/genomes/${db}/chrom.sizes"
  local bed="../output/${db}.clinGenCnv.bed"
  local bb="../output/${db}.clinGenCnv.bb"

  wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/gvf/nstd45.${grc}.variant*.gvf.gz"

  zcat nstd45."${grc}".* \
    | ../../../processClinGenCnv.py stdin "${db}.lift" \
    | sort -k1,1 -k2,2n \
    | bedClip -truncate stdin "${sizes}" stdout > "${bed}"

  bedToBigBed -type=bed9+17 -as=../../clinGenCnv.as -tab \
    "${bed}" "${sizes}" "${bb}"
  cp "${bb}" "${WORKDIR}/release/${db}/clinGenCnv.bb"
}

main() {
  if (($# < 1)); then
    usage
    exit 1
  fi

  # Resolve WORKDIR to an absolute path: it is used again after several
  # directory changes, so a relative argument would otherwise break the
  # final copy into the release directory.
  mkdir -p "$1/clinGenCnv"
  WORKDIR=$(cd -- "$1" > /dev/null && pwd)
  BUILD_DIR="${WORKDIR}/clinGenCnv"
  cd "${BUILD_DIR}"

  # Commands for the non-interactive ftp session below.
  cat > ftp.isca.rsp << 'EOF'
user anonymous otto@soe.ucsc.edu
cd /pub/dbVar/data/Homo_sapiens/by_study/gvf
ls nstd45*
bye
EOF

  # Rotate the listing from the last run; make sure a (possibly empty)
  # previous listing always exists for diff.
  if [[ -e release.list ]]; then
    mv release.list prev.release.list
  fi
  touch prev.release.list
  rm -f release.list
  trap restore_release_list EXIT

  # Connect and list the directory; the whole ftp transcript goes to ls.check.
  ftp -n -v -i ftp.ncbi.nlm.nih.gov < ftp.isca.rsp &> ls.check

  # With pipefail this pipeline fails when grep matches nothing. An empty
  # listing means the FTP step did not work, so stop here instead of
  # treating "all files vanished" as an update.
  if ! grep "nstd45.*gvf.gz" ls.check | sort > release.list; then
    echo "Error - no gvf files found" >&2
    exit 1
  fi

  # See if anything changed; if so, notify, download and build.
  # diff exits 0 when the listings are identical, 1 when they differ and
  # >1 when diff itself ran into trouble.
  local diff_status=0
  diff prev.release.list release.list > release.diff || diff_status=$?
  if ((diff_status > 1)); then
    echo "Error - could not compare prev.release.list and release.list" >&2
    exit 1
  fi

  if ((diff_status == 1)); then
    echo "New ClinGen CNV update"
    local today
    today=$(date +%F)
    mkdir -p "${today}"/{download,output}
    cd "${today}/download"

    # Lift files mapping RefSeq sequence names to UCSC chromosome names.
    local db
    for db in hg19 hg38; do
      hgsql -Ne 'select 0, ca.alias, size, ca.chrom, size from chromInfo ci join chromAlias ca on ci.chrom = ca.chrom where source = "refseq"' "${db}" > "${db}.lift"
    done

    build_assembly GRCh37 hg19
    build_assembly GRCh38 hg38
  else
    echo "No ClinGen CNV update"
  fi
}

main "$@"