d74dfc1624d496349de6061ed0fb1e068318291d max Wed Mar 4 02:23:29 2020 -0800 adding help message to doClinvarLift, refs #24825 diff --git src/utils/doClinvarLift src/utils/doClinvarLift index 365ee96..8c3f7c1 100755 --- src/utils/doClinvarLift +++ src/utils/doClinvarLift @@ -1,46 +1,64 @@ #!/usr/bin/bash # first argument is the db, e.g. mm10 set -e # stop on error set -o pipefail # stop on errors even in pipes +if [ "$1" = "" ] ; then + echo usage: doClinvarLift '[db] - create clinvarLift track with human SNVs lifted to db' + exit 1 +fi + db=$1 -outDir=/hive/data/genomes/$1/bed/clinvarLift +outDir=/hive/data/genomes/$db/bed/clinvarLift echo making directory $outDir mkdir -p $outDir cd $outDir echo Dumping clinvar bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed # drop the long ones, they are unlikely to be useful -cat clinvar.bed | tawk '($3-$2<10)' > clinvarShort.bed +cat clinvar.bed | tawk '($3-$2<10)' > clinvarLift.bed # need to do this twice so make a function function addPosAndSeq () { # add the position and sequence to the bed file as fields 13 and 14 # arguments: inputfile db outputfile echo Adding position and sequences to $1, for db $2, output into $3 cat $1 | cut -f1-3 | tawk '{$4=$1":"$2"-"$3;print;}'> tmp.bed4 twoBitToFa -bed=tmp.bed4 /hive/data/genomes/$2/$2.2bit tmp.fa faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab cut -f1-12 $1 > tmp.part1 cut -f13- $1 > tmp.part2 paste tmp.part1 tmp.faTab tmp.part2 > $3 rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 } -addPosAndSeq clinvarShort.bed hg38 clinvarShort.withPos.bed +addPosAndSeq clinvarLift.bed hg38 clinvarLift.withPos.bed # uppercase first letter of db dbUp="$(tr '[:lower:]' '[:upper:]' <<< ${db:0:1})${db:1}" -liftOver clinvarShort.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarShort.$db.bed /dev/null -bedPlus=12 -tab -multiple -addPosAndSeq clinvarShort.$db.bed $db clinvarShort.$db.seq.bed +liftOver clinvarLift.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarLift.$db.bed /dev/null -bedPlus=12 -tab -multiple +addPosAndSeq clinvarLift.$db.bed $db clinvarLift.$db.seq.bed # remove column 13, the position in $db, as we have that already. # also remove features that are too long -cut clinvarShort.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed +cut clinvarLift.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed cp ~/kent/src/hg/lib/clinvarLift.as ./ sed -i s/DB/$db/g ./clinvarLift.as bedToBigBed clinvarLift.$db.bed -tab -as=clinvarLift.as /hive/data/genomes/$db/chrom.sizes clinvarLift.bb -type=bed12+ if [ ! -e /gbdb/$db/bbi/clinvarLift.bb ]; then ln -s `pwd`/clinvarLift.bb /gbdb/$db/bbi/clinvarLift.bb fi echo clinvarLift job for $db done. +echo Suggested makeDoc entry: +echo '##########################################################################' +echo clinvar lift '('DONE, `date`, $USER')' +echo featureBits hg38 clinvarLift.bed +featureBits hg38 clinvarLift.bed +echo wc -l clinvarLift.bed +wc -l clinvarLift.bed +echo featureBits $db clinvarLift.$db.bed +featureBits $db clinvarLift.$db.bed +echo wc -l clinvarLift.$db.bed +wc -l clinvarLift.$db.bed +echo '##########################################################################' +