76607ba1c1c488b88d7121ce6e78c2cb5d0c59ca max Tue Mar 10 02:51:37 2020 -0700 changes after code review, refs #25105 diff --git src/utils/doClinvarLift src/utils/doClinvarLift index 8c3f7c1..ad8689f 100755 --- src/utils/doClinvarLift +++ src/utils/doClinvarLift @@ -1,64 +1,64 @@ #!/usr/bin/bash # first argument is the db, e.g. mm10 set -e # stop on error set -o pipefail # stop on errors even in pipes if [ "$1" = "" ] ; then echo usage: doClinvarLift '[db] - create clinvarLift track with human SNVs lifted to db' exit 1 fi db=$1 outDir=/hive/data/genomes/$db/bed/clinvarLift echo making directory $outDir mkdir -p $outDir cd $outDir echo Dumping clinvar bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed # drop the long ones, they are unlikely to be useful cat clinvar.bed | tawk '($3-$2<10)' > clinvarLift.bed # need to do this twice so make a function function addPosAndSeq () { # add the position and sequence to the bed file as fields 13 and 14 # arguments: inputfile db outputfile echo Adding position and sequences to $1, for db $2, output into $3 - cat $1 | cut -f1-3 | tawk '{$4=$1":"$2"-"$3;print;}'> tmp.bed4 + cat $1 | cut -f1-3 | tawk '{$4=$1":"($2+1)"-"$3;print;}'> tmp.bed4 twoBitToFa -bed=tmp.bed4 /hive/data/genomes/$2/$2.2bit tmp.fa faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab cut -f1-12 $1 > tmp.part1 cut -f13- $1 > tmp.part2 paste tmp.part1 tmp.faTab tmp.part2 > $3 rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 } addPosAndSeq clinvarLift.bed hg38 clinvarLift.withPos.bed # uppercase first letter of db dbUp="$(tr '[:lower:]' '[:upper:]' <<< ${db:0:1})${db:1}" liftOver clinvarLift.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarLift.$db.bed /dev/null -bedPlus=12 -tab -multiple addPosAndSeq clinvarLift.$db.bed $db clinvarLift.$db.seq.bed # remove column 13, the position in $db, as we have that already. # also remove features that are too long cut clinvarLift.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed cp ~/kent/src/hg/lib/clinvarLift.as ./ sed -i s/DB/$db/g ./clinvarLift.as bedToBigBed clinvarLift.$db.bed -tab -as=clinvarLift.as /hive/data/genomes/$db/chrom.sizes clinvarLift.bb -type=bed12+ if [ ! -e /gbdb/$db/bbi/clinvarLift.bb ]; then ln -s `pwd`/clinvarLift.bb /gbdb/$db/bbi/clinvarLift.bb fi echo clinvarLift job for $db done. echo Suggested makeDoc entry: echo '##########################################################################' echo clinvar lift '('DONE, `date`, $USER')' echo featureBits hg38 clinvarLift.bed featureBits hg38 clinvarLift.bed echo wc -l clinvarLift.bed wc -l clinvarLift.bed echo featureBits $db clinvarLift.$db.bed featureBits $db clinvarLift.$db.bed echo wc -l clinvarLift.$db.bed wc -l clinvarLift.$db.bed echo '##########################################################################'