#!/usr/bin/bash
# doClinvarLift - lift the short ClinVar variants from hg38 to another
# assembly and build the clinvarLift bigBed for that assembly.
#
# Usage: doClinvarLift db
#   first argument is the db, e.g. mm10
#
# Steps, all run inside /hive/data/genomes/<db>/bed/clinvarLift:
#   1. dump the hg38 ClinVar bigBed to BED and keep features shorter than 10 bp
#   2. insert the hg38 position and sequence as extra columns
#   3. liftOver to <db>, then insert the <db> position and sequence
#   4. drop the redundant <db> position column, sort, and build clinvarLift.bb
#   5. link the bigBed into /gbdb/<db>/bbi/ unless something already exists there
#
# Provenance: src/utils/doClinvarLift, added in commit
# a1cbac0f4ffff0ec3f9f709e48ef04fcc9769aa3 (max, Fri Jan 24 08:00:44 2020 -0800):
#   "adding a do script for clinvar lift track and docs page, refs #24825
#   (Not sure what to do about makedocs for an automated track like this)"

set -e          # stop on error
set -u          # treat use of an unset variable as an error
set -o pipefail # stop on errors even in pipes

# Without a db the paths below would collapse to /hive/data/genomes//bed/...
# and mkdir -p would happily create that, so refuse to run.
if [[ $# -lt 1 || -z "${1:-}" ]]; then
    echo "usage: $(basename "$0") db   (e.g. mm10)" >&2
    exit 1
fi

db=$1
outDir=/hive/data/genomes/$db/bed/clinvarLift

echo "making directory $outDir"
mkdir -p "$outDir"
cd "$outDir"
echo Dumping clinvar
bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed
# drop the long ones, they are unlikely to be useful
tawk '($3-$2<10)' < clinvar.bed > clinvarShort.bed

# need to do this twice so make a function
function addPosAndSeq () {
    # add the position and sequence to the bed file as fields 13 and 14
    # arguments: inputfile db outputfile
    # The original fields 13 and up are shifted right by two columns.
    # Uses tmp.* scratch files in the current directory and removes them.
    local inBed=$1
    local seqDb=$2
    local outBed=$3

    echo "Adding position and sequences to $inBed, for db $seqDb, output into $outBed"
    # name every feature chrom:start-end so the fasta record carries the position
    cut -f1-3 "$inBed" | tawk '{$4=$1":"$2"-"$3;print;}' > tmp.bed4
    twoBitToFa -bed=tmp.bed4 "/hive/data/genomes/$seqDb/$seqDb.2bit" tmp.fa
    # two columns per feature: position name, uppercased sequence
    faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab
    cut -f1-12 "$inBed" > tmp.part1
    cut -f13- "$inBed" > tmp.part2
    paste tmp.part1 tmp.faTab tmp.part2 > "$outBed"
    # tmp.fa is included here so no scratch file is left behind
    rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 tmp.fa
}

addPosAndSeq clinvarShort.bed hg38 clinvarShort.withPos.bed

# uppercase first letter of db, as needed for the hg38To<Db> chain file name
dbUp="$(tr '[:lower:]' '[:upper:]' <<< "${db:0:1}")${db:1}"
liftOver clinvarShort.withPos.bed "/gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz" "clinvarShort.$db.bed" /dev/null -bedPlus=12 -tab -multiple
addPosAndSeq "clinvarShort.$db.bed" "$db" "clinvarShort.$db.seq.bed"
# remove column 13, the position in $db, as we have that already.
# also remove features that are too long
# LC_ALL=C keeps the chromosome ordering byte-wise and independent of the
# caller's locale.
cut -f1-12,14- "clinvarShort.$db.seq.bed" \
    | tawk '($3-$2<10)' \
    | LC_ALL=C sort -k1,1 -k2,2n -S10G > "clinvarLift.$db.bed"
cp ~/kent/src/hg/lib/clinvarLift.as ./
# the .as file carries a DB placeholder that is replaced by the target db
sed -i "s/DB/$db/g" ./clinvarLift.as
bedToBigBed "clinvarLift.$db.bed" -tab -as=clinvarLift.as "/hive/data/genomes/$db/chrom.sizes" clinvarLift.bb -type=bed12+
# -e is false both for a missing path and for a dangling symlink; -f lets ln
# replace such a dangling link instead of aborting the script. Anything that
# already resolves to an existing file is left untouched.
if [[ ! -e "/gbdb/$db/bbi/clinvarLift.bb" ]]; then
    ln -sf "$PWD/clinvarLift.bb" "/gbdb/$db/bbi/clinvarLift.bb"
fi
echo "clinvarLift job for $db done."