76607ba1c1c488b88d7121ce6e78c2cb5d0c59ca max Tue Mar 10 02:51:37 2020 -0700 changes after code review, refs #25105 diff --git src/utils/doClinvarLift src/utils/doClinvarLift index 8c3f7c1..ad8689f 100755 --- src/utils/doClinvarLift +++ src/utils/doClinvarLift @@ -12,31 +12,31 @@ outDir=/hive/data/genomes/$db/bed/clinvarLift echo making directory $outDir mkdir -p $outDir cd $outDir echo Dumping clinvar bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed # drop the long ones, they are unlikely to be useful cat clinvar.bed | tawk '($3-$2<10)' > clinvarLift.bed # need to do this twice so make a function function addPosAndSeq () { # add the position and sequence to the bed file as fields 13 and 14 # arguments: inputfile db outputfile echo Adding position and sequences to $1, for db $2, output into $3 - cat $1 | cut -f1-3 | tawk '{$4=$1":"$2"-"$3;print;}'> tmp.bed4 + cat $1 | cut -f1-3 | tawk '{$4=$1":"($2+1)"-"$3;print;}'> tmp.bed4 twoBitToFa -bed=tmp.bed4 /hive/data/genomes/$2/$2.2bit tmp.fa faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab cut -f1-12 $1 > tmp.part1 cut -f13- $1 > tmp.part2 paste tmp.part1 tmp.faTab tmp.part2 > $3 rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 } addPosAndSeq clinvarLift.bed hg38 clinvarLift.withPos.bed # uppercase first letter of db dbUp="$(tr '[:lower:]' '[:upper:]' <<< ${db:0:1})${db:1}" liftOver clinvarLift.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarLift.$db.bed /dev/null -bedPlus=12 -tab -multiple addPosAndSeq clinvarLift.$db.bed $db clinvarLift.$db.seq.bed # remove column 13, the position in $db, as we have that already.