src/utils/doClinvarLift d74dfc1624d496349de6061ed0fb1e068318291d

d74dfc1624d496349de6061ed0fb1e068318291d
max
  Wed Mar 4 02:23:29 2020 -0800
adding help message to doClinvarLift, refs #24825

diff --git src/utils/doClinvarLift src/utils/doClinvarLift
index 365ee96..8c3f7c1 100755
--- src/utils/doClinvarLift
+++ src/utils/doClinvarLift
@@ -1,46 +1,64 @@
 #!/usr/bin/bash
 # first argument is the db, e.g. mm10
 set -e # stop on error
 set -o pipefail # stop on errors even in pipes
 
+if [ "$1" = "" ] ; then
+        echo usage: doClinvarLift '[db] - create clinvarLift track with human SNVs lifted to db'
+        exit 1
+fi
+
 db=$1
-outDir=/hive/data/genomes/$1/bed/clinvarLift
+outDir=/hive/data/genomes/$db/bed/clinvarLift
 
 echo making directory $outDir
 mkdir -p $outDir
 cd $outDir
 echo Dumping clinvar
 bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed
 # drop the long ones, they are unlikely to be useful
-cat clinvar.bed | tawk '($3-$2<10)' > clinvarShort.bed
+cat clinvar.bed | tawk '($3-$2<10)' > clinvarLift.bed
 
 # need to do this twice so make a function
 function addPosAndSeq () {
         # Add a "field1:field2-field3" position and the uppercased sequence fetched from
         # the db's 2bit file as fields 13 and 14. Arguments: inputfile db outputfile
         echo "Adding position and sequences to $1, for db $2, output into $3"
         cut -f1-3 "$1" | tawk '{$4=$1":"$2"-"$3;print;}' > tmp.bed4
         twoBitToFa -bed=tmp.bed4 "/hive/data/genomes/$2/$2.2bit" tmp.fa
         faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab
         cut -f1-12 "$1" > tmp.part1
         cut -f13- "$1" > tmp.part2  # fields past 12 are re-attached after the new ones
         paste tmp.part1 tmp.faTab tmp.part2 > "$3"
         rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 tmp.fa  # tmp.fa was previously left behind
 }
 
-addPosAndSeq clinvarShort.bed hg38 clinvarShort.withPos.bed
+addPosAndSeq clinvarLift.bed hg38 clinvarLift.withPos.bed
 
 # uppercase first letter of db
 dbUp="$(tr '[:lower:]' '[:upper:]' <<< ${db:0:1})${db:1}"
-liftOver clinvarShort.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarShort.$db.bed /dev/null -bedPlus=12 -tab -multiple
-addPosAndSeq clinvarShort.$db.bed $db clinvarShort.$db.seq.bed
+liftOver clinvarLift.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarLift.$db.bed /dev/null -bedPlus=12 -tab -multiple
+addPosAndSeq clinvarLift.$db.bed $db clinvarLift.$db.seq.bed
 # remove column 13, the position in $db, as we have that already.
 # also remove features that are too long
-cut clinvarShort.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed 
+cut clinvarLift.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed 
 cp ~/kent/src/hg/lib/clinvarLift.as ./
 sed -i s/DB/$db/g ./clinvarLift.as
 bedToBigBed clinvarLift.$db.bed -tab -as=clinvarLift.as /hive/data/genomes/$db/chrom.sizes clinvarLift.bb -type=bed12+
 if [ ! -e /gbdb/$db/bbi/clinvarLift.bb ]; then
     ln -s `pwd`/clinvarLift.bb /gbdb/$db/bbi/clinvarLift.bb
 fi
 echo clinvarLift job for $db done.
+echo Suggested makeDoc entry:
+echo '##########################################################################'
+echo clinvar lift '('DONE, `date`, $USER')'
+echo featureBits hg38 clinvarLift.bed
+featureBits hg38 clinvarLift.bed
+echo wc -l clinvarLift.bed
+wc -l clinvarLift.bed
+echo featureBits $db clinvarLift.$db.bed
+featureBits $db clinvarLift.$db.bed
+echo wc -l clinvarLift.$db.bed
+wc -l clinvarLift.$db.bed
+echo '##########################################################################'
+