a1cbac0f4ffff0ec3f9f709e48ef04fcc9769aa3
max
  Fri Jan 24 08:00:44 2020 -0800
adding a do script for clinvar lift track and docs page, refs #24825
(Not sure what to do about makedocs for an automated track like this)

diff --git src/utils/doClinvarLift src/utils/doClinvarLift
new file mode 100755
index 0000000..365ee96
--- /dev/null
+++ src/utils/doClinvarLift
@@ -0,0 +1,46 @@
+#!/usr/bin/bash
+# usage: doClinvarLift db -- first argument is the db, e.g. mm10
+set -e # stop on error
+set -o pipefail # stop on errors even in pipes
+# abort with a usage message if db is missing, instead of building a bogus path
+db=${1:?"usage: doClinvarLift db (e.g. mm10)"}
+outDir="/hive/data/genomes/$db/bed/clinvarLift"
+
+echo making directory "$outDir"
+mkdir -p "$outDir"
+cd "$outDir"
+echo Dumping clinvar
+bigBedToBed /hive/data/outside/otto/clinvar/clinvarMain.hg38.bb stdout > clinvar.bed
+# keep only features with end-start below 10; the long ones are unlikely to be useful
+tawk '($3-$2<10)' < clinvar.bed > clinvarShort.bed
+
+# need to do this twice so make a function
+function addPosAndSeq () {
+        # insert "chrom:start-end" and the uppercased sequence as fields 13 and 14
+        # arguments: inputfile db outputfile; uses tmp.* scratch files in the cwd
+        echo Adding position and sequences to "$1", for db "$2", output into "$3"
+        cut -f1-3 "$1" | tawk '{$4=$1":"$2"-"$3;print;}' > tmp.bed4
+        twoBitToFa -bed=tmp.bed4 "/hive/data/genomes/$2/$2.2bit" tmp.fa
+        faToTab tmp.fa stdout | tawk '{$2=toupper($2); print}' > tmp.faTab
+        cut -f1-12 "$1" > tmp.part1
+        cut -f13- "$1" > tmp.part2
+        paste tmp.part1 tmp.faTab tmp.part2 > "$3"
+        rm -f tmp.part1 tmp.faTab tmp.bed4 tmp.part2 tmp.fa # tmp.fa too, so no scratch file remains
+}
+
+addPosAndSeq clinvarShort.bed hg38 clinvarShort.withPos.bed
+
+# uppercase first letter of db
+dbUp="$(tr '[:lower:]' '[:upper:]' <<< ${db:0:1})${db:1}"
+liftOver clinvarShort.withPos.bed /gbdb/hg38/liftOver/hg38To$dbUp.over.chain.gz clinvarShort.$db.bed /dev/null -bedPlus=12 -tab -multiple
+addPosAndSeq clinvarShort.$db.bed $db clinvarShort.$db.seq.bed
+# remove column 13, the position in $db, as we have that already.
+# also remove features that are too long
+cut clinvarShort.$db.seq.bed -f1-12,14- | tawk '($3-$2<10)' | sort -k1,1 -k2,2n -S10G > clinvarLift.$db.bed 
+cp ~/kent/src/hg/lib/clinvarLift.as ./
+sed -i s/DB/$db/g ./clinvarLift.as
+bedToBigBed clinvarLift.$db.bed -tab -as=clinvarLift.as /hive/data/genomes/$db/chrom.sizes clinvarLift.bb -type=bed12+
+if [ ! -e /gbdb/$db/bbi/clinvarLift.bb ]; then
+    ln -s `pwd`/clinvarLift.bb /gbdb/$db/bbi/clinvarLift.bb
+fi
+echo clinvarLift job for $db done.