7355201a77e934e6e7f28242d4beb15d694e7c8d
braney
  Sun Oct 11 13:21:05 2020 -0700
initial work on Clinvar Submissions track #26330

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 70a0649..95522b0 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -33697,45 +33697,43 @@
             $buildDir/$db.rna.fa \
             $pre)
 
 # pslMismatchGapToBed: NM_001365372.1 gapIx 9 shifted right 74 bases, but next block size is only 38; report to NCBI
 # pslMismatchGapToBed: NM_001288811.1 gapIx 1 shifted left 6 bases, but previous block size is only 5; report to NCBI
 
 #  real    0m21.265s
 
  bedToBigBed -type=bed9+ -tab -as=$HOME/kent/src/hg/lib/txAliDiff.as $pre.bed \
         /hive/data/genomes/$db/chrom.sizes $pre.bb
 # pass1 - making usageList (180 chroms): 77 millis
 # pass2 - checking and writing primary data (27362 records, 20 fields): 234 millis
     ln -sf `pwd`/$pre.bb /gbdb/hg19/ncbiRefSeq/$pre.bb
 
 #############################################################################
-# clinvarSub track  IN PROGRESS BRANEY 8/4/20
-mkdir /cluster/data/hg19/bed/clinvarSub
-cd /cluster/data/hg19/bed/clinvarSub
-bigBedToBed /gbdb/hg19/bbi/clinvar/clinvarMain.bb stdout | tawk '{print $40, $1,$2,$2,$4}' | sort > sort.main.bed
-hgsql hg19 -Ne "select varId,clinSign from clinvarSub" | sort -n  > clinSign.txt
-
-join -t $'\t' sort.main.bed clinSign.txt | cut -f 2- | sort -k1,1 -k2,2n -k5,5 | tawk -f gawk   | tawk -f fawk > tmp1
-tawk '{print $1":"$2 + 1"-"$3"<BR>Variants (submissions):"$11}' tmp1 > tmp2
+# clinvarSubLolly track  IN PROGRESS BRANEY 10/11/2020
+mkdir -p /cluster/data/hg19/bed/clinvarSubLolly  # -p: no error if the directory is left over from an earlier pass
+cd /cluster/data/hg19/bed/clinvarSubLolly
+bigBedToBed /gbdb/hg19/bbi/clinvar/clinvarMain.bb stdout | tawk '{print $40, $1,$2,$3,$4}' | sort  -S 40g > sort.main.bed  # column 40 is printed first: it is the key joined to clinvarSub.varId below
+hgsql hg19 -Ne "select varId,clinSign,scv from clinvarSub" | sort  -S 40g  > clinvarSubSub.txt  # sorted the same way as sort.main.bed, since join needs both inputs in the same key order
+
+join -t $'\t' sort.main.bed clinvarSubSub.txt | tawk '{print $2,$3,$4,$5,$6,$1, $7}' | sort -S 40g -k1,1 -k2,2n -k5,5 | tawk -f makeFranklin   | tawk -f assignColors > tmp1  # joined row: $1 = key, $2-$5 = clinvarMain columns 1-4, $6 = clinSign, $7 = scv; makeFranklin/assignColors are not shown here (presumably awk program files in this directory)
+tawk '{print $1":"$2 + 1"-"$3"←Variants (submissions):"$11}' tmp1 > tmp2  # one label per tmp1 row; awk evaluates $2 + 1 before concatenating. NOTE(review): confirm the "←" separator is intended (the replaced version of this line used "<BR>")
 paste tmp1 tmp2 > bigBedInput.bed
 
-#join -t $'\t' sort.main.bed clinSign.txt   | uniq -c | sed 's/^ *//' | sed 's/ /\t/' | tawk '{print $3,$4,$5, $6, $7, $1}' | sort -k1,1 -k2,2n > sorted.bed
-# fawk written by "hand" 
-#tawk -f fawk sorted.bed > bigBedInput.bed
-bedToBigBed -as=clinsub.as -type=bed9+2 -tab bigBedInput.bed /cluster/data/hg19/chrom.sizes clinsub.bb
-ln -s `pwd`/clinsub.bb /gbdb/hg19
+bedToBigBed -as=$HOME/kent/src/hg/lib/clinvarSubLolly.as -type=bed9+5 -tab bigBedInput.bed /cluster/data/hg19/chrom.sizes clinvarSubLolly.bb  # bed9+5: 9 standard bed fields plus 5 extra fields described by the .as file
+mkdir -p /gbdb/hg19/clinvarSubLolly
+ln -sf `pwd`/clinvarSubLolly.bb /gbdb/hg19/clinvarSubLolly/clinvarSubLolly.bb  # -f (as in the ncbiRefSeq step above): replace the link instead of failing when this step is rerun
 
 #############################################################################
 # Trios for Genome In a Bottle - DONE 08/04/2020 ChrisL
 # see ~/kent/src/hg/makeDb/giab/make.txt
 
 #############################################################################
 # COVID GWAS from  COVID-19 Host Genetics Initiative  Sep 2020  Kate
 # see ~kent/src/hg/makeDb/doc/covid/covidHgiGwas.txt
 
 #############################################################################
 # gnomAD PEXT scores
 
 # PEXT data:
 # The baselevel is the sum of the expression value for all transcripts touching that base
 # The annotation-level is the sum of the expression of transcripts on which a variant has a