2fd288fe28c984b85b1c51fff4ea5ce46caad329
hiram
  Thu Aug 13 19:17:29 2020 -0700
adding ncbiRefSeqGenomicDiff track to hg19 refs #20215

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 9185f86..8b6f888 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -33667,29 +33667,60 @@
 642.857786 ********************************* 248
 714.286429 *************************** 80
 785.715071 ************** 10
 857.143714 ******** 4
 928.572357 *************** 12
 
 # table looks OK, load it
 set lib = ~/kent/src/hg/lib
 hgLoadBed hg19 -noBin -tab -type=bed6+4 \
         -as=$lib/gtexGeneBed.as -sqlTable=$lib/gtexGeneBed.sql -renameSqlTable \
                 gtexGeneV8 gtexGeneBedV8.bed
 # Read 54481 elements of size 10 from gtexGeneBedV8.bed
 
 # TODO: Add to gene sorter
 
+#############################################################################
+# Build ncbiRefSeqGenomicDiff (DONE - 2020-06-10 - Hiram)
+    mkdir /hive/data/genomes/hg19/bed/ncbiRefSeqAnomalies.p13
+    cd /hive/data/genomes/hg19/bed/ncbiRefSeqAnomalies.p13
+
+    db=hg19
+    pre=ncbiRefSeqGenomicDiff
+    buildDir=/hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-01-21
+    asmId=GCF_000001405.25_GRCh37.p13
+
+    time (zcat $buildDir/process/$asmId.rna.cds.gz \
+        | egrep '[0-9]+\.\.[0-9]+' \
+        | pslMismatchGapToBed -cdsFile=stdin -db=$db -ignoreQNamePrefix=X \
+            $buildDir/process/$asmId.$db.psl.gz \
+            /hive/data/genomes/$db/$db.2bit \
+            $buildDir/$db.rna.fa \
+            $pre)
+
+# pslMismatchGapToBed: NM_001365372.1 gapIx 9 shifted right 74 bases, but next block size is only 38; report to NCBI
+# pslMismatchGapToBed: NM_001288811.1 gapIx 1 shifted left 6 bases, but previous block size is only 5; report to NCBI
+
+#  real    0m21.265s
+
+ bedToBigBed -type=bed9+ -tab -as=$HOME/kent/src/hg/lib/txAliDiff.as $pre.bed \
+        /hive/data/genomes/$db/chrom.sizes $pre.bb
+# pass1 - making usageList (180 chroms): 77 millis
+# pass2 - checking and writing primary data (27362 records, 20 fields): 234 millis
+    ln -sf `pwd`/$pre.bb /gbdb/hg19/ncbiRefSeq/$pre.bb
+
+#############################################################################
+
 # clinvarSub track  IN PROGRESS BRANEY 8/4/20
 mkdir /cluster/data/hg19/bed/clinvarSub
 cd /cluster/data/hg19/bed/clinvarSub
 bigBedToBed /gbdb/hg19/bbi/clinvar/clinvarMain.bb stdout | tawk '{print $40, $1,$2,$2,$4}' | sort > sort.main.bed
 hgsql hg19 -Ne "select varId,clinSign from clinvarSub" | sort -n  > clinSign.txt
 join -t $'\t' sort.main.bed clinSign.txt   | uniq -c | sed 's/^ *//' | sed 's/ /\t/' | tawk '{print $3,$4,$5, $6, $7, $1}' | sort -k1,1 -k2,2n > sorted.bed
 # fawk (the awk script passed to tawk -f below) was written by hand
 tawk -f fawk sorted.bed > bigBedInput.bed
 bedToBigBed -tab bigBedInput.bed /cluster/data/hg19/chrom.sizes clinsub.bb
 ln -s `pwd`/clinsub.bb /gbdb/hg19
 
 #############################################################################
 # Trios for Genome In a Bottle - DONE 08/04/2020 ChrisL
 # see ~/kent/src/hg/makeDb/giab/make.txt