acca3deffc05c4d8d11590a1cf3d893763254712
angie
  Thu Oct 31 13:43:05 2019 -0700
dbSnp153: Adding new ucscNotes suggested by Ana Benet: clinvar{Benign,Conflicting,Pathogenic}, rareAll, rareSome.  refs #23283

diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt
index 5b27f29..ae7c3a7 100644
--- src/hg/makeDb/doc/bigDbSnp.txt
+++ src/hg/makeDb/doc/bigDbSnp.txt
@@ -317,107 +317,114 @@
       -buildDir=`pwd` -continue convert -stop install \
       >& redo.log &
     tail -f redo.log
 # *** All done !  (through the 'install' step)  Elapsed time: 263m59s
 # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07
 
     #*** uh-oh... when checkBigDbSnp failed, doCheck.sh did not fail... I guess backgrounding
     #*** the jobs and 'wait' hide errors?
     cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07
     $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir -buildDir=`pwd` \
       -continue check -stop install \
       >& check.log &
     tail -f check.log
 
     # 9/19/19: and again after changing doBigDbSnp.pl to have args & wait on specific pids:
+    # 10/30/19: and again after adding new ucscNotes (#23283).
+    topDir=/hive/data/outside/dbSNP/153
+    freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese
     # Run doBigDbSnp.pl (first with -debug to make runDir):
     $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug
-# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
-    cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
+# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
+    cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
     # Link to ../bigDbSnp.2019-08-07/split, -continue convert to avoid re-splitting (the slowest part of the process):
     rmdir split
     ln -s ../bigDbSnp.2019-08-07/split split
     $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \
       -buildDir=`pwd` -continue convert -stop install \
       >& do.log &
     tail -f do.log
-# *** All done !  (through the 'install' step)  Elapsed time: 491m30s
-# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
+# *** All done !  (through the 'install' step)  Elapsed time: 472m19s
+# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
 
-    # 10/8/19: count up how many variants have freq counts for each project
+    # count up how many variants have freq counts for each project
+#TODO: re-run this count for the bigDbSnp.2019-10-30 build (the numbers below appear to predate it)
     cut -f 4 dbSnp153Details.tab \
     | perl -wne 'chomp; next unless $_; @w = split ",";
         if ($w[0]) { print "1000Genomes\n" }
         if ($w[1]) { print "GnomAD_exomes\n"; }
         if ($w[2]) { print "TOPMED\n" }
         if ($w[3]) { print "ExAC\n" }
         if ($w[4]) { print "PAGE_STUDY\n" }
         if ($w[5]) { print "GnomAD\n" }
         if ($w[6]) { print "GoESP\n" }
         if ($w[7]) { print "Estonian\n" }
         if ($w[8]) { print "ALSPAC\n" }
         if ($w[9]) { print "TWINSUK\n" }
         if ($w[10]) { print "NorthernSweden\n" }
         if ($w[11]) { print "Vietnamese\n" }' \
     | sort | uniq -c | sort -nr
 #437625009 TOPMED
 #211192420 GnomAD
 #84744375 1000Genomes
 #44888383 TWINSUK
 #44888383 ALSPAC
 #31397940 Estonian
 #16351632 NorthernSweden
 #12283940 GnomAD_exomes
 #10004052 Vietnamese
 #8854128 ExAC
 #1973841 GoESP
 #1323033 PAGE_STUDY
 
-    # 10/11/19: count up how many instances of each type of ucscNote:
+    # count up how many instances of each type of ucscNote:
     cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
 #    10747 altIsAmbiguous
 #     5701 classMismatch
 #   454656 clinvar
+#   143844 clinvarBenign
+#     7932 clinvarConflicting
+#    96242 clinvarPathogenic
 #   113678 clusterError
 # 12178426 commonAll
 # 20534330 commonSome
 #  3522349 diffMajor
 #     7649 freqIsAmbiguous
 #    25413 freqNotRefAlt
 #   561309 multiMap
 #106940656 overlapDiffClass
 # 16890303 overlapSameClass
+#662571654 rareAll
+#670927558 rareSome
 #      101 refIsAmbiguous
 # 16032028 refIsMinor
 #   142937 refIsRare
 #    44382 refIsSingleton
 #        4 refMismatch
 #  3813390 revStrand
     cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
 #    10873 altIsAmbiguous
 #     5864 classMismatch
 #   453954 clinvar
+#   143696 clinvarBenign
+#     7950 clinvarConflicting
+#    95262 clinvarPathogenic
 #   126973 clusterError
 # 12430253 commonAll
 # 20893174 commonSome
 #  3573503 diffMajor
 #     7749 freqIsAmbiguous
 #    39038 freqNotRefAlt
 #   132015 multiMap
 #109838613 overlapDiffClass
 # 17228657 overlapSameClass
+#681626796 rareAll
+#690089717 rareSome
 #      111 refIsAmbiguous
 # 16277729 refIsMinor
 #   166192 refIsRare
 #    56491 refIsSingleton
 #       33 refMismatch
 #  4512600 revStrand
 
-    # 10/18/19: add subset tracks
-    # I added new commands to the bigBed and install steps of doBigDbSnp.pl,
-    # ran -debug and copy-pasted commands from the generated scripts.
-    $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 152 $freqSourceOrder \
-       -buildDir=`pwd` -continue=bigBed -stop=install \
-       -debug
-
 
 ##############################################################################