54452ec022a6073410955c04e110a1784f71fb57
angie
  Wed Nov 13 17:37:34 2019 -0800
dbSnp153: add new ucscNote otherMapErr for mappings with the same rs# as a mapping w/inconsistent SPDI in BadCoords/Map Err subtrack.  refs #23283

diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt
index 47fa08e..86d46a3 100644
--- src/hg/makeDb/doc/bigDbSnp.txt
+++ src/hg/makeDb/doc/bigDbSnp.txt
@@ -337,97 +337,112 @@
     freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese
     # Run doBigDbSnp.pl (first with -debug to make runDir):
     $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug
 # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08
     cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08
     # Link to ../bigDbSnp.2019-08-07/split, -continue convert to avoid re-splitting (the slowest part of the process):
     rmdir split
     ln -s ../bigDbSnp.2019-08-07/split split
     $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \
       -buildDir=`pwd` -continue convert -stop install \
       >& do.log &
     tail -f do.log
 # *** All done !  (through the 'install' step)  Elapsed time: 504m48s
 # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08
 
+    # 11/12/19: Add new ucscNote otherMapErr by re-running from check onward.
+    $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \
+      -buildDir=`pwd` -continue check -stop install \
+      >& check.log &
+    tail -f check.log
+# *** All done !  (through the 'install' step)  Elapsed time: 172m35s
+
     # count up how many variants have freq counts for each project
     cut -f 4 dbSnp153Details.tab \
     | perl -wne 'chomp; next unless $_; @w = split ",";
         if ($w[0]) { print "1000Genomes\n" }
         if ($w[1]) { print "GnomAD_exomes\n"; }
         if ($w[2]) { print "TOPMED\n" }
         if ($w[3]) { print "ExAC\n" }
         if ($w[4]) { print "PAGE_STUDY\n" }
         if ($w[5]) { print "GnomAD\n" }
         if ($w[6]) { print "GoESP\n" }
         if ($w[7]) { print "Estonian\n" }
         if ($w[8]) { print "ALSPAC\n" }
         if ($w[9]) { print "TWINSUK\n" }
         if ($w[10]) { print "NorthernSweden\n" }
         if ($w[11]) { print "Vietnamese\n" }' \
     | sort | uniq -c | sort -nr
 #437625009 TOPMED
 #211192420 GnomAD
 #84744375 1000Genomes
 #44888383 TWINSUK
 #44888383 ALSPAC
 #31397940 Estonian
 #16351632 NorthernSweden
 #12283940 GnomAD_exomes
 #10004052 Vietnamese
 #8854128 ExAC
 #1973841 GoESP
 #1323033 PAGE_STUDY
 
     # count up how many instances of each type of ucscNote:
-    cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
+    time cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
 #    10754 altIsAmbiguous
 #     5995 classMismatch
 #   454674 clinvar
 #   143860 clinvarBenign
 #     7932 clinvarConflicting
 #    96242 clinvarPathogenic
 #   114685 clusterError
 # 12184226 commonAll
 # 20540882 commonSome
 #  1377817 diffMajor
 #     7656 freqIsAmbiguous
 #    17684 freqNotRefAlt
 #   562157 multiMap
+#   113416 otherMapErr
 #107003090 overlapDiffClass
 # 16910407 overlapSameClass
 #662595470 rareAll
 #670952126 rareSome
 #      101 refIsAmbiguous
 #  3271878 refIsMinor
 #   136452 refIsRare
 #    37783 refIsSingleton
 #        4 refMismatch
 #  3813467 revStrand
+    # Count distinct rs IDs (column 4) with at least one mapping flagged otherMapErr, i.e. rs's that also have a bad mapping:
+    grep otherMapErr hg19.dbSnp153.checked.bigDbSnp | cut -f 4 | sort -u | wc -l
+#54871
 
-    cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
+    time cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
 #  10880 altIsAmbiguous
 #   6206 classMismatch
 # 453990 clinvar
 # 143730 clinvarBenign
 #   7950 clinvarConflicting
 #  95262 clinvarPathogenic
 # 128109 clusterError
 #12438325 commonAll
 #20902602 commonSome
 #1399094 diffMajor
 #   7756 freqIsAmbiguous
 #  32150 freqNotRefAlt
 # 132051 multiMap
+# 203580 otherMapErr
 #109991096 overlapDiffClass
 #17281744 overlapSameClass
 #681685476 rareAll
 #690149753 rareSome
 #    111 refIsAmbiguous
 #3360159 refIsMinor
 # 160723 refIsRare
 #  50865 refIsSingleton
 #     33 refMismatch
 #4532270 revStrand
+    # Count distinct rs IDs (column 4) with at least one mapping flagged otherMapErr, i.e. rs's that also have a bad mapping:
+    grep otherMapErr hg38.dbSnp153.checked.bigDbSnp | cut -f 4 | sort -u | wc -l
+#86258
 
 
 ##############################################################################