54452ec022a6073410955c04e110a1784f71fb57 angie Wed Nov 13 17:37:34 2019 -0800 dbSnp153: add new ucscNote otherMapErr for mappings with the same rs# as a mapping w/inconsistent SPDI in BadCoords/Map Err subtrack. refs #23283 diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt index 47fa08e..86d46a3 100644 --- src/hg/makeDb/doc/bigDbSnp.txt +++ src/hg/makeDb/doc/bigDbSnp.txt @@ -337,97 +337,112 @@ freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese # Run doBigDbSnp.pl (first with -debug to make runDir): $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08 cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08 # Link to ../bigDbSnp.2019-08-07/split, -continue convert to avoid re-splitting (the slowest part of the process): rmdir split ln -s ../bigDbSnp.2019-08-07/split split $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \ -buildDir=`pwd` -continue convert -stop install \ >& do.log & tail -f do.log # *** All done ! (through the 'install' step) Elapsed time: 504m48s # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-08 + # 11/12/19: Add new ucscNote otherMapErr by re-running from check onward. + $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \ + -buildDir=`pwd` -continue check -stop install \ + >& check.log & + tail -f check.log +# *** All done ! (through the 'install' step) Elapsed time: 172m35s + # count up how many variants have freq counts for each project cut -f 4 dbSnp153Details.tab \ | perl -wne 'chomp; next unless $_; @w = split ","; if ($w[0]) { print "1000Genomes\n" } if ($w[1]) { print "GnomAD_exomes\n"; } if ($w[2]) { print "TOPMED\n" } if ($w[3]) { print "ExAC\n" } if ($w[4]) { print "PAGE_STUDY\n" } if ($w[5]) { print "GnomAD\n" } if ($w[6]) { print "GoESP\n" } if ($w[7]) { print "Estonian\n" } if ($w[8]) { print "ALSPAC\n" } if ($w[9]) { print "TWINSUK\n" } if ($w[10]) { print "NorthernSweden\n" } if ($w[11]) { print "Vietnamese\n" }' \ | sort | uniq -c | sort -nr #437625009 TOPMED #211192420 GnomAD #84744375 1000Genomes #44888383 TWINSUK #44888383 ALSPAC #31397940 Estonian #16351632 NorthernSweden #12283940 GnomAD_exomes #10004052 Vietnamese #8854128 ExAC #1973841 GoESP #1323033 PAGE_STUDY # count up how many instances of each type of ucscNote: - cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c + time cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10754 altIsAmbiguous # 5995 classMismatch # 454674 clinvar # 143860 clinvarBenign # 7932 clinvarConflicting # 96242 clinvarPathogenic # 114685 clusterError # 12184226 commonAll # 20540882 commonSome # 1377817 diffMajor # 7656 freqIsAmbiguous # 17684 freqNotRefAlt # 562157 multiMap +# 113416 otherMapErr #107003090 overlapDiffClass # 16910407 overlapSameClass #662595470 rareAll #670952126 rareSome # 101 refIsAmbiguous # 3271878 refIsMinor # 136452 refIsRare # 37783 refIsSingleton # 4 refMismatch # 3813467 revStrand + # Check count of rs's with at least one bad mapping: + grep otherMapErr hg19.dbSnp153.checked.bigDbSnp | cut -f 4 | sort -u | wc -l +#54871 - cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c + time cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10880 altIsAmbiguous # 6206 classMismatch # 453990 clinvar # 143730 clinvarBenign # 7950 clinvarConflicting # 95262 clinvarPathogenic # 128109 clusterError #12438325 commonAll #20902602 commonSome #1399094 diffMajor # 7756 freqIsAmbiguous # 32150 freqNotRefAlt # 132051 multiMap +# 203580 otherMapErr #109991096 overlapDiffClass #17281744 overlapSameClass #681685476 rareAll #690149753 rareSome # 111 refIsAmbiguous #3360159 refIsMinor # 160723 refIsRare # 50865 refIsSingleton # 33 refMismatch #4532270 revStrand + # Check count of rs's with at least one bad mapping: + grep otherMapErr hg38.dbSnp153.checked.bigDbSnp | cut -f 4 | sort -u | wc -l +#86258 ##############################################################################