f0872f4adaa41af355cf492134407abc595e3830 angie Thu Nov 7 14:55:11 2019 -0800 Rebuild bigDbSnp153 after fixing VCF allele normalization in dbSnpJsonToTab. refs #23283 diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt index 9a5b53a..f429904 100644 --- src/hg/makeDb/doc/bigDbSnp.txt +++ src/hg/makeDb/doc/bigDbSnp.txt @@ -319,45 +319,46 @@ tail -f redo.log # *** All done ! (through the 'install' step) Elapsed time: 263m59s # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07 #*** uh-oh... when checkBigDbSnp failed, doCheck.sh did not fail... I guess backgrounding #*** the jobs and 'wait' hide errors? cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07 $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir -buildDir=`pwd` \ -continue check -stop install \ >& check.log & tail -f check.log # 9/19/19: and again after changing doBigDbSnp.pl to have args & wait on specific pids: # 10/30/19: and again after adding new ucscNotes (#23283). # 11/4/19: and again after finding that refIsMinor & diffMajor could be appended multiple times + # 11/7/19: and again after finding that some cases of freqNotRefAlt are VCF normalization probs topDir=/hive/data/outside/dbSNP/153 freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese # Run doBigDbSnp.pl (first with -debug to make runDir): $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug -# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-04 - cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-04 +# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-07 + cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-07 # Link to ../bigDbSnp.2019-08-07/split, -continue convert to avoid re-splitting (the slowest part of the process): rmdir split ln -s ../bigDbSnp.2019-08-07/split split $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \ -buildDir=`pwd` -continue convert -stop install \ >& do.log & tail -f do.log -# *** All done ! (through the 'install' step) Elapsed time: 525m19s -# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-04 +# *** All done ! (through the 'install' step) Elapsed time: 504m48s +# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-11-07 # count up how many variants have freq counts for each project cut -f 4 dbSnp153Details.tab \ | perl -wne 'chomp; next unless $_; @w = split ","; if ($w[0]) { print "1000Genomes\n" } if ($w[1]) { print "GnomAD_exomes\n"; } if ($w[2]) { print "TOPMED\n" } if ($w[3]) { print "ExAC\n" } if ($w[4]) { print "PAGE_STUDY\n" } if ($w[5]) { print "GnomAD\n" } if ($w[6]) { print "GoESP\n" } if ($w[7]) { print "Estonian\n" } if ($w[8]) { print "ALSPAC\n" } if ($w[9]) { print "TWINSUK\n" } if ($w[10]) { print "NorthernSweden\n" } @@ -375,56 +376,56 @@ #8854128 ExAC #1973841 GoESP #1323033 PAGE_STUDY # count up how many instances of each type of ucscNote: cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10747 altIsAmbiguous # 5701 classMismatch # 454656 clinvar # 143844 clinvarBenign # 7932 clinvarConflicting # 96242 clinvarPathogenic # 113678 clusterError # 12178426 commonAll # 20534330 commonSome -# 1378125 diffMajor +# 1377402 diffMajor # 7649 freqIsAmbiguous -# 25413 freqNotRefAlt +# 16950 freqNotRefAlt # 561309 multiMap #106940656 overlapDiffClass # 16890303 overlapSameClass #662571654 rareAll #670927558 rareSome # 101 refIsAmbiguous -# 3277722 refIsMinor -# 142937 refIsRare -# 44382 refIsSingleton +# 3269451 refIsMinor +# 135265 refIsRare +# 36709 refIsSingleton # 4 refMismatch # 3813390 revStrand cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10873 altIsAmbiguous # 5864 classMismatch # 453954 clinvar # 143696 clinvarBenign # 7950 clinvarConflicting # 95262 clinvarPathogenic # 126973 clusterError # 12430253 commonAll # 20893174 commonSome -# 1399317 diffMajor +# 1398591 diffMajor # 7749 freqIsAmbiguous -# 39038 freqNotRefAlt +# 30615 freqNotRefAlt # 132015 multiMap #109838613 overlapDiffClass # 17228657 overlapSameClass #681626796 rareAll #690089717 rareSome # 111 refIsAmbiguous -# 3364788 refIsMinor -# 166192 refIsRare -# 56491 refIsSingleton +# 3356557 refIsMinor +# 158562 refIsRare +# 48859 refIsSingleton # 33 refMismatch # 4512600 revStrand ##############################################################################