acca3deffc05c4d8d11590a1cf3d893763254712 angie Thu Oct 31 13:43:05 2019 -0700 dbSnp153: Adding new ucscNotes suggested by Ana Benet: clinvar{Benign,Conflicting,Pathogenic}, rareAll, rareSome. refs #23283 diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt index 5b27f29..ae7c3a7 100644 --- src/hg/makeDb/doc/bigDbSnp.txt +++ src/hg/makeDb/doc/bigDbSnp.txt @@ -317,107 +317,114 @@ -buildDir=`pwd` -continue convert -stop install \ >& redo.log & tail -f redo.log # *** All done ! (through the 'install' step) Elapsed time: 263m59s # *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07 #*** uh-oh... when checkBigDbSnp failed, doCheck.sh did not fail... I guess backgrounding #*** the jobs and 'wait' hide errors? cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07 $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir -buildDir=`pwd` \ -continue check -stop install \ >& check.log & tail -f check.log # 9/19/19: and again after changing doBigDbSnp.pl to have args & wait on specific pids: + # 10/30/19: and again after adding new ucscNotes (#23283). + topDir=/hive/data/outside/dbSNP/153 + freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese # Run doBigDbSnp.pl (first with -debug to make runDir): $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug -# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19 - cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19 +# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30 + cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30 # Link to ../bigDbSnp.2019-08-07/split, -continue convert to avoid re-splitting (the slowest part of the process): rmdir split ln -s ../bigDbSnp.2019-08-07/split split $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \ -buildDir=`pwd` -continue convert -stop install \ >& do.log & tail -f do.log -# *** All done ! (through the 'install' step) Elapsed time: 491m30s -# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19 +# *** All done ! (through the 'install' step) Elapsed time: 472m19s +# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30 - # 10/8/19: count up how many variants have freq counts for each project + # count up how many variants have freq counts for each project +#TODO cut -f 4 dbSnp153Details.tab \ | perl -wne 'chomp; next unless $_; @w = split ","; if ($w[0]) { print "1000Genomes\n" } if ($w[1]) { print "GnomAD_exomes\n"; } if ($w[2]) { print "TOPMED\n" } if ($w[3]) { print "ExAC\n" } if ($w[4]) { print "PAGE_STUDY\n" } if ($w[5]) { print "GnomAD\n" } if ($w[6]) { print "GoESP\n" } if ($w[7]) { print "Estonian\n" } if ($w[8]) { print "ALSPAC\n" } if ($w[9]) { print "TWINSUK\n" } if ($w[10]) { print "NorthernSweden\n" } if ($w[11]) { print "Vietnamese\n" }' \ | sort | uniq -c | sort -nr #437625009 TOPMED #211192420 GnomAD #84744375 1000Genomes #44888383 TWINSUK #44888383 ALSPAC #31397940 Estonian #16351632 NorthernSweden #12283940 GnomAD_exomes #10004052 Vietnamese #8854128 ExAC #1973841 GoESP #1323033 PAGE_STUDY - # 10/11/19: count up how many instances of each type of ucscNote: + # count up how many instances of each type of ucscNote: cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10747 altIsAmbiguous # 5701 classMismatch # 454656 clinvar +# 143844 clinvarBenign +# 7932 clinvarConflicting +# 96242 clinvarPathogenic # 113678 clusterError # 12178426 commonAll # 20534330 commonSome # 3522349 diffMajor # 7649 freqIsAmbiguous # 25413 freqNotRefAlt # 561309 multiMap #106940656 overlapDiffClass # 16890303 overlapSameClass +#662571654 rareAll +#670927558 rareSome # 101 refIsAmbiguous # 16032028 refIsMinor # 142937 refIsRare # 44382 refIsSingleton # 4 refMismatch # 3813390 revStrand cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c # 10873 altIsAmbiguous # 5864 classMismatch # 453954 clinvar +# 143696 clinvarBenign +# 7950 clinvarConflicting +# 95262 clinvarPathogenic # 126973 clusterError # 12430253 commonAll # 20893174 commonSome # 3573503 diffMajor # 7749 freqIsAmbiguous # 39038 freqNotRefAlt # 132015 multiMap #109838613 overlapDiffClass # 17228657 overlapSameClass +#681626796 rareAll +#690089717 rareSome # 111 refIsAmbiguous # 16277729 refIsMinor # 166192 refIsRare # 56491 refIsSingleton # 33 refMismatch # 4512600 revStrand - # 10/18/19: add subset tracks - # I added new commands to the bigBed and install steps of doBigDbSnp.pl, - # ran -debug and copy-pasted commands from the generated scripts. - $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 152 $freqSourceOrder \ - -buildDir=`pwd` -continue=bigBed -stop=install \ - -debug - ##############################################################################