acca3deffc05c4d8d11590a1cf3d893763254712
angie
Thu Oct 31 13:43:05 2019 -0700
dbSnp153: Adding new ucscNotes suggested by Ana Benet: clinvar{Benign,Conflicting,Pathogenic}, rareAll, rareSome. refs #23283
diff --git src/hg/makeDb/doc/bigDbSnp.txt src/hg/makeDb/doc/bigDbSnp.txt
index 5b27f29..ae7c3a7 100644
--- src/hg/makeDb/doc/bigDbSnp.txt
+++ src/hg/makeDb/doc/bigDbSnp.txt
@@ -317,107 +317,114 @@
-buildDir=`pwd` -continue convert -stop install \
>& redo.log &
tail -f redo.log
# *** All done ! (through the 'install' step) Elapsed time: 263m59s
# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07
#*** uh-oh... when checkBigDbSnp failed, doCheck.sh did not fail.  The likely cause is that
#*** the jobs are backgrounded and a bare 'wait' does not report their exit statuses.
cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-08-07
$HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir -buildDir=`pwd` \
-continue check -stop install \
>& check.log &
tail -f check.log
# 9/19/19: and again after changing doBigDbSnp.pl to have args & wait on specific pids:
+ # 10/30/19: and again after adding new ucscNotes (#23283).
+ topDir=/hive/data/outside/dbSNP/153
+ freqSourceOrder=1000Genomes,GnomAD_exomes,TOPMED,ExAC,PAGE_STUDY,GnomAD,GoESP,Estonian,ALSPAC,TWINSUK,NorthernSweden,Vietnamese
# Run doBigDbSnp.pl (first with -debug to make runDir):
$HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder -debug
-# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
- cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
+# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
+ cd /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
# Link split to ../bigDbSnp.2019-08-07/split and use -continue convert, to avoid re-splitting (the slowest part of the process):
rmdir split
ln -s ../bigDbSnp.2019-08-07/split split
$HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 153 $freqSourceOrder \
-buildDir=`pwd` -continue convert -stop install \
>& do.log &
tail -f do.log
-# *** All done ! (through the 'install' step) Elapsed time: 491m30s
-# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-09-19
+# *** All done ! (through the 'install' step) Elapsed time: 472m19s
+# *** Steps were performed in /hive/data/outside/dbSNP/153/bigDbSnp.2019-10-30
- # 10/8/19: count up how many variants have freq counts for each project
+ # count up how many variants have freq counts for each project
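+#TODO: re-run the count below against the 2019-10-30 build and update the numbers if they changed (assumed intent of this TODO -- confirm)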
cut -f 4 dbSnp153Details.tab \
| perl -wne 'chomp; next unless $_; @w = split ",";
if ($w[0]) { print "1000Genomes\n" }
if ($w[1]) { print "GnomAD_exomes\n"; }
if ($w[2]) { print "TOPMED\n" }
if ($w[3]) { print "ExAC\n" }
if ($w[4]) { print "PAGE_STUDY\n" }
if ($w[5]) { print "GnomAD\n" }
if ($w[6]) { print "GoESP\n" }
if ($w[7]) { print "Estonian\n" }
if ($w[8]) { print "ALSPAC\n" }
if ($w[9]) { print "TWINSUK\n" }
if ($w[10]) { print "NorthernSweden\n" }
if ($w[11]) { print "Vietnamese\n" }' \
| sort | uniq -c | sort -nr
#437625009 TOPMED
#211192420 GnomAD
#84744375 1000Genomes
#44888383 TWINSUK
#44888383 ALSPAC
#31397940 Estonian
#16351632 NorthernSweden
#12283940 GnomAD_exomes
#10004052 Vietnamese
#8854128 ExAC
#1973841 GoESP
#1323033 PAGE_STUDY
- # 10/11/19: count up how many instances of each type of ucscNote:
+ # count up how many instances of each type of ucscNote:
cut -f 15 hg19.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
# 10747 altIsAmbiguous
# 5701 classMismatch
# 454656 clinvar
+# 143844 clinvarBenign
+# 7932 clinvarConflicting
+# 96242 clinvarPathogenic
# 113678 clusterError
# 12178426 commonAll
# 20534330 commonSome
# 3522349 diffMajor
# 7649 freqIsAmbiguous
# 25413 freqNotRefAlt
# 561309 multiMap
#106940656 overlapDiffClass
# 16890303 overlapSameClass
+#662571654 rareAll
+#670927558 rareSome
# 101 refIsAmbiguous
# 16032028 refIsMinor
# 142937 refIsRare
# 44382 refIsSingleton
# 4 refMismatch
# 3813390 revStrand
cut -f 15 hg38.dbSnp153.checked.bigDbSnp | sed -re 's/,/\n/g;' | g . | sort | uniq -c
# 10873 altIsAmbiguous
# 5864 classMismatch
# 453954 clinvar
+# 143696 clinvarBenign
+# 7950 clinvarConflicting
+# 95262 clinvarPathogenic
# 126973 clusterError
# 12430253 commonAll
# 20893174 commonSome
# 3573503 diffMajor
# 7749 freqIsAmbiguous
# 39038 freqNotRefAlt
# 132015 multiMap
#109838613 overlapDiffClass
# 17228657 overlapSameClass
+#681626796 rareAll
+#690089717 rareSome
# 111 refIsAmbiguous
# 16277729 refIsMinor
# 166192 refIsRare
# 56491 refIsSingleton
# 33 refMismatch
# 4512600 revStrand
- # 10/18/19: add subset tracks
- # I added new commands to the bigBed and install steps of doBigDbSnp.pl,
- # ran -debug and copy-pasted commands from the generated scripts.
- $HOME/kent/src/hg/utils/automation/doBigDbSnp.pl $topDir 152 $freqSourceOrder \
- -buildDir=`pwd` -continue=bigBed -stop=install \
- -debug
-
##############################################################################