56f91678d7671b26ab3d70711429e52c26585a03 hiram Mon Nov 6 13:21:34 2023 -0800 adding crispr tracks for danRer10 danRer11 refs #21863 diff --git src/hg/makeDb/doc/danRer10/initialBuild.txt src/hg/makeDb/doc/danRer10/initialBuild.txt index 0c61414..27d140e 100644 --- src/hg/makeDb/doc/danRer10/initialBuild.txt +++ src/hg/makeDb/doc/danRer10/initialBuild.txt @@ -588,16 +588,95 @@ -twoBit=`pwd`/GCF_000002035.5_GRCz10.ncbi.2bit \ -buildDir=`pwd`) > do.log 2>&1 # real 0m47.365s cd /hive/data/genomes/danRer10/bed/chromAlias join ../idKeys/danRer10.idKeys.txt \ /hive/users/hiram/idKeys/ensembl/release-85/danio_rerio/Danio_rerio.GRCz10.idKeys.txt \ | awk '{printf "%s\t%s\n", $2,$3}' | sort > ucsc.ensembl1.tab ~/kent/src/hg/utils/automation/chromAlias.pl hgLoadSqlTab danRer10 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ danRer10.chromAlias.tab -######################################################################### +############################################################################## +# crispr whole genome (DONE - 2023-11-04 - Hiram) + # redmine issue 21863: https://redmine.soe.ucsc.edu/issues/21863 + + mkdir /hive/data/genomes/danRer10/bed/crisprAll + cd /hive/data/genomes/danRer10/bed/crisprAll + + # make sure it can get started + time (~/kent/src/hg/utils/automation/doCrispr.pl \ + -stop=guides -buildDir=`pwd` -smallClusterHub=hgwdev danRer10) \ + > guides.log 2>&1 + # real 30m39.710s + sed -e 's/^/# /;' guides/run.time +# Completed: 99 of 99 jobs +# CPU time in finished jobs: 5510s 91.83m 1.53h 0.06d 0.000 y +# IO & Wait Time: 273s 4.55m 0.08h 0.00d 0.000 y +# Average job time: 58s 0.97m 0.02h 0.00d +# Longest finished job: 96s 1.60m 0.03h 0.00d +# Submission to last job: 105s 1.75m 0.03h 0.00d + + # looks good, let it run through the load: + time (~/kent/src/hg/utils/automation/doCrispr.pl -continue=specScoreJobList \ + -stop=load -buildDir=`pwd` -smallClusterHub=hgwdev danRer10) \ + > load.log 2>&1 + # real 2523m24.976s + + sed -e 's/^/# /;' 
specScores/run.time +# Completed: 840309 of 840309 jobs +# CPU time in finished jobs: 46880766s 781346.10m 13022.43h 542.60d 1.487 y +# IO & Wait Time: 1164411s 19406.85m 323.45h 13.48d 0.037 y +# Average job time: 57s 0.95m 0.02h 0.00d +# Longest finished job: 145s 2.42m 0.04h 0.00d +# Submission to last job: 116299s 1938.32m 32.31h 1.35d + + sed -e 's/^/# /;' effScores/run.time +# Completed: 9536 of 9536 jobs +# CPU time in finished jobs: 4820886s 80348.09m 1339.13h 55.80d 0.153 y +# IO & Wait Time: 41241s 687.36m 11.46h 0.48d 0.001 y +# Average job time: 510s 8.50m 0.14h 0.01d +# Longest finished job: 2057s 34.28m 0.57h 0.02d +# Submission to last job: 9265s 154.42m 2.57h 0.11d + + sed -e 's/^/# /;' offTargets/run.time +# Completed: 42016 of 42016 jobs +# CPU time in finished jobs: 558233s 9303.88m 155.06h 6.46d 0.018 y +# IO & Wait Time: 309956s 5165.94m 86.10h 3.59d 0.010 y +# Average job time: 21s 0.34m 0.01h 0.00d +# Longest finished job: 34s 0.57m 0.01h 0.00d +# Submission to last job: 1102s 18.37m 0.31h 0.01d + + + # that made the table crispr10K and symlinks in /gbdb/danRer10/crispr10K/ + # when it should have been instead crisprAll, reset the links and reload + # the correct table: +mkdir -p /gbdb/danRer10/crisprAll/ +rm -f /gbdb/danRer10/crisprAll/crispr.bb +rm -f /gbdb/danRer10/crisprAll/crisprDetails.tab +ln -sf `pwd`/crispr.bb /gbdb/danRer10/crisprAll/crispr.bb +ln -sf `pwd`/crisprDetails.tab /gbdb/danRer10/crisprAll/crisprDetails.tab +hgBbiDbLink danRer10 crisprAllTargets /gbdb/danRer10/crisprAll/crispr.bb + + hgsql -e 'drop table crispr10K;' danRer10 + + grep -c . effScores.tab + # 95378380 + grep -c . 
specScores.tab + # 61805075 + + time (cut -f1-3 crispr.bed | bedSingleCover.pl stdin \ + | awk '{sum+=$3-$2}END{printf "%d bases\n", sum}') \ + > coverage.crispr.bed.txt 2>&1 + 936176533 bases + real 4m42.959s + ave -col=2 ../../*.sizes | grep total + total 1371719383.000000 + # 'featureBits' result: + echo "scale+=3; 100.0 * 936176533 / 1371719383" | bc + 68.248 + +##############################################################################