8e8159481aabadf35f639d236009d14c8562f7fe hiram Mon Jun 22 14:23:47 2020 -0700 canFam4 done and QA ready refs @25279 diff --git src/hg/makeDb/doc/canFam4/initialBuild.txt src/hg/makeDb/doc/canFam4/initialBuild.txt index 49d210b..628a83b 100644 --- src/hg/makeDb/doc/canFam4/initialBuild.txt +++ src/hg/makeDb/doc/canFam4/initialBuild.txt @@ -428,65 +428,61 @@ export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1` echo $chrSize # 23 # use the $chrSize in this sed sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | hgLoadSqlTab canFam4 ucscToINSDC stdin ucscToINSDC.bed # should be quiet for all OK checkTableCoords canFam4 # should cover %100 entirely: featureBits -countGaps canFam4 ucscToINSDC # 2482000080 bases of 2482000080 (100.000%) in intersection ######################################################################### -# add chromAlias table (DONE - 2020-04-02 - Hiram) +# add chromAlias table (DONE - 2020-05-20 - Hiram) mkdir /hive/data/genomes/canFam4/bed/chromAlias cd /hive/data/genomes/canFam4/bed/chromAlias hgsql -N -e 'select chrom,name from ucscToRefSeq;' canFam4 \ | sort -k1,1 > ucsc.refseq.tab hgsql -N -e 'select chrom,name from ucscToINSDC;' canFam4 \ | sort -k1,1 > ucsc.genbank.tab wc -l *.tab - # 5486 ucsc.genbank.tab - # 5486 ucsc.refseq.tab - + # 2198 ucsc.genbank.tab ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \ > canFam4.chromAlias.tab -for t in refseq genbank +for t in genbank do c0=`cat ucsc.$t.tab | wc -l` c1=`grep $t canFam4.chromAlias.tab | wc -l` ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done -# checking refseq: 5486 =? 5486 OK -# checking genbank: 5486 =? 5486 OK +# checking genbank: 2198 =? 2198 OK # verify chrM is here properly: grep chrM canFam4.chromAlias.tab -# NC_011120.1 chrM refseq -# X93347.1 chrM genbank +# CM022001.1 chrM genbank hgLoadSqlTab canFam4 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ canFam4.chromAlias.tab ######################################################################### # fixup search rule for assembly track/gold table (DONE - 2020-04-02 - Hiram) cd ~/kent/src/hg/makeDb/trackDb/dog/canFam4 # preview prefixes and suffixes: hgsql -N -e "select frag from gold;" canFam4 \ | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c 2783 JAAHUQ.1 # implies a rule: 'JAAHUQ[0-9]+(\.[0-9]+)?' # verify this rule will find them all and eliminate them all: @@ -918,36 +914,35 @@ # see if the liftOver menus function in the browser from canFam4 to canFam3 ######################################################################### # BLATSERVERS ENTRY (DONE - 2020-04-02 - Hiram) # After getting a blat server assigned by the Blat Server Gods, ssh hgwdev hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("canFam4", "blat1b", "17904", "1", "0"); \ INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("canFam4", "blat1b", "17905", "0", "1");' \ hgcentraltest # test it with some sequence ############################################################################ -## reset default position similar to canFam3 found via blat -## of NM_001003070.1 mRNA -## (DONE - 2020-04-02 - Hiram) +## reset default position to gene: CDH2 upon recommendation from Kerstin +## (DONE - 2020-06-22 - Hiram) ssh hgwdev - hgsql -e 'update dbDb set defaultPos="chr14:7969766-7997673" + hgsql -e 'update dbDb set defaultPos="chr7:60744942-60958801" where name="canFam4";' hgcentraltest ############################################################################## # crispr whole genome (DONE - 2020-04-09 - Hiram) mkdir /hive/data/genomes/canFam4/bed/crisprAll cd /hive/data/genomes/canFam4/bed/crisprAll # the large shoulder argument will cause the entire genome to be scanned # this takes a while for a new genome to get the bwa indexing done time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \ canFam4 genscan -shoulder=250000000 -tableName=crisprAll \ -fileServer=hgwdev \ -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \ -workhorse=hgwdev) > ranges.log 2>&1 # real 1m16.539s @@ -994,77 +989,104 @@ # Submission to last job: 15067s 251.12m 4.19h 0.17d cat offTargets/run.time | sed -e 's/^/# /;' # Completed: 154829 of 154829 jobs # CPU time in finished jobs: 1805712s 30095.20m 501.59h 20.90d 0.057 y # IO & Wait Time: 3128264s 52137.73m 868.96h 36.21d 0.099 y # Average job time: 32s 0.53m 0.01h 0.00d # Longest finished job: 273s 4.55m 0.08h 0.00d # Submission to last job: 5337s 88.95m 1.48h 0.06d ######################################################################### # all.joiner update, downloads and in pushQ - (WORKING - 2019-11-20 - Hiram) cd $HOME/kent/src/hg/makeDb/schema # verify all the business is done for release ~/kent/src/hg/utils/automation/verifyBrowser.pl canFam4 +# 66 tables in database canFam4 - Dog, Canis lupus familiaris +# verified 55 tables in database canFam4, 11 extra tables, 14 optional tables +# chainNetRBestHg38 3 optional tables +# chainNetRBestMm10 3 optional tables +# chainNetSynHg38 3 optional tables +# chainNetSynMm10 3 optional tables +# gapOverlap 1 optional tables +# tandemDups 1 optional tables +# 1 chainCanFam3 - extra table +# 2 chainCanFam3Link - extra table +# 3 chainRBestCanFam3 - extra table +# 4 chainRBestCanFam3Link - extra table +# . . . etc . . . +# 8 crisprAllTargets - extra table +# 9 netCanFam3 - extra table +# 10 netRBestCanFam3 - extra table +# 11 netSynCanFam3 - extra table +# 13 genbank tables found +# verified 28 required tables, 1 missing tables +# 1 ucscToRefSeq - missing table +# hg38 chainNet to canFam4 found 3 required tables +# mm10 chainNet to canFam4 found 3 required tables +# hg38 chainNet RBest and syntenic to canFam4 found 6 optional tables +# mm10 chainNet RBest and syntenic to canFam4 found 3 optional tables +# liftOver to previous versions: 1, from previous versions: 1 -XXX - wait for genbank to be loaded # fixup all.joiner until this is a clean output joinerCheck -database=canFam4 -tableCoverage all.joiner joinerCheck -database=canFam4 -times all.joiner joinerCheck -database=canFam4 -keys all.joiner # when clean, check in: git commit -m 'adding rules for canFam4 refs #25279' all.joiner git push # run up a 'make alpha' in hg/hgTables to get this all.joiner file # into the hgwdev/genome-test system cd /hive/data/genomes/canFam4 time (makeDownloads.pl canFam4) > downloads.log 2>&1 - # real 17m47.024s + # real 16m11.233s # now ready for pushQ entry mkdir /hive/data/genomes/canFam4/pushQ cd /hive/data/genomes/canFam4/pushQ time ($HOME/kent/src/hg/utils/automation/makePushQSql.pl -redmineList canFam4) > canFam4.pushQ.sql 2> stderr.out - # real 15m52.548s + # real 15m2.385s +XXXX # remove the tandemDups and gapOverlap from the file list: sed -i -e "/tandemDups/d" redmine.canFam4.table.list sed -i -e "/Tandem Dups/d" redmine.canFam4.releaseLog.txt sed -i -e "/gapOverlap/d" redmine.canFam4.table.list sed -i -e "/Gap Overlaps/d" redmine.canFam4.releaseLog.txt # check for errors in stderr.out, some are OK, e.g.: - # WARNING: hgwdev does not have /gbdb/canFam4/wib/gc5Base.wib - # WARNING: hgwdev does not have /gbdb/canFam4/wib/quality.wib - # WARNING: hgwdev does not have /gbdb/canFam4/bbi/quality.bw + # WARNING: canFam4 does not have ucscToRefSeq + # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqVersion.txt + # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.bb + # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.ix + # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.ixx + # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/seqNcbiRefSeq.rna.fa # WARNING: canFam4 does not have seq # WARNING: canFam4 does not have extFile - # verify the file list does correctly match to files cat redmine.canFam4.file.list | while read L do eval ls $L > /dev/null done # should be silent, missing files will show as errors # verify database tables, how many to expect: wc -l redmine.canFam4.table.list - # 63 redmine.canFam4.table.list + # 52 redmine.canFam4.table.list # how many actual: awk -F'.' '{printf "hgsql -N %s -e '"'"'show table status like \"%s\";'"'"'\n", $1, $2}' redmine.canFam4.table.list | sh | wc -l - # 63 + # 52 # would be a smaller number actual if some were missing # add the path names to the listing files in the redmine issue # in the three appropriate entry boxes: # /hive/data/genomes/canFam4/pushQ/redmine.canFam4.file.list # /hive/data/genomes/canFam4/pushQ/redmine.canFam4.releaseLog.txt # /hive/data/genomes/canFam4/pushQ/redmine.canFam4.table.list #########################################################################