8e8159481aabadf35f639d236009d14c8562f7fe
hiram
  Mon Jun 22 14:23:47 2020 -0700
canFam4 done and QA ready refs #25279

diff --git src/hg/makeDb/doc/canFam4/initialBuild.txt src/hg/makeDb/doc/canFam4/initialBuild.txt
index 49d210b..628a83b 100644
--- src/hg/makeDb/doc/canFam4/initialBuild.txt
+++ src/hg/makeDb/doc/canFam4/initialBuild.txt
@@ -428,65 +428,61 @@
     export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1`
     echo $chrSize
     # 23
     # use the $chrSize in this sed
     sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
          | hgLoadSqlTab canFam4 ucscToINSDC stdin ucscToINSDC.bed
 
     # should be quiet for all OK
     checkTableCoords canFam4
 
     # should cover %100 entirely:
     featureBits -countGaps canFam4 ucscToINSDC
     # 2482000080 bases of 2482000080 (100.000%) in intersection
 
 #########################################################################
-# add chromAlias table (DONE - 2020-04-02 - Hiram)
+# add chromAlias table (DONE - 2020-05-20 - Hiram)
 
     mkdir /hive/data/genomes/canFam4/bed/chromAlias
     cd /hive/data/genomes/canFam4/bed/chromAlias
 
     hgsql -N -e 'select chrom,name from ucscToRefSeq;' canFam4 \
         | sort -k1,1 > ucsc.refseq.tab
     hgsql -N -e 'select chrom,name from ucscToINSDC;' canFam4 \
         | sort -k1,1 > ucsc.genbank.tab
 
     wc -l *.tab
-    #	5486 ucsc.genbank.tab
-    #	5486 ucsc.refseq.tab
-
+    #	2198 ucsc.genbank.tab
 
     ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
         > canFam4.chromAlias.tab
 
-for t in refseq genbank
+for t in genbank
 do
   c0=`cat ucsc.$t.tab | wc -l`
   c1=`grep $t canFam4.chromAlias.tab | wc -l`
   ok="OK"
   if [ "$c0" -ne "$c1" ]; then
      ok="ERROR"
   fi
   printf "# checking $t: $c0 =? $c1 $ok\n"
 done
-# checking refseq: 5486 =? 5486 OK
-# checking genbank: 5486 =? 5486 OK
+# checking genbank: 2198 =? 2198 OK
 
     # verify chrM is here properly:
     grep chrM canFam4.chromAlias.tab 
-# NC_011120.1     chrM    refseq
-# X93347.1        chrM    genbank
+# CM022001.1      chrM    genbank
 
     hgLoadSqlTab canFam4 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         canFam4.chromAlias.tab
 
 #########################################################################
 # fixup search rule for assembly track/gold table (DONE - 2020-04-02 - Hiram)
     cd ~/kent/src/hg/makeDb/trackDb/dog/canFam4
     # preview prefixes and suffixes:
     hgsql -N -e "select frag from gold;" canFam4 \
       | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c 
    2783 JAAHUQ.1
 
     # implies a rule: 'JAAHUQ[0-9]+(\.[0-9]+)?'
 
     # verify this rule will find them all and eliminate them all:
@@ -918,36 +914,35 @@
     # see if the liftOver menus function in the browser from canFam4 to canFam3
 
 #########################################################################
 #  BLATSERVERS ENTRY (DONE - 2020-04-02 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("canFam4", "blat1b", "17904", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("canFam4", "blat1b", "17905", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
-## reset default position similar to canFam3 found via blat
-##	of NM_001003070.1 mRNA
-##  (DONE - 2020-04-02 - Hiram)
+## reset default position to gene: CDH2 upon recommendation from Kerstin
+##  (DONE - 2020-06-22 - Hiram)
 
     ssh hgwdev
-    hgsql -e 'update dbDb set defaultPos="chr14:7969766-7997673"
+    hgsql -e 'update dbDb set defaultPos="chr7:60744942-60958801"
 	where name="canFam4";' hgcentraltest
 
 ##############################################################################
 # crispr whole genome (DONE - 2020-04-09 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/crisprAll
     cd /hive/data/genomes/canFam4/bed/crisprAll
 
     # the large shoulder argument will cause the entire genome to be scanned
     # this takes a while for a new genome to get the bwa indexing done
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \
     canFam4 genscan -shoulder=250000000 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > ranges.log 2>&1
     # real    1m16.539s
@@ -994,77 +989,104 @@
 # Submission to last job:         15067s     251.12m     4.19h    0.17d
 
     cat offTargets/run.time | sed -e 's/^/# /;'
 # Completed: 154829 of 154829 jobs
 # CPU time in finished jobs:    1805712s   30095.20m   501.59h   20.90d  0.057 y
 # IO & Wait Time:               3128264s   52137.73m   868.96h   36.21d  0.099 y
 # Average job time:                  32s       0.53m     0.01h    0.00d
 # Longest finished job:             273s       4.55m     0.08h    0.00d
 # Submission to last job:          5337s      88.95m     1.48h    0.06d
 
 #########################################################################
 # all.joiner update, downloads and in pushQ - (WORKING - 2019-11-20 - Hiram)
     cd $HOME/kent/src/hg/makeDb/schema
     # verify all the business is done for release
     ~/kent/src/hg/utils/automation/verifyBrowser.pl canFam4
+# 66 tables in database canFam4 - Dog, Canis lupus familiaris
+# verified 55 tables in database canFam4, 11 extra tables, 14 optional tables
+# chainNetRBestHg38     3 optional tables
+# chainNetRBestMm10     3 optional tables
+# chainNetSynHg38       3 optional tables
+# chainNetSynMm10       3 optional tables
+# gapOverlap    1 optional tables
+# tandemDups    1 optional tables
+# 1     chainCanFam3    - extra table
+# 2     chainCanFam3Link        - extra table
+# 3     chainRBestCanFam3       - extra table
+# 4     chainRBestCanFam3Link   - extra table
+# . . . etc . . .
+# 8     crisprAllTargets        - extra table
+# 9     netCanFam3      - extra table
+# 10    netRBestCanFam3 - extra table
+# 11    netSynCanFam3   - extra table
+# 13 genbank tables found
+# verified 28 required tables, 1 missing tables
+# 1     ucscToRefSeq    - missing table
+# hg38 chainNet to canFam4 found 3 required tables
+# mm10 chainNet to canFam4 found 3 required tables
+# hg38 chainNet RBest and syntenic to canFam4 found 6 optional tables
+# mm10 chainNet RBest and syntenic to canFam4 found 3 optional tables
+# liftOver to previous versions: 1, from previous versions: 1
 
-XXX - wait for genbank to be loaded
     # fixup all.joiner until this is a clean output
     joinerCheck -database=canFam4 -tableCoverage all.joiner
     joinerCheck -database=canFam4 -times all.joiner
     joinerCheck -database=canFam4 -keys all.joiner
 
     # when clean, check in:
     git commit -m 'adding rules for canFam4 refs #25279' all.joiner
     git push
     # run up a 'make alpha' in hg/hgTables to get this all.joiner file
     # into the hgwdev/genome-test system
 
     cd /hive/data/genomes/canFam4
     time (makeDownloads.pl canFam4) > downloads.log 2>&1
-    #  real    17m47.024s
+    #  real    16m11.233s
 
     #   now ready for pushQ entry
     mkdir /hive/data/genomes/canFam4/pushQ
     cd /hive/data/genomes/canFam4/pushQ
  time ($HOME/kent/src/hg/utils/automation/makePushQSql.pl -redmineList canFam4) > canFam4.pushQ.sql 2> stderr.out
-    # real    15m52.548s
+    # real    15m2.385s
+XXXX
 
     # remove the tandemDups and gapOverlap from the file list:
     sed -i -e "/tandemDups/d" redmine.canFam4.table.list
     sed -i -e "/Tandem Dups/d" redmine.canFam4.releaseLog.txt
     sed -i -e "/gapOverlap/d" redmine.canFam4.table.list
     sed -i -e "/Gap Overlaps/d" redmine.canFam4.releaseLog.txt
 
     #   check for errors in stderr.out, some are OK, e.g.:
-    # WARNING: hgwdev does not have /gbdb/canFam4/wib/gc5Base.wib
-    # WARNING: hgwdev does not have /gbdb/canFam4/wib/quality.wib
-    # WARNING: hgwdev does not have /gbdb/canFam4/bbi/quality.bw
+  # WARNING: canFam4 does not have ucscToRefSeq
+  # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqVersion.txt
+  # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.bb
+  # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.ix
+  # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/ncbiRefSeqOther.ixx
+  # WARNING: hgwdev does not have /gbdb/canFam4/ncbiRefSeq/seqNcbiRefSeq.rna.fa
   # WARNING: canFam4 does not have seq
   # WARNING: canFam4 does not have extFile
 
-
     # verify the file list does correctly match to files
     cat redmine.canFam4.file.list | while read L
 do
   eval ls $L > /dev/null
 done
     # should be silent, missing files will show as errors
 
     # verify database tables, how many to expect:
     wc -l redmine.canFam4.table.list
-    # 63 redmine.canFam4.table.list
+    # 52 redmine.canFam4.table.list
 
     # how many actual:
     awk -F'.' '{printf "hgsql -N %s -e '"'"'show table status like \"%s\";'"'"'\n", $1, $2}' redmine.canFam4.table.list | sh | wc -l
-    # 63
+    # 52
 
     # would be a smaller number actual if some were missing
 
     # add the path names to the listing files in the redmine issue
     # in the three appropriate entry boxes:
 
 #	/hive/data/genomes/canFam4/pushQ/redmine.canFam4.file.list
 #	/hive/data/genomes/canFam4/pushQ/redmine.canFam4.releaseLog.txt
 #	/hive/data/genomes/canFam4/pushQ/redmine.canFam4.table.list
 
 #########################################################################