767c1233bac4f69e91864ca7100beee1f45e51d4
hiram
  Mon Apr 20 14:32:43 2020 -0700
lastz chainNet to mm10 and hg38 done refs #25279

diff --git src/hg/makeDb/doc/canFam4/initialBuild.txt src/hg/makeDb/doc/canFam4/initialBuild.txt
index 4b34ae8..49d210b 100644
--- src/hg/makeDb/doc/canFam4/initialBuild.txt
+++ src/hg/makeDb/doc/canFam4/initialBuild.txt
@@ -310,55 +310,53 @@
     hgsql -e 'select * from gap;' canFam4 | awk '{print $4-$3}' \
 	| ave stdin | grep -w total
     # total 58500.000000
 
     # equal amounts, no need to adjust the gap table
 
 ##############################################################################
 # cpgIslands on UNMASKED sequence (DONE - 2020-03-31 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/cpgIslandsUnmasked
     cd /hive/data/genomes/canFam4/bed/cpgIslandsUnmasked
 
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
        -tableName=cpgIslandExtUnmasked \
           -maskedSeq=/hive/data/genomes/canFam4/canFam4.unmasked.2bit \
              -workhorse=hgwdev -smallClusterHub=ku canFam4) > do.log 2>&1
-XXX - running - Tue Mar 31 10:53:35 PDT 2020
-    # real    4m13.285s
+    # real    3m30.591s
 
     cat fb.canFam4.cpgIslandExtUnmasked.txt
-    # 28001209 bases of 2999027915 (0.934%) in intersection
+    # 56535294 bases of 2481941580 (2.278%) in intersection
 
 #############################################################################
 # cytoBandIdeo - (DONE - 2020-03-31 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/cytoBand
     cd /hive/data/genomes/canFam4/bed/cytoBand
     makeCytoBandIdeo.csh canFam4
 
 #############################################################################
 # run up idKeys files for chromAlias/ncbiRefSeq (done - 2020-03-31 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/idKeys
     cd /hive/data/genomes/canFam4/bed/idKeys
 
     time (doIdKeys.pl \
         -twoBit=/hive/data/genomes/canFam4/canFam4.unmasked.2bit \
         -buildDir=`pwd` canFam4) > do.log 2>&1 &
-XXX - running - Tue Mar 31 10:54:22 PDT 2020
-    # real    2m48.092s
+    # real    3m22.298s
 
     cat canFam4.keySignature.txt
-    #  10c42ee6ea4a90775c5da9d8b83854aa
+    #  174191aae5515d1114a9d6320b152b1a
 
 #############################################################################
 # gapOverlap (DONE - 2020-03-31 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/gapOverlap
     cd /hive/data/genomes/canFam4/bed/gapOverlap
     time (doGapOverlap.pl \
         -twoBit=/hive/data/genomes/canFam4/canFam4.unmasked.2bit canFam4 ) \
         > do.log 2>&1 &
     # real    1m49.489s
 
     # there are only nine:
     wc -l bed.tab
     # 9 bed.tab
     cut -f2- bed.tab
 chr1    41008264        41010364        chr1:41008265-41010364  1000    +      41008264 41010364        0       2       1000,1000       0,1100
@@ -396,106 +394,65 @@
 #  basesCovered: 1,635,503,835
 #  meanDepth (of bases covered): 14.396921
 #  minDepth: 1.000000
 #  maxDepth: 381.000000
 #  std of depth: 29.341113
 
 #########################################################################
 # ucscToINSDC and ucscToRefSeq table/track (DONE - 2020-03-31 - Hiram)
     # construct idKeys for the genbank sequence
     mkdir /hive/data/genomes/canFam4/genbank/idKeys
     cd /hive/data/genomes/canFam4/genbank/idKeys
     faToTwoBit ../GCA_*0_genomic.fna.gz canFam4.genbank.2bit
 
     time (doIdKeys.pl -buildDir=`pwd` \
         -twoBit=`pwd`/canFam4.genbank.2bit genbankCanFam4)  > do.log 2>&1 &
-XXX - running - Tue Mar 31 10:58:05 PDT 2020
-    # real    2m50.723s
+    # real    3m30.599s
 
     cat genbankCanFam4.keySignature.txt
-    #  10c42ee6ea4a90775c5da9d8b83854aa
-
-    # and the genbank sequence needs keys too:
-    mkdir /hive/data/genomes/canFam4/genbank/idKeysGenbank
-    cd /hive/data/genomes/canFam4/genbank/idKeysGenbank
-    faToTwoBit /hive/data/outside/ncbi/genomes/genbank/vertebrate_mammalian/Gorilla_gorilla/all_assembly_versions/GCA_008122165.1_Kamilah_GGO_v0/GCA_008122165.1_Kamilah_GGO_v0_genomic.fna.gz canFam4.genbank.2bit
-
-    time (doIdKeys.pl -buildDir=`pwd` \
-        -twoBit=`pwd`/canFam4.genbank.2bit genbankCanFam4)  > do.log 2>&1 &
-    # real    3m11.098s
-
-    cat genbankCanFam4.keySignature.txt
-    #  84734b343949ddf1e28b453d25d3ddf7
+    #  174191aae5515d1114a9d6320b152b1a
 
     mkdir /hive/data/genomes/canFam4/bed/chromAlias
     cd /hive/data/genomes/canFam4/bed/chromAlias
 
     join -t$'\t' ../idKeys/canFam4.idKeys.txt \
-        ../../genbank/idKeysGenbank/genbankCanFam4.idKeys.txt | cut -f2- \
-          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
-            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
-               | sort -k1,1 -k2,2n > ucscToINSDC.bed
-
-    join -t$'\t' ../idKeys/canFam4.idKeys.txt \
         ../../genbank/idKeys/genbankCanFam4.idKeys.txt | cut -f2- \
           | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
             | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
-               | sort -k1,1 -k2,2n > ucscToRefSeq.bed
+               | sort -k1,1 -k2,2n > ucscToINSDC.bed
 
     # should be same line counts throughout:
     wc -l * ../../chrom.sizes
-    #	5485 ucscToINSDC.bed
-    #	5486 ucscToRefSeq.bed
-    #	5486 ../../chrom.sizes
-
-    # need to find the accession for the INSDC equivalent to chrM:
-    egrep chrM *
-# ucscToRefSeq.bed:chrM   0       16412   NC_011120.1
-
-    # lookup that accession at NCBI Entrez: X93347.1
-    # and add to ucscToINSDC.bed:
-    printf "chrM\t0\t16564\tAY612638.1\n" >> ucscToINSDC.bed
-    # verify:
-    grep chrM *
-ucscToINSDC.bed:chrM    0       16412   X93347.1
-ucscToRefSeq.bed:chrM   0       16412   NC_011120.1
+    #   2198 ucscToINSDC.bed
+    #	2198 ../../chrom.sizes
 
     export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1`
     echo $chrSize
-    # 26
+    # 23
     # use the $chrSize in this sed
     sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
          | hgLoadSqlTab canFam4 ucscToINSDC stdin ucscToINSDC.bed
-     # should be the same for ucscToRefSeq:
-    export chrSize=`cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1`
-    echo $chrSize
-    # 26
-    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
-       | sed -e 's/INSDC/RefSeq/g;' \
-         | hgLoadSqlTab canFam4 ucscToRefSeq stdin ucscToRefSeq.bed
 
     # should be quiet for all OK
     checkTableCoords canFam4
 
     # should cover %100 entirely:
     featureBits -countGaps canFam4 ucscToINSDC
-    # 3044872214 bases of 3044872214 (100.000%) in intersection
-    featureBits -countGaps canFam4 ucscToRefSeq
-    # 3044872214 bases of 3044872214 (100.000%) in intersection
+    # 2482000080 bases of 2482000080 (100.000%) in intersection
 
 #########################################################################
-# add chromAlias table (TBD - 2019-11-19 - Hiram)
+# add chromAlias table (DONE - 2020-04-02 - Hiram)
 
     mkdir /hive/data/genomes/canFam4/bed/chromAlias
     cd /hive/data/genomes/canFam4/bed/chromAlias
 
     hgsql -N -e 'select chrom,name from ucscToRefSeq;' canFam4 \
         | sort -k1,1 > ucsc.refseq.tab
     hgsql -N -e 'select chrom,name from ucscToINSDC;' canFam4 \
         | sort -k1,1 > ucsc.genbank.tab
 
     wc -l *.tab
     #	5486 ucsc.genbank.tab
     #	5486 ucsc.refseq.tab
 
 
     ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
@@ -511,61 +468,63 @@
   fi
   printf "# checking $t: $c0 =? $c1 $ok\n"
 done
 # checking refseq: 5486 =? 5486 OK
 # checking genbank: 5486 =? 5486 OK
 
     # verify chrM is here properly:
     grep chrM canFam4.chromAlias.tab 
 # NC_011120.1     chrM    refseq
 # X93347.1        chrM    genbank
 
     hgLoadSqlTab canFam4 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         canFam4.chromAlias.tab
 
 #########################################################################
-# fixup search rule for assembly track/gold table (TBD - 2019-11-19 - Hiram)
-    cd ~/kent/src/hg/makeDb/trackDb/gorilla/canFam4
+# fixup search rule for assembly track/gold table (DONE - 2020-04-02 - Hiram)
+    cd ~/kent/src/hg/makeDb/trackDb/dog/canFam4
     # preview prefixes and suffixes:
     hgsql -N -e "select frag from gold;" canFam4 \
       | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c 
-      1 NC_.1
-   6344 SRLZ.1
+   2783 JAAHUQ.1
 
-    # implies a rule: '[NS][CR][L0-9_][Z0-9][0-9]+(\.[0-9]+)?'
+    # implies a rule: 'JAAHUQ[0-9]+(\.[0-9]+)?'
 
     # verify this rule will find them all and eliminate them all:
     hgsql -N -e "select frag from gold;" canFam4 | wc -l
-    # 6345
+    # 2783
 
     hgsql -N -e "select frag from gold;" canFam4 \
-       | egrep -e '[NS][CR][L0-9_][Z0-9][0-9]+(\.[0-9]+)?' | wc -l
-    # 6345
+       | egrep -e 'JAAHUQ[0-9]+(\.[0-9]+)?' | wc -l
+    # 2783
 
     hgsql -N -e "select frag from gold;" canFam4 \
-       | egrep -v -e '[NS][CR][L0-9_][Z0-9][0-9]+(\.[0-9]+)?' | wc -l
+       | egrep -v -e 'JAAHUQ[0-9]+(\.[0-9]+)?' | wc -l
     # 0
 
     # hence, add to trackDb/dog/canFam4/trackDb.ra
 searchTable gold
 shortCircuit 1
-termRegex [NS][CR][L0-9_][Z0-9][0-9]+(\.[0-9]+)?
+termRegex JAAHUQ[0-9]+(\.[0-9]+)?
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
     # verify searches work in the position box
 
+    git commit -m 'adding search rule for gold/assembly track refs #25279' \
+       trackDb.ra
+
 ##########################################################################
 # running repeat masker (DONE - 2020-03-31 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/repeatMasker
     cd /hive/data/genomes/canFam4/bed/repeatMasker
     time  (doRepeatMasker.pl -buildDir=`pwd` \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -smallClusterHub=ku canFam4) > do.log 2>&1
     # real    293m51.353s
 
     cat faSize.rmsk.txt
 # 2482000080 bases (58500 N's 2481941580 real 1403544550 upper
 #	1078397030 lower) in 2198 sequences in 1 files
 # Total size: mean 1129208.4 sd 8542765.0 min 13084 (chrUn_JAAHUQ010000994v1)
 #	max 124992030 (chrX) median 43246
 # %43.45 masked total, %43.45 masked real
@@ -648,238 +607,267 @@
         -dbHost=hgwdev canFam4) > do.log 2>&1
     # real    90m16.169s
 
     # Masking statistics
     cat faSize.canFam4.cleanWMSdust.txt
 # 2482000080 bases (58500 N's 2481941580 real 1630728232 upper 851213348 lower)
 #	in 2198 sequences in 1 files
 # Total size: mean 1129208.4 sd 8542765.0 min 13084 (chrUn_JAAHUQ010000994v1)
 #	max 124992030 (chrX) median 43246
 # %34.30 masked total, %34.30 masked real
 
     cat fb.canFam4.rmsk.windowmaskerSdust.txt
     # 598271411 bases of 2482000080 (24.104%) in intersection
 
 ##########################################################################
-# cpgIslands - (TBD - 2019-11-20 - Hiram)
+# cpgIslands - (DONE - 2020-04-02 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/cpgIslands
     cd /hive/data/genomes/canFam4/bed/cpgIslands
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev -smallClusterHub=ku canFam4) > do.log 2>&1
-    # real    4m0.657s
+    # real    3m29.034s
 
     cat fb.canFam4.cpgIslandExt.txt
-    # 20339043 bases of 2999027915 (0.678%) in intersection
+    # 47618882 bases of 2481941580 (1.919%) in intersection
 
 ##############################################################################
-# genscan - (TBD - 2019-11-20 - Hiram)
+# genscan - (DONE - 2020-04-02 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/genscan
     cd /hive/data/genomes/canFam4/bed/genscan
     time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -bigClusterHub=ku canFam4) > do.log 2>&1
-    # real    100m37.264s
+    # real    8m19.775s
+
+    # two jobs broken:
+./runGsBig2M.csh chr22 000 gtf/000/chr22.gtf pep/000/chr22.pep subopt/000/chr22.bed &
+./runGsBig2M.csh chr34 000 gtf/000/chr34.gtf pep/000/chr34.pep subopt/000/chr34.bed
+wait
+    # real    14m27.845s
+
+    time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
+      -continue=makeBed -bigClusterHub=ku canFam4) > makeBed.log 2>&1
+    # real    0m45.365s
 
     cat fb.canFam4.genscan.txt
-    # 51534246 bases of 2999027915 (1.718%) in intersection
+    # 57650331 bases of 2481941580 (2.323%) in intersection
 
     cat fb.canFam4.genscanSubopt.txt
-    # 53019930 bases of 2999027915 (1.768%) in intersection
+    # 50129491 bases of 2481941580 (2.020%) in intersection
 
 #########################################################################
-# Create kluster run files (TBD - 2019-11-20 - Hiram)
+# Create kluster run files (DONE - 2020-04-02 - Hiram)
 
     # numerator is canFam4 gapless bases "real" as reported by:
     featureBits -noRandom -noHap canFam4 gap
-    # 41796384 bases of 2715375767 (1.539%) in intersection
+    # 36700 bases of 2353522726 (0.002%) in intersection
     #                      ^^^
 
     # denominator is hg19 gapless bases as reported by:
     #   featureBits -noRandom -noHap hg19 gap
     #     234344806 bases of 2861349177 (8.190%) in intersection
     # 1024 is threshold used for human -repMatch:
-    calc \( 2715375767 / 2861349177 \) \* 1024
-    #  ( 2715375767 / 2861349177 ) * 1024 = 971.760038
+    calc \( 2353522726 / 2861349177 \) \* 1024
+    #  ( 2353522726 / 2861349177 ) * 1024 = 842.262556
 
-    # ==> use -repMatch=950 according to size scaled down from 1024 for human.
+    # ==> use -repMatch=800 according to size scaled down from 1024 for human.
     #   and rounded down to nearest 50
     cd /hive/data/genomes/canFam4
     time blat canFam4.2bit \
          /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/canFam4.11.ooc \
-        -repMatch=950
-    #   Wrote 39217 overused 11-mers to jkStuff/canFam4.11.ooc
+        -repMatch=800
+    #	Wrote 34718 overused 11-mers to jkStuff/canFam4.11.ooc
+    #	real    0m21.985s
 
-    # gorGor5 at repMatch=1100:
-    #   Wrote 31384 overused 11-mers to jkStuff/gorGor5.11.ooc
-    # gorGor4 at repMatch=1000:
-    # Wrote 32028 overused 11-mers to jkStuff/gorGor4.11.ooc
+    # canFam3 at repMatch=900:
+    #   Wrote 24788 overused 11-mers to jkStuff/canFam3.11.ooc
+    #	real    1m11.629s
 
-    #   check non-bridged gaps to see what the typical size is:
+    #   there are no non-bridged gaps
     hgsql -N \
         -e 'select * from gap where bridge="no" order by size;' canFam4 \
-        | sort -k7,7nr | ave -col=7 stdin
-    # min 100.000000
-    # max 100.000000
-    # they are all 100 sized, 220 gaps
 
+    # HOWEVER, every gap in this assembly is the same 'within scaffold'
+    # at size 100:
+    hgsql -N -e 'select size from gap where bridge="yes" order by size;' \
+     canFam4  | sort | uniq -c
+    # 585 100
+
+    # using these gaps to make a lift file
     # minimum gap size is 100 and produces a reasonable number of lifts
     gapToLift -verbose=2 -minGap=100 canFam4 jkStuff/canFam4.nonBridged.lft \
         -bedFile=jkStuff/canFam4.nonBridged.bed
     wc -l jkStuff/canFam4.nonBri*
-    #	5706 jkStuff/canFam4.nonBridged.bed
-    #	5706 jkStuff/canFam4.nonBridged.lft
+    #	2198 jkStuff/canFam4.nonBridged.bed
+    #	2198 jkStuff/canFam4.nonBridged.lft
 
 ########################################################################
-# lastz/chain/net swap human/hg38 (TBD - 2019-11-20 - Hiram)
+# lastz/chain/net swap human/hg38 (DONE - 2020-04-10 - Hiram)
 
     # original alignment
-    cd /hive/data/genomes/hg38/bed/lastzCanFam4.2019-11-20
+    cd /hive/data/genomes/hg38/bed/lastzCanFam4.2020-04-02
 
     cat fb.hg38.chainCanFam4Link.txt
-    # 2908900659 bases of 3095998939 (93.957%) in intersection
+    # 1549397508 bases of 3110768607 (49.808%) in intersection
     cat fb.hg38.chainSynCanFam4Link.txt
-    # 2885980361 bases of 3095998939 (93.216%) in intersection
+    # 1488468205 bases of 3110768607 (47.849%) in intersection
+
+    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
+	hg38 canFam4) > rbest.log 2>&1 &
+    # real    310m32.196s
+
     cat fb.hg38.chainRBest.CanFam4.txt
-    # 2693876207 bases of 3095998939 (87.012%) in intersection
+    # 1425406620 bases of 3110768607 (45.822%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/canFam4/bed/blastz.hg38.swap
     cd /hive/data/genomes/canFam4/bed/blastz.hg38.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
-      /hive/data/genomes/hg38/bed/lastzCanFam4.2019-11-20/DEF \
+      /hive/data/genomes/hg38/bed/lastzCanFam4.2020-04-02/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
-          -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+          -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
-    #  real    63m46.473s
+    #  real    99m10.990s
 
     cat fb.canFam4.chainHg38Link.txt
-    # 2738870921 bases of 2999027915 (91.325%) in intersection
+    # 1493209286 bases of 2481941580 (60.163%) in intersection
     cat fb.canFam4.chainSynHg38Link.txt
-    # 2728591501 bases of 2999027915 (90.983%) in intersection
+    # 1448164376 bases of 2481941580 (58.348%) in intersection
+
+    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
+	canFam4 hg38) > rbest.log 2>&1 &
+    # real    257m59.713s
 
-    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` canFam4 hg38) \
-       > rbest.log 2>&1
-XXX - running - Tue Nov 26 11:55:51 PST 2019
-    # real    125m35.459s
+    cat fb.canFam4.chainRBest.Hg38.txt
+    # 1425296830 bases of 2481941580 (57.427%) in intersection
 
 ###########################################################################
-# lastz/chain/net swap mouse/mm10 (TBD - 2019-11-21 - Hiram)
+# lastz/chain/net swap mouse/mm10 (DONE - 2020-04-20 - Hiram)
 
     # original alignment
-    cd /hive/data/genomes/mm10/bed/lastzCanFam4.2019-11-20
     cat fb.mm10.chainCanFam4Link.txt
-    #	929953885 bases of 2652783500 (35.056%) in intersection
+    #	777883731 bases of 2652783500 (29.323%) in intersection
     cat fb.mm10.chainSynCanFam4Link.txt
-    #   882047357 bases of 2652783500 (33.250%) in intersection
+    #   736602602 bases of 2652783500 (27.767%) in intersection
+
+    time (doRecipBest.pl -load -workhorse=hgwdev mm10 canFam4 \
+      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
+    #	real    219m16.168s
+
     cat fb.mm10.chainRBest.CanFam4.txt
-    # 885135149 bases of 2652783500 (33.366%) in intersection
+    # 741307883 bases of 2652783500 (27.945%) in intersection
 
     mkdir /hive/data/genomes/canFam4/bed/blastz.mm10.swap
     cd /hive/data/genomes/canFam4/bed/blastz.mm10.swap
     time (doBlastzChainNet.pl -verbose=2 \
-	/hive/data/genomes/mm10/bed/lastzCanFam4.2019-11-20/DEF \
+	/hive/data/genomes/mm10/bed/lastzCanFam4.2020-04-02/DEF \
 	-swap -syntenicNet \
-	-workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
-	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
-    #	real    72m34.088s
+	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
+	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
+    #	real    50m20.639s
 
     cat fb.canFam4.chainMm10Link.txt
-    #	1017872526 bases of 2999027915 (33.940%) in intersection
+    #	772902855 bases of 2481941580 (31.141%) in intersection
     cat fb.canFam4.chainSynMm10Link.txt
-    #    880983055 bases of 2999027915 (29.376%) in intersection
+    #   737924732 bases of 2481941580 (29.732%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev canFam4 mm10 \
       -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
-    # real    237m38.959s
+    # real    173m38.016s
 
     cat fb.canFam4.chainRBest.Mm10.txt
-    # 883663662 bases of 2999027915 (29.465%) in intersection
+    # 740357755 bases of 2481941580 (29.830%) in intersection
 
 ##############################################################################
-# GENBANK AUTO UPDATE (TBD - 2019-11-20 - Hiram)
+# GENBANK AUTO UPDATE (DONE - 2020-04-09 - Hiram)
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # /cluster/data/genbank/data/organism.lst shows:
     # organism       mrnaCnt estCnt  refSeqCnt
-    # Gorilla 1       0       0
-    # Gorilla gorilla 617     30      95
-    # Gorilla gorilla gorilla 4       0       0
+    # Canis latrans   2       0       0
+    # Canis lupus     36      0       0
+    # Canis lupus familiaris  3351    382644  1718
+    # Canis lupus laniger     2       0       0
+    # Canis lupus lupus       2       0       0
+    # Canis mesomelas 1       0       0
+    # Canis sp.       45      0       0
 
-    # that single 'Gorilla' name is a new one, adding that to
-    # the list of Gorilla names in src/lib/gbGenome.c
+    # latrans is the coyote, mesomelas is the black-backed jackal from
+    # Africa, laniger is the Tibetan wolf, and
+    # lupus lupus is the Eurasian wolf
 
-    # edit etc/genbank.conf to add canFam4 just before galGal5
+    # edit etc/genbank.conf to add canFam4 just after canFam3
 
-# Gorilla - genbank assembly: GCA_011100685.1
+# canFam4 (German shepherd - GCA_011100685.1 - UU_Cfam_GSD_1.0)
 canFam4.serverGenome = /hive/data/genomes/canFam4/canFam4.2bit
 canFam4.ooc = /hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc
 canFam4.lift = /hive/data/genomes/canFam4/jkStuff/canFam4.nonBridged.lft
-canFam4.perChromTables = no
-canFam4.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
-canFam4.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
-canFam4.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
-canFam4.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
-canFam4.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
-canFam4.genbank.est.xeno.pslCDnaFilter    = ${ordered.genbank.est.xeno.pslCDnaFilter}
+canFam4.align.unplacedChroms = chrUn_*
+canFam4.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
+canFam4.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
+canFam4.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
+canFam4.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
+canFam4.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
+canFam4.refseq.mrna.native.load = yes
+canFam4.refseq.mrna.xeno.load = yes
+# DO NOT NEED genbank.mrna.xeno except for human, mouse
+canFam4.genbank.mrna.xeno.load = yes
 canFam4.downloadDir = canFam4
-# default yes refseq.mrna.native refseq.mrna.xeno genbank.mrna.native
-# default yes genbank.est.native
-# default no genbank.mrna.xeno genbank.est.xeno
+canFam4.upstreamGeneTbl = refGene
+canFam4.perChromTables = no
 
     # verify the files specified exist before checking in the file:
   grep ^canFam4 etc/genbank.conf | grep hive | awk '{print $NF}' | xargs ls -og
-# -rw-rw-r-- 1 792944027 Nov 20 10:59 /hive/data/genomes/canFam4/canFam4.2bit
-# -rw-rw-r-- 1    156876 Nov 20 11:06 /hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc
-# -rw-rw-r-- 1    333597 Nov 20 11:08 /hive/data/genomes/canFam4/jkStuff/canFam4.nonBridged.lft
+# -rw-rw-r-- 1 651703337 Apr  2 08:57 /hive/data/genomes/canFam4/canFam4.2bit
+# -rw-rw-r-- 1    138880 Apr  2 09:51 /hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc
+# -rw-rw-r-- 1    139818 Apr  2 09:56 /hive/data/genomes/canFam4/jkStuff/canFam4.nonBridged.lft
 
-    git commit -m "Added canFam4 gorilla; refs #24524" etc/genbank.conf src/lib/gbGenome.c
+    git commit -m "Added canFam4 dog; refs #25279" etc/genbank.conf
     git push
 
-    # update the binaries due to the update in lib/src/gbGenome.c
-    make install-server
-
     # update /cluster/data/genbank/:
     make etc-update
 
     # enable daily alignment and update of hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add canFam4 to:
     #   etc/hgwdev.dbs etc/align.dbs
-    git commit -m "Added canFam4 - gorilla refs #24524" etc/hgwdev.dbs etc/align.dbs
+    git commit -m "Added canFam4 - dog refs #25279" etc/hgwdev.dbs etc/align.dbs
     git push
     make etc-update
 
     # wait a few days for genbank magic to take place, the tracks will
     # appear
 
 #############################################################################
-# augustus gene track (TBD - 2019-11-20 - Hiram)
+# augustus gene track (DONE - 2020-04-10 - Hiram)
 
     mkdir /hive/data/genomes/canFam4/bed/augustus
     cd /hive/data/genomes/canFam4/bed/augustus
     time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \
         -species=human -dbHost=hgwdev \
            -workhorse=hgwdev canFam4) > do.log 2>&1
-    # real    139m55.244s
+    # real    74m39.734s
 
     cat fb.canFam4.augustusGene.txt
-    # 55005426 bases of 2999027915 (1.834%) in intersection
+    # 49999966 bases of 2481941580 (2.015%) in intersection
 
 #########################################################################
 # ncbiRefSeq (TBD - 2019-11-20 - Hiram)
+    ### XXX ### Not available on GCA/genbank assemblies
 
     mkdir /hive/data/genomes/canFam4/bed/ncbiRefSeq
     cd /hive/data/genomes/canFam4/bed/ncbiRefSeq
     # running step wise just to be careful
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -bigClusterHub=ku -dbHost=hgwdev \
       -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
       refseq vertebrate_mammalian Gorilla_gorilla \
       GCA_008122165.1_Kamilah_GGO_v0 canFam4) > download.log 2>&1
     # real    1m37.523s
 
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -continue=process -bigClusterHub=ku -dbHost=hgwdev \
       -stop=process -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
       refseq vertebrate_mammalian Gorilla_gorilla \
@@ -901,172 +889,150 @@
 
     # XXX 2019-11-20 - ready for this after genbank runs
 
     featureBits -enrichment canFam4 refGene ncbiRefSeq 
  # refGene 0.402%, ncbiRefSeq 3.148%, both 0.402%, cover 99.90%, enrich 31.73x
     featureBits -enrichment canFam4 ncbiRefSeq refGene
  # ncbiRefSeq 3.148%, refGene 0.402%, both 0.402%, cover 12.76%, enrich 31.73x
 
     featureBits -enrichment canFam4 ncbiRefSeqCurated refGene
  # ncbiRefSeqCurated 0.401%, refGene 0.402%, both 0.400%, cover 99.66%, enrich 247.79x
 
     featureBits -enrichment canFam4 refGene ncbiRefSeqCurated
  # refGene 0.402%, ncbiRefSeqCurated 0.401%, both 0.400%, cover 99.33%, enrich 247.79x
 
 #########################################################################
-# LIFTOVER TO gorGor5 (TBD - 2019-11-20 - Hiram)
-    ssh hgwdev
-    mkdir /hive/data/genomes/canFam4/bed/blat.gorGor5.2019-11-20
-    cd /hive/data/genomes/canFam4/bed/blat.gorGor5.2019-11-20
-    doSameSpeciesLiftOver.pl -verbose=2 \
-        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
-        -ooc=/hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc \
-         canFam4 gorGor5
-    time (doSameSpeciesLiftOver.pl -verbose=2 \
-        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
-        -ooc=/hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc \
-         canFam4 gorGor5) > doLiftOverToGorGor5.log 2>&1
-    # real    936m35.524s
-
-    # see if the liftOver menus function in the browser from canFam4 to gorGor5
-
-#########################################################################
-# LIFTOVER TO gorGor4 (TBD - 2019-11-20 - Hiram)
+# LIFTOVER TO canFam3 (DONE - 2020-04-02 - Hiram)
     ssh hgwdev
-    mkdir /hive/data/genomes/canFam4/bed/blat.gorGor4.2019-11-20
-    cd /hive/data/genomes/canFam4/bed/blat.gorGor4.2019-11-20
+    mkdir /hive/data/genomes/canFam4/bed/blat.canFam3.2020-04-02
+    cd /hive/data/genomes/canFam4/bed/blat.canFam3.2020-04-02
     doSameSpeciesLiftOver.pl -verbose=2 \
         -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc \
-         canFam4 gorGor4
+         canFam4 canFam3
     time (doSameSpeciesLiftOver.pl -verbose=2 \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/canFam4/jkStuff/canFam4.11.ooc \
-         canFam4 gorGor4) > doLiftOverToGorGor4.log 2>&1
-    # real    654m46.645s
+         canFam4 canFam3) > doLiftOverToCanFam3.log 2>&1
+    # real    1100m17.743s
 
-    # see if the liftOver menus function in the browser from canFam4 to gorGor4
+    # see if the liftOver menus function in the browser from canFam4 to canFam3
 
 #########################################################################
-#  BLATSERVERS ENTRY (TBD - 2019-11-20 - Hiram)
+#  BLATSERVERS ENTRY (DONE - 2020-04-02 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
-	VALUES ("canFam4", "blat1c", "17914", "1", "0"); \
+	VALUES ("canFam4", "blat1b", "17904", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
-	VALUES ("canFam4", "blat1c", "17915", "0", "1");' \
+	VALUES ("canFam4", "blat1b", "17905", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
-## reset default position similar to gorGor5 found via blat of NR_046473.1 mRNA
-##  (TBD - 2019-11-20 - Hiram)
+## reset default position similar to canFam3 found via blat
+##	of NM_001003070.1 mRNA
+##  (DONE - 2020-04-02 - Hiram)
 
-    # as found from the galGal5 to canFam4 liftOver
     ssh hgwdev
-    hgsql -e 'update dbDb set defaultPos="chr14:81559118-81601404"
+    hgsql -e 'update dbDb set defaultPos="chr14:7969766-7997673"
 	where name="canFam4";' hgcentraltest
 
 ##############################################################################
-# crispr whole genome (TBD - 2019-11-20 - Hiram)
+# crispr whole genome (DONE - 2020-04-09 - Hiram)
     mkdir /hive/data/genomes/canFam4/bed/crisprAll
     cd /hive/data/genomes/canFam4/bed/crisprAll
 
     # the large shoulder argument will cause the entire genome to be scanned
     # this takes a while for a new genome to get the bwa indexing done
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \
-    canFam4 ncbiRefSeq -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
+    canFam4 genscan -shoulder=250000000 -tableName=crisprAll \
+    -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > ranges.log 2>&1
-    # real    72m58.740s
+    # real    1m16.539s
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
-       -continue=guides -stop=specScores canFam4 ncbiRefSeq \
+       -continue=guides -stop=specScores canFam4 genscan \
 	-shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > specScores.log 2>&1
-    # real    8m40.172s
+    # real    6558m26.295s
 
     cat guides/run.time | sed -e 's/^/# /;'
 # Completed: 100 of 100 jobs
-# CPU time in finished jobs:      12309s     205.15m     3.42h    0.14d  0.000 y
-# IO & Wait Time:                   290s       4.83m     0.08h    0.00d  0.000 y
-# Average job time:                 126s       2.10m     0.03h    0.00d
-# Longest finished job:             380s       6.33m     0.11h    0.00d
-# Submission to last job:           386s       6.43m     0.11h    0.00d
+# CPU time in finished jobs:      11979s     199.66m     3.33h    0.14d  0.000 y
+# IO & Wait Time:                   251s       4.18m     0.07h    0.00d  0.000 y
+# Average job time:                 122s       2.04m     0.03h    0.00d
+# Longest finished job:             289s       4.82m     0.08h    0.00d
+# Submission to last job:           303s       5.05m     0.08h    0.00d
 
     cat specScores/run.time | sed -e 's/^/# /;'
-# Completed: 3041114 of 3041114 jobs
-# CPU time in finished jobs:  282305886s 4705098.10m 78418.30h 3267.43d  8.952 y
-# IO & Wait Time:              84009113s 1400151.88m 23335.86h  972.33d  2.664 y
-# Average job time:                 120s       2.01m     0.03h    0.00d
-# Longest finished job:             498s       8.30m     0.14h    0.01d
-# Submission to last job:        381920s    6365.33m   106.09h    4.42d
-
-Submission to last job:        274925s    4582.08m    76.37h    3.18d
-
-# Number of specScores: 227564780
+# Completed: 3096565 of 3096565 jobs
+# CPU time in finished jobs:  263946983s 4399116.38m 73318.61h 3054.94d  8.370 y
+# IO & Wait Time:              17766691s  296111.52m  4935.19h  205.63d  0.563 y
+# Average job time:                  91s       1.52m     0.03h    0.00d
+# Longest finished job:             851s      14.18m     0.24h    0.01d
+# Submission to last job:        324649s    5410.82m    90.18h    3.76d
 
-# real    7482m37.507s
-# user    0m2.047s
-# sys     0m2.110s
+# Number of specScores: 233102255
 
     ### remember to get back to hgwdev to run this
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
-       -continue=effScores -stop=load canFam4 ncbiRefSeq \
+       -continue=effScores -stop=load canFam4 genscan \
     -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > load.log 2>&1
-    # real    1081m16.460s
+    # real    932m13.229s
 
     cat effScores/run.time | sed -e 's/^/# /;'
-# Completed: 27933 of 27933 jobs
-# CPU time in finished jobs:   13825593s  230426.55m  3840.44h  160.02d  0.438 y
-# IO & Wait Time:                172582s    2876.37m    47.94h    2.00d  0.005 y
-# Average job time:                 501s       8.35m     0.14h    0.01d
-# Longest finished job:           20199s     336.65m     5.61h    0.23d
-# Submission to last job:         22274s     371.23m     6.19h    0.26d
+# Completed: 25662 of 25662 jobs
+# CPU time in finished jobs:   12763858s  212730.96m  3545.52h  147.73d  0.405 y
+# IO & Wait Time:                144123s    2402.05m    40.03h    1.67d  0.005 y
+# Average job time:                 503s       8.38m     0.14h    0.01d
+# Longest finished job:            4091s      68.18m     1.14h    0.05d
+# Submission to last job:         15067s     251.12m     4.19h    0.17d
 
     cat offTargets/run.time | sed -e 's/^/# /;'
-# Completed: 152056 of 152056 jobs
-# CPU time in finished jobs:    2009038s   33483.97m   558.07h   23.25d  0.064 y
-# IO & Wait Time:               2321685s   38694.75m   644.91h   26.87d  0.074 y
-# Average job time:                  28s       0.47m     0.01h    0.00d
-# Longest finished job:              53s       0.88m     0.01h    0.00d
-# Submission to last job:          4266s      71.10m     1.19h    0.05d
+# Completed: 154829 of 154829 jobs
+# CPU time in finished jobs:    1805712s   30095.20m   501.59h   20.90d  0.057 y
+# IO & Wait Time:               3128264s   52137.73m   868.96h   36.21d  0.099 y
+# Average job time:                  32s       0.53m     0.01h    0.00d
+# Longest finished job:             273s       4.55m     0.08h    0.00d
+# Submission to last job:          5337s      88.95m     1.48h    0.06d
 
 #########################################################################
 # all.joiner update, downloads and in pushQ - (WORKING - 2019-11-20 - Hiram)
     cd $HOME/kent/src/hg/makeDb/schema
     # verify all the business is done for release
     ~/kent/src/hg/utils/automation/verifyBrowser.pl canFam4
 
+XXX - wait for genbank to be loaded
     # fixup all.joiner until this is a clean output
     joinerCheck -database=canFam4 -tableCoverage all.joiner
     joinerCheck -database=canFam4 -times all.joiner
     joinerCheck -database=canFam4 -keys all.joiner
 
     # when clean, check in:
-    git commit -m 'adding rules for canFam4 refs #24524' all.joiner
+    git commit -m 'adding rules for canFam4 refs #25279' all.joiner
     git push
     # run up a 'make alpha' in hg/hgTables to get this all.joiner file
     # into the hgwdev/genome-test system
 
     cd /hive/data/genomes/canFam4
     time (makeDownloads.pl canFam4) > downloads.log 2>&1
-XXX - running - Wed Nov 27 15:54:09 PST 2019
     #  real    17m47.024s
 
     #   now ready for pushQ entry
     mkdir /hive/data/genomes/canFam4/pushQ
     cd /hive/data/genomes/canFam4/pushQ
  time ($HOME/kent/src/hg/utils/automation/makePushQSql.pl -redmineList canFam4) > canFam4.pushQ.sql 2> stderr.out
     # real    15m52.548s
 
     # remove the tandemDups and gapOverlap from the file list:
     sed -i -e "/tandemDups/d" redmine.canFam4.table.list
     sed -i -e "/Tandem Dups/d" redmine.canFam4.releaseLog.txt
     sed -i -e "/gapOverlap/d" redmine.canFam4.table.list
     sed -i -e "/Gap Overlaps/d" redmine.canFam4.releaseLog.txt
 
     #   check for errors in stderr.out, some are OK, e.g.: