8ee6e38f07d899be11e5bd15b166b251cb5420b6
hiram
  Thu Nov 28 10:32:53 2019 -0800
waiting for genbank run to complete refs #24568

diff --git src/hg/makeDb/doc/regenCho1/initialBuild.txt src/hg/makeDb/doc/regenCho1/initialBuild.txt
index 8c2cc5b..2976ccd 100644
--- src/hg/makeDb/doc/regenCho1/initialBuild.txt
+++ src/hg/makeDb/doc/regenCho1/initialBuild.txt
@@ -656,38 +656,46 @@
     mkdir /hive/data/genomes/regenCho1/bed/cpgIslands
     cd /hive/data/genomes/regenCho1/bed/cpgIslands
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev -smallClusterHub=ku regenCho1) > do.log 2>&1
     # real    3m34.486s
 
     cat fb.regenCho1.cpgIslandExt.txt
     # 11992730 bases of 2266312740 (0.529%) in intersection
 
 ##############################################################################
 # genscan - (DONE - 2019-11-26 - Hiram)
     mkdir /hive/data/genomes/regenCho1/bed/genscan
     cd /hive/data/genomes/regenCho1/bed/genscan
     time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -bigClusterHub=ku regenCho1) > do.log 2>&1
-XXX - running - Tue Nov 26 10:15:46 PST 2019
     # real    126m0.077s
 
+    # three jobs failed on the ku run, finished on hgwdev manually:
+# ./runGsBig.2M.csh ss1415 000 gtf/000/ss1415.gtf pep/000/ss1415.pep subopt/000/ss1415.bed
+# ./runGsBig.2M.csh ss100006 000 gtf/000/ss100006.gtf pep/000/ss100006.pep subopt/000/ss100006.bed
+# ./runGsBig.2M.csh ss5358 000 gtf/000/ss5358.gtf pep/000/ss5358.pep subopt/000/ss5358.bed
+
+    time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
+      -continue=makeBed -bigClusterHub=ku regenCho1) > makeBed.log 2>&1
+    # real    1m14.506s
+
     cat fb.regenCho1.genscan.txt
-    # 54712419 bases of 2534810853 (2.158%) in intersection
+    # 55358798 bases of 2266312740 (2.443%) in intersection
 
     cat fb.regenCho1.genscanSubopt.txt
-    # 56830306 bases of 2534810853 (2.242%) in intersection
+    # 58714924 bases of 2266312740 (2.591%) in intersection
 
 #########################################################################
 # Create kluster run files (TBD - 2019-06-29 - Hiram)
 
     # numerator is regenCho1 gapless bases "real" as reported by:
     featureBits -noRandom -noHap regenCho1 gap
     # 265206282 bases of 2266312740 (11.702%) in intersection
 
     #                       ^^^
     # denominator is hg19 gapless bases as reported by:
     #   featureBits -noRandom -noHap hg19 gap
     #     234344806 bases of 2861349177 (8.190%) in intersection
     # 1024 is threshold used for human -repMatch:
     calc \( 2266312740 / 2861349177 \) \* 1024
     #  ( 2266312740 / 2861349177 ) * 1024 = 811.052445
@@ -704,95 +712,97 @@
 
     #   check non-bridged gaps to see what the typical size is:
     hgsql -N \
         -e 'select * from gap where bridge="no" order by size;' regenCho1 \
         | sort -k7,7nr | ave -col=7 stdin
     # min 52599.000000
     # max 165458.000000
 
     gapToLift -verbose=2 -minGap=50000 regenCho1 jkStuff/nonBridged.lift \
         -bedFile=jkStuff/nonBridged.bed
     wc -l jkStuff/nonBri*
     #	7832 jkStuff/nonBridged.bed
     #	7832 jkStuff/nonBridged.lift
 
 ########################################################################
-# lastz/chain/net swap human/hg38 (TBD - 2019-11-25 - Hiram)
+# lastz/chain/net swap human/hg38 (DONE - 2019-11-26 - Hiram)
     # original alignment
-    cd /hive/data/genomes/hg38/bed/lastzRegenCho1.2019-11-25
+    cd /hive/data/genomes/hg38/bed/lastzRegenCho1.2019-11-26
 
     cat fb.hg38.chainRegenCho1Link.txt
-    # 154079940 bases of 3095998939 (4.977%) in intersection
+    # 979733899 bases of 3095998939 (31.645%) in intersection
     cat fb.hg38.chainSynRegenCho1Link.txt
-    # 95877644 bases of 3095998939 (3.097%) in intersection
+    # 917104031 bases of 3095998939 (29.622%) in intersection
     cat fb.hg38.chainRBest.RegenCho1.txt
-    # 106665747 bases of 3095998939 (3.445%) in intersection
+    # 901006295 bases of 3095998939 (29.102%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/regenCho1/bed/blastz.hg38.swap
     cd /hive/data/genomes/regenCho1/bed/blastz.hg38.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
-      /hive/data/genomes/hg38/bed/lastzRegenCho1.2019-11-25/DEF \
-        -swap -chainMinScore=5000 -chainLinearGap=loose \
+      /hive/data/genomes/hg38/bed/lastzRegenCho1.2019-11-26/DEF \
+        -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-            -syntenicNet) > swap.log 2>&1
-    #  real    9m45.514s
+            -noDbNameCheck -syntenicNet) > swap.log 2>&1
+    #  real    79m18.904s
 
     cat fb.regenCho1.chainHg38Link.txt
-    # 120955955 bases of 1055588482 (11.459%) in intersection
-
+    # 956720146 bases of 2266312740 (42.215%) in intersection
     cat fb.regenCho1.chainSynHg38Link.txt
-    # 92597630 bases of 1055588482 (8.772%) in intersection
+    # 895755077 bases of 2266312740 (39.525%) in intersection
 
-    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` regenCho1 hg38) > rbest.log 2>&1 &
-    # real    139m24.408s
+    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
+	regenCho1 hg38) > rbest.log 2>&1 &
+    # real    289m24.440s
 
     cat fb.regenCho1.chainRBest.Hg38.txt
-    # 106294585 bases of 1055588482 (10.070%) in intersection
+    # 902782523 bases of 2266312740 (39.835%) in intersection
 
 #########################################################################
-# lastz/chain/net swap mouse/mm10 (TBD - 2019-11-25 - Hiram)
+# lastz/chain/net swap mouse/mm10 (DONE - 2019-11-26 - Hiram)
 
     # original alignment
-    cd /hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-25
+    cd /hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26
+
     cat fb.mm10.chainRegenCho1Link.txt
-    # 101151132 bases of 2652783500 (3.813%) in intersection
+    #	1525566783 bases of 2652783500 (57.508%) in intersection
     cat fb.mm10.chainSynRegenCho1Link.txt
-    # 70707720 bases of 2652783500 (2.665%) in intersection
+    #   1410851403 bases of 2652783500 (53.184%) in intersection
     cat fb.mm10.chainRBest.RegenCho1.txt
-    # 79649474 bases of 2652783500 (3.002%) in intersection
+    # 1395524606 bases of 2652783500 (52.606%) in intersection
 
-    # and for the swap:
     mkdir /hive/data/genomes/regenCho1/bed/blastz.mm10.swap
     cd /hive/data/genomes/regenCho1/bed/blastz.mm10.swap
-
     time (doBlastzChainNet.pl -verbose=2 \
-      /hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-25/DEF \
-        -swap -chainMinScore=5000 -chainLinearGap=loose \
+	/hive/data/genomes/mm10/bed/lastzRegenCho1.2019-11-26/DEF \
+	-noDbNameCheck -swap -syntenicNet \
 	-workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
-            -syntenicNet) > swap.log 2>&1
-    #  real    6m41.043s
+	-chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1 &
+    #	real    101m20.296s
 
     cat fb.regenCho1.chainMm10Link.txt
-    # 88539346 bases of 1055588482 (8.388%) in intersection
+    #	1522181082 bases of 2266312740 (67.166%) in intersection
+    cat fb.regenCho1.chainSynMm10Link.txt
+    #   1397889394 bases of 2266312740 (61.681%) in intersection
 
-    time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` regenCho1 mm10) > rbest.log 2>&1 &
-    # real    94m11.007s
+    time (doRecipBest.pl -load -workhorse=hgwdev regenCho1 mm10 \
+      -buildDir=`pwd` -workhorse=hgwdev) > rbest.log 2>&1 &
+    # real    660m29.571s
 
     cat fb.regenCho1.chainRBest.Mm10.txt
-    # 79474812 bases of 1055588482 (7.529%) in intersection
+    # 1396267649 bases of 2266312740 (61.610%) in intersection
 
 ##############################################################################
 # GENBANK AUTO UPDATE (DONE - 2019-11-26 - Hiram)
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # /cluster/data/genbank/data/organism.lst shows:
     # #organism             mrnaCnt   estCnt  refSeqCnt
     # Cricetulus barabensis	34	2	0
     # Cricetulus griseus	90146	12	344
     # Cricetulus longicaudatus	58	0	0
     # Cricetulus migratorius	18	0	0
     # Cricetulus sp.	36	0	0
 
     # edit etc/genbank.conf to add regenCho1 just before criGriChoV2
@@ -828,41 +838,40 @@
     # enable daily alignment and update of hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add regenCho1 to:
     #   etc/align.dbs etc/hgwdev.dbs
     git add etc/align.dbs etc/hgwdev.dbs
     git commit -m "Added regenCho1 - Regeneron CHO refs #24568" etc/hgwdev.dbs \
 	etc/align.dbs
     git push
     make etc-update
 
     # wait a few days for genbank magic to take place, the tracks will
     # appear
 
 #############################################################################
-# augustus gene track (TBD - 2019-06-29 - Hiram)
+# augustus gene track (DONE - 2019-11-26 - Hiram)
     mkdir /hive/data/genomes/regenCho1/bed/augustus
     cd /hive/data/genomes/regenCho1/bed/augustus
     time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \
         -species=human -dbHost=hgwdev \
            -workhorse=hgwdev regenCho1) > do.log 2>&1
-XXX - running - Tue Nov 26 10:15:46 PST 2019
-    # real    194m56.414s
+    # real    219m51.368s
 
     cat fb.regenCho1.augustusGene.txt
-    # 48867584 bases of 2534810853 (1.928%) in intersection
+    # 50452718 bases of 2266312740 (2.226%) in intersection
 
 #########################################################################
 # ncbiRefSeq (TBD - 2019-11-25 - Hiram)
 
     mkdir /hive/data/genomes/regenCho1/bed/ncbiRefSeq
     cd /hive/data/genomes/regenCho1/bed/ncbiRefSeq
     # running step wise just to be careful
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -bigClusterHub=ku -dbHost=hgwdev \
       -stop=download -fileServer=hgwdev -smallClusterHub=ku -workhorse=hgwdev \
       refseq vertebrate_other Gallus_gallus \
       GCF_000002315.5_GRCg6a regenCho1) > download.log 2>&1
     # real    1m19.029s
 
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
@@ -906,39 +915,61 @@
  # refGene 1.374%, ncbiRefSeqCurated 1.368%, both 1.364%, cover 99.32%, enrich 72.59x
 
 #########################################################################
 # LIFTOVER TO criGriChoV2 (DONE - 2019-11-26 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/regenCho1/bed/blat.criGriChoV2.2019-11-26
     cd /hive/data/genomes/regenCho1/bed/blat.criGriChoV2.2019-11-26
     doSameSpeciesLiftOver.pl -verbose=2 \
         -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/regenCho1/jkStuff/regenCho1.11.ooc \
          regenCho1 criGriChoV2
     time (doSameSpeciesLiftOver.pl -verbose=2 \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -ooc=/hive/data/genomes/regenCho1/jkStuff/regenCho1.11.ooc \
          regenCho1 criGriChoV2) > doLiftOverToRn6.log 2>&1
-XXX - running - Tue Nov 26 10:21:21 PST 2019
-    # about 3 hours 20 minutes
+    # real    523m38.199s
 
-    # see if the liftOver menus function in the browser from regenCho1 to galGal5
+    # see if the liftOver menus function in the browser from regenCho1
+    # to criGriChoV2
 
     # would like to see this as a track:
-    time chainToPsl regenCho1ToRn6.over.chain.gz ../../chrom.sizes \
-   /hive/data/genomes/rn6/chrom.sizes ../../regenCho1.2bit \
-   /hive/data/genomes/rn6/regenCho1.2bit regenCho1ToRn6.psl
+    # (this psl file is not actually used; the net track below is loaded)
+    time chainToPsl regenCho1ToCriGriChoV2.over.chain.gz ../../chrom.sizes \
+   /hive/data/genomes/criGriChoV2/chrom.sizes ../../regenCho1.2bit \
+   /hive/data/genomes/criGriChoV2/criGriChoV2.2bit regenCho1ToCriGriChoV2.psl
+
+    # build a net from the liftOver chain; it is loaded as netCriGriChoV2:
+chainSort regenCho1ToCriGriChoV2.over.chain.gz stdout \
+   | chainPreNet stdin \
+   /hive/data/genomes/regenCho1/chrom.sizes \
+   /hive/data/genomes/criGriChoV2/chrom.sizes stdout \
+     | chainNet stdin -minSpace=1 /hive/data/genomes/regenCho1/chrom.sizes \
+        /hive/data/genomes/criGriChoV2/chrom.sizes stdout /dev/null \
+        | netSyntenic stdin noClass.net
+
+netClass -verbose=0 -noAr noClass.net regenCho1 criGriChoV2 regenCho1.criGriChoV2.net
+
+netFilter -minGap=10 regenCho1.criGriChoV2.net \
+    | hgLoadNet -verbose=0 regenCho1 netCriGriChoV2 stdin
+
+
+    # Got 7812 chroms in /hive/data/genomes/regenCho1/chrom.sizes, 8265 in /hive/data/genomes/criGriChoV2/chrom.sizes
+    # Finishing nets
+    # writing stdout
+    # writing /dev/null
+    # memory usage 168030208, utime 102 s/100, stime 9
 
 #########################################################################
 #  BLATSERVERS ENTRY (TBD - 2019-11-25 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("regenCho1", "blat1a", "17892", "1", "0"); \
 	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
 	VALUES ("regenCho1", "blat1a", "17893", "0", "1");' \
 	    hgcentraltest
     #	test it with some sequence
 
 ############################################################################
 ## reset default position to MEPE gene (egg shell protein)
@@ -946,31 +977,30 @@
 
     # as found from the galGal5 to regenCho1 liftOver
     ssh hgwdev
     hgsql -e 'update dbDb set defaultPos="chr4:45667017-45672928"
 	where name="regenCho1";' hgcentraltest
 
 #########################################################################
 # crispr whole genome (WORKING - 2019-07-02 - Hiram)
     mkdir /hive/data/genomes/regenCho1/bed/crisprAll
     cd /hive/data/genomes/regenCho1/bed/crisprAll
 
     # working on this script, adding the indexFa step:
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
    -stop=indexFa -buildDir=`pwd` -smallClusterHub=ku regenCho1 augustusGene) \
 	> indexFa.log 2>&1
-XXX - running - Tue Jul  2 11:09:39 PDT 2019
     # real    23m26.694s
 
     # the large shoulder argument will cause the entire genome to be scanned
 ~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \
     hg19 knownGene -shoulder=250000000 -tableName=crisprAll -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev-101 -bigClusterHub=ku \
       -workhorse=hgwdev
 
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
        -continue=ranges -stop=guides -buildDir=`pwd` -smallClusterHub=ku \
            regenCho1 ncbiRefSeq) > guides.log 2>&1
     # real    2m50.758s
 
     # adding the /dev/shm/ setup rsync for the indexed Fa