980a640fde4cdcd5aa34f24de2f71e51e3d0fe61
hiram
  Fri Feb 25 10:50:52 2022 -0800
stalled on cpgIslands refs #23367

diff --git src/hg/makeDb/doc/ambMex2/initialBuild.txt src/hg/makeDb/doc/ambMex2/initialBuild.txt
index 13ce20c..ca0a7d3 100644
--- src/hg/makeDb/doc/ambMex2/initialBuild.txt
+++ src/hg/makeDb/doc/ambMex2/initialBuild.txt
@@ -59,31 +59,31 @@
 ## GenBank Unit Accession       RefSeq Unit Accession   Assembly-Unit name
 ## GCA_002915645.2              Primary Assembly
 
 # check assembly size for later reference:
 
 faSize G*v2_genomic.fna.gz
 # 32396370977 bases (4029676509 N's 28366694468 real 28365740082 upper
 #	954386 lower) in 98070 sequences in 1 files
 # Total size: mean 330339.3 sd 20104120.1 min 1033 (PGSH01113832.1)
 #	max 2030161756 (CM010939.1) median 40921
 # %0.00 masked total, %0.00 masked real
 
 #    real    6m32.968s
 
 #############################################################################
-# establish config.ra file (TBD - Hiram - 2018-10-11)
+# establish config.ra file (DONE - Hiram - 2019-04-09)
     cd /hive/data/genomes/ambMex2
     ~/kent/src/hg/utils/automation/prepConfig.pl ambMex2 vertebrate axolotl \
        genbank/*_assembly_report.txt > ambMex2.config.ra
 
     # compare with previous version to see if it is sane:
     diff ambMex2.config.ra ../ambMex1/ambMex1.config.ra
 
     # verify it really does look sane
     cat ambMex2.config.ra
 # config parameters for makeGenomeDb.pl:
 db ambMex2
 clade vertebrate
 # genomeCladePriority 70
 scientificName Ambystoma mexicanum
 commonName Axolotl
@@ -96,31 +96,31 @@
 fastaFiles /hive/data/genomes/ambMex2/ucsc/*.fa.gz
 agpFiles /hive/data/genomes/ambMex2/ucsc/*.agp
 # qualFiles none
 dbDbSpeciesDir axolotl
 photoCreditURL  https://www.flickr.com/people/35871148@N04
 photoCreditName Ruben Undheim/Flickr
 ncbiGenomeId 381
 ncbiAssemblyId 2130471
 ncbiAssemblyName ASM291563v2
 ncbiBioProject 378970
 ncbiBioSample SAMN06554622
 genBankAccessionID GCA_002915635.2
 taxId 8296
 
 #############################################################################
-# setup UCSC named files (TBD - 2018-10-11 - Hiram)
+# setup UCSC named files (DONE - 2019-03-26 - Hiram)
 
     mkdir /hive/data/genomes/ambMex2/ucsc
     cd /hive/data/genomes/ambMex2/ucsc
 
     # check for duplicate sequences:
     time faToTwoBit -long -noMask ../genbank/G*v2_genomic.fna.gz genbank.2bit
     #  real    7m9.731s
 
     time twoBitDup genbank.2bit
     # real    2m3.641s
 
     # no output is a good result, otherwise, would have to eliminate duplicates
     # the scripts creating the fasta here will be using this genbank.2bit file
     # remove it later
 
@@ -617,35 +617,37 @@
         -dbHost=hgwdev ambMex2) > do.log 2>&1
     # real    1747m17.123s
 
     # Masking statistics
     cat faSize.ambMex2.cleanWMSdust.txt
 # 32396387346 bases (4029676509 N's 28366710837 real 703 upper 28366710134
 #	lower) in 98071 sequences in 1 files
 # Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1)
 #	max 2030161756 (chr7) median 40920
 # %87.56 masked total, %100.00 masked real
 
     cat fb.ambMex2.rmsk.windowmaskerSdust.txt
     # 18368939458 bases of 32396387346 (56.701%) in intersection
 
 ##########################################################################
-# cpgIslands - (TBD - 2018-10-11 - Hiram)
+# cpgIslands - (WORKING - 2018-10-11 - Hiram)
     mkdir /hive/data/genomes/ambMex2/bed/cpgIslands
     cd /hive/data/genomes/ambMex2/bed/cpgIslands
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev -smallClusterHub=ku ambMex2) > do.log 2>&1
+XXX - running last18 manually on hgwdev 2020-12-11 - Hiram
+something is too large MALLOC failure reqesting -2147483648 bytes - aborting
     # real    2m5.105s
 
     cat fb.ambMex2.cpgIslandExt.txt
     # 16395346 bases of 1055588482 (1.553%) in intersection
 
 ##############################################################################
 # genscan - (DONE - 2020-08-17 - Hiram)
 XXX - waiting for ku to return after power fails - Mon Aug 17 12:11:48 PDT 2020
     mkdir /hive/data/genomes/ambMex2/bed/genscan
     cd /hive/data/genomes/ambMex2/bed/genscan
     time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -bigClusterHub=ku ambMex2) > do.log 2>&1
     # real    88m34.900s
 
     cat fb.ambMex2.genscan.txt