7b0c8bb2cb0e36f4b8a3335dfe755586cbdf584f
hiram
  Wed Dec 7 21:32:28 2022 -0800
liftOver per user request 4 to 9 to 4 refs #30301

diff --git src/hg/makeDb/doc/bosTau9/initialBuild.txt src/hg/makeDb/doc/bosTau9/initialBuild.txt
index 7b61206..70c5978 100644
--- src/hg/makeDb/doc/bosTau9/initialBuild.txt
+++ src/hg/makeDb/doc/bosTau9/initialBuild.txt
@@ -1,1175 +1,1200 @@
 # for emacs: -*- mode: sh; -*-
 
 # This file describes browser build for the bosTau9
 
 #########################################################################
 # reuse photograph from bosTau previous versions (DONE - hiram - 2018-11-06)
 
 mkdir /hive/data/genomes/bosTau9
 cd /hive/data/genomes/bosTau9
 cp -p ../bosTau8/photoReference.txt .
 
 
 cat photoReference.txt
 
 # photoCreditURL  http://www.genome.gov/dmd/img.cfm?node=Photos/Animals/Cow&id=79109
 # photoCreditName NHGRI press photos
 
 #########################################################################
 #  Initial steps (DONE  - 2018-11-06 - Hiram)
 
 # To start this initialBuild.txt document, from a previous assembly document:
 
 mkdir ~/kent/src/hg/makeDb/doc/bosTau9
 cd ~/kent/src/hg/makeDb/doc/bosTau9
 
 # best to use the most recent document since it has the latest features and
 # procedures:
 sed -e 's/oviAri4/bosTau9/g; s/OviAri4/BosTau9/g; s/DONE/TBD/g;' \
    ../oviAri4/initialBuild.txt > initialBuild.txt
 
 mkdir /hive/data/genomes/bosTau9/refseq
 cd /hive/data/genomes/bosTau9/refseq
 
 time (rsync --stats -L -a -P \
 rsync://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Bos_taurus/all_assembly_versions/GCF_002263795.1_ARS-UCD1.2/ ./) > fetch.log 2>&1
 
 # sent 3,528 bytes  received 3,992,632,460 bytes  52,882,595.87 bytes/sec
 # total size is 3,991,645,023  speedup is 1.00
 
 # real    1m14.770s
 
 # check assembly size for later reference:
 
 faSize G*D1.2_genomic.fna.gz
 # 2715853792 bases (28162 N's 2715825630 real 1595305255 upper
 #	1120520375 lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (NW_020192071.1)
 #	max 158534110 (NC_037328.1) median 21935
 # %41.26 masked total, %41.26 masked real
 
 # this information is from the top of
 #    bosTau9/refseq/GCF_002263795.1_ARS-UCD1.2_assembly_report.txt
 
 # Assembly name:  ARS-UCD1.2
 # Organism name:  Bos taurus (cattle)
 # Infraspecific name:  breed=Hereford
 # Isolate:  L1 Dominette 01449 registration number 42190680
 # Sex:  female
 # Taxid:          9913
 # BioSample:      SAMN03145444
 # BioProject:     PRJNA391427
 # Submitter:      USDA ARS
 # Date:           2018-4-11
 # Assembly type:  haploid
 # Release type:   major
 # Assembly level: Chromosome
 # Genome representation: full
 # WGS project:    NKLS02
 # Assembly method: Falcon v. FEB-2016
 # Expected final version: yes
 # Reference guided assembly: de-novo
 # Genome coverage: 80.0x
 # Sequencing technology: PacBio; Illumina NextSeq 500; Illumina HiSeq; Illumina GAII
 # RefSeq category: Representative Genome
 # GenBank assembly accession: GCA_002263795.2
 # RefSeq assembly accession: GCF_002263795.1
 # RefSeq assembly and GenBank assemblies identical: no
 #
 ## Assembly-Units:
 ## GenBank Unit Accession       RefSeq Unit Accession   Assembly-Unit name
 ## GCA_002263805.2      GCF_002263805.1 Primary Assembly
 ## GCA_002263815.2      GCF_000001285.1 non-nuclear
 
 #############################################################################
 # establish config.ra file (DONE - Hiram - 2018-11-06)
     # arguments here are: <db> <clade> <trackDbDir> <assembly_report.txt>
     cd /hive/data/genomes/bosTau9
     $HOME/kent/src/hg/utils/automation/prepConfig.pl bosTau9 mammal \
         cow ./refseq/*_assembly_report.txt > bosTau9.config.ra
 
     # compare to ../bosTau8 to see what might need to be fixed up:
     diff bosTau9.config.ra ../bosTau8/bosTau8.config.ra | less
     # fixup the 'commonName' from Cattle to Cow and orderKey from 3262 to 3626
 
     cat bosTau9.config.ra
 # config parameters for makeGenomeDb.pl:
 db bosTau9
 clade mammal
 genomeCladePriority 35
 scientificName Bos taurus
 commonName Cow
 assemblyDate Apr. 2018
 assemblyLabel USDA ARS
 assemblyShortLabel ARS-UCD1.2
 orderKey 3626
 # mitochondrial sequence included in refseq release
 # mitoAcc NC_006853.1
 mitoAcc none
 fastaFiles /hive/data/genomes/bosTau9/ucsc/*.fa.gz
 agpFiles /hive/data/genomes/bosTau9/ucsc/*.agp
 # qualFiles none
 dbDbSpeciesDir cow
 photoCreditURL  http://www.genome.gov/dmd/img.cfm?node=Photos/Animals/Cow&id=79109
 photoCreditName NHGRI press photos
 ncbiGenomeId 82
 ncbiAssemblyId 1677391
 ncbiAssemblyName ARS-UCD1.2
 ncbiBioProject 391427
 ncbiBioSample SAMN03145444
 genBankAccessionID GCF_002263795.1
 taxId 9913
 
 #############################################################################
 # setup UCSC named files (DONE - 2018-11-06 - Hiram)
 
     mkdir /hive/data/genomes/bosTau9/ucsc
     cd /hive/data/genomes/bosTau9/ucsc
 
     # check for duplicate sequences:
     time faToTwoBit -noMask ../refseq/G*D1.2_genomic.fna.gz refseq.2bit
     #  real    0m44.551s
 
     twoBitDup refseq.2bit
     # no output is a good result, otherwise, would have to eliminate duplicates
     # the scripts creating the fasta here will be using this refseq.2bit file
 
     time ~/kent/src/hg/utils/automation/ucscCompositeAgp.pl \
        ../refseq/G*D1.2_genomic.fna.gz \
           ../refseq/G*D1.2_assembly_structure/Primary_Assembly
 # NC_037328.1 chr1
 # NC_037329.1 chr2
 # NC_037330.1 chr3
 # NC_037331.1 chr4
 # NC_037332.1 chr5
 # NC_037333.1 chr6
 # NC_037334.1 chr7
 # NC_037335.1 chr8
 # NC_037336.1 chr9
 # NC_037337.1 chr10
 # NC_037338.1 chr11
 # NC_037339.1 chr12
 # NC_037340.1 chr13
 # NC_037341.1 chr14
 # NC_037342.1 chr15
 # NC_037343.1 chr16
 # NC_037344.1 chr17
 # NC_037345.1 chr18
 # NC_037346.1 chr19
 # NC_037347.1 chr20
 # NC_037348.1 chr21
 # NC_037349.1 chr22
 # NC_037350.1 chr23
 # NC_037351.1 chr24
 # NC_037352.1 chr25
 # NC_037353.1 chr26
 # NC_037354.1 chr27
 # NC_037355.1 chr28
 # NC_037356.1 chr29
 # NC_037357.1 chrX
 
 # real    10m47.295s
 
     # unplaced sequences
     time ~/kent/src/hg/utils/automation/unplacedWithChroms.pl \
        ../refseq/*_assembly_structure/Primary_Assembly
 # processed 2180 sequences into chrUn.fa.gz
 # real    0m27.379s
 
     # unlocalized sequences
     time ~/kent/src/hg/utils/automation/unlocalizedWithChroms.pl \
        ../refseq/*_assembly_structure/Primary_Assembly
 # No unlocalized sequences
 
     # bash syntax here
     mitoAcc=`grep "^# mitoAcc" ../bosTau9.config.ra | awk '{print $NF}'`
     printf "# mitoAcc %s\n" "$mitoAcc"
 # mitoAcc NC_006853.1
 
     zcat \
   ../refseq/*_assembly_structure/non-nuclear/assem*/AGP/chrMT.comp.agp.gz \
      | grep -v "^#" | sed -e "s/^$mitoAcc/chrM/;" > chrM.agp
 
     printf ">chrM\n" > chrM.fa
     twoBitToFa -noMask refseq.2bit:$mitoAcc stdout | grep -v "^>" >> chrM.fa
     gzip chrM.fa
 
     # verify chrM sequence is there:
     faCount chrM.fa.gz
 #seq    len     A       C       G       T       N       cpg
 chrM    16338   5457    4238    2202    4441    0       358
 
     # verify fasta and AGPs agree
     time faToTwoBit *.fa.gz test.2bit
     # real    0m58.603s
 
     time cat *.agp | checkAgpAndFa stdin test.2bit 2>&1 | tail -4
     # All AGP and FASTA entries agree - both files are valid
 
     # real    0m3.117s
 
     # and no sequence lost from original:
     twoBitToFa test.2bit stdout | faSize stdin
 # 2715853792 bases (28162 N's 2715825630 real 2715825630 upper 0
 #	lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (chrUn_NW_020192071v1)
 #	max 158534110 (chr1) median 21935
 
     # same numbers as above
 # 2715853792 bases (28162 N's 2715825630 real 1595305255 upper
 #	1120520375 lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (NW_020192071.1)
 #	max 158534110 (NC_037328.1) median 21935
 
     # no longer need these temporary 2bit files
     rm refseq.2bit test.2bit
 
 #############################################################################
 #  Initial database build (DONE - 2018-11-06 - Hiram)
 
     cd /hive/data/genomes/bosTau9
     # verify sequence and AGP are OK:
     time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
          -stop=agp bosTau9.config.ra) > agp.log 2>&1
     # real    2m36.514s
 
     # verify there was no error in that step:
     tail agp.log
     #  *** All done!  (through the 'agp' step)
 
     # then finish it off:
     time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
        -fileServer=hgwdev -continue=db bosTau9.config.ra) > db.log 2>&1
     # real    15m27.039s
 
     # verify gaps are all there
     twoBitInfo -nBed bosTau9.unmasked.2bit stdout | awk '{print $3-$2}' \
 	| ave stdin | sed -e 's/^/# /;'
 # Q1 25.000000
 # median 25.000000
 # Q3 100.000000
 # average 72.958549
 # min 23.000000
 # max 252.000000
 # count 386
 # total 28162.000000
 # standard deviation 71.040090
 
     twoBitInfo -nBed bosTau9.unmasked.2bit stdout \
        | awk '{printf "%s\t%d\t%d\t%d\n", $1,$2,$3,$3-$2}' \
          | sort -k4,4nr | cut -f4 | uniq -c
      42 252
     116 100
       1 99
       1 85
       1 66
       1 65
       1 60
       1 44
       1 36
     220 25
       1 23
 
 
     # the gap table has nothing
     hgsql -e 'select count(*) from gap;' bosTau9
 +----------+
 | count(*) |
 +----------+
 |        0 |
 +----------+
 
     # otherwise, compare to the gap table:
     hgsql -e 'select chromEnd-chromStart from gap;' bosTau9 | ave stdin | sed -e 's/^/# /;'
 
     # check in the trackDb files created in TemporaryTrackDbCheckout/
     #    and add bosTau9 to trackDb/makefile
 
     # temporary symlink until masked sequence is available
     cd /hive/data/genomes/bosTau9
     ln -s `pwd`/bosTau9.unmasked.2bit /gbdb/bosTau9/bosTau9.2bit
 
 ##############################################################################
 # cpgIslands on UNMASKED sequence (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/cpgIslandsUnmasked
     cd /hive/data/genomes/bosTau9/bed/cpgIslandsUnmasked
 
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
        -tableName=cpgIslandExtUnmasked \
           -maskedSeq=/hive/data/genomes/bosTau9/bosTau9.unmasked.2bit \
              -workhorse=hgwdev -smallClusterHub=ku bosTau9) > do.log 2>&1
     # real    3m41.462s
 
     cat fb.bosTau9.cpgIslandExtUnmasked.txt
     # 33761995 bases of 2715853792 (1.243%) in intersection
 
 #############################################################################
 # cytoBandIdeo - (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/cytoBand
     cd /hive/data/genomes/bosTau9/bed/cytoBand
     makeCytoBandIdeo.csh bosTau9
 
 #############################################################################
 # run up idKeys files for chromAlias/ncbiRefSeq (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/idKeys
     cd /hive/data/genomes/bosTau9/bed/idKeys
 
     time (doIdKeys.pl \
         -twoBit=/hive/data/genomes/bosTau9/bosTau9.unmasked.2bit \
         -buildDir=`pwd` bosTau9) > do.log 2>&1 &
     # real    0m47.105s
 
     cat bosTau9.keySignature.txt
     #  7850e2d5dabb6134fdc9d7083f1a3a54
 
     # one of them did not work, joinerCheck complained, remove it:
 hgsql -e 'delete from ncbiRefSeq where name="NM_001143743.1";' bosTau9
 hgsql -e 'delete from ncbiRefSeqLink where protAcc="NP_001137215.1";' bosTau9
 hgsql -e 'delete from ncbiRefSeqPsl where qName="NM_001143743.1";' bosTau9
 hgsql -e 'delete from ncbiRefSeqCurated where name="NM_001143743.1";' bosTau9
 hgsql -e 'delete from seqNcbiRefSeq where acc="NM_001143743.1";' bosTau9
 
 
 
 #############################################################################
 # gapOverlap (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/gapOverlap
     cd /hive/data/genomes/bosTau9/bed/gapOverlap
     time (doGapOverlap.pl \
 	-twoBit=/hive/data/genomes/bosTau9/bosTau9.unmasked.2bit bosTau9 ) \
 	> do.log 2>&1
     # real    1m51.390s
 
     cat fb.bosTau9.gapOverlap.txt
     # 150 bases of 2715853792 (0.000%) in intersection
 
     # 1 item on chr13
     zcat *.bed.gz | cut -f1 | cut -d'_' -f1 | sort | uniq -c |sort -r | head -5
      1 chr13
 
 #############################################################################
 # tandemDups (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/tandemDups
     cd /hive/data/genomes/bosTau9/bed/tandemDups
     time (~/kent/src/hg/utils/automation/doTandemDup.pl \
   -twoBit=/hive/data/genomes/bosTau9/bosTau9.unmasked.2bit bosTau9) \
 	> do.log 2>&1 &
     # real    140m36.622s
 
     cat fb.bosTau9.tandemDups.txt
     # 110536957 bases of 2715853792 (4.070%) in intersection
 
     bigBedInfo bosTau9.tandemDups.bb | sed -e 's/^/#  /;'
 #  version: 4
 #  fieldCount: 13
 #  hasHeaderExtension: yes
 #  isCompressed: yes
 #  isSwapped: 0
 #  extraIndexCount: 0
 #  itemCount: 1,556,737
 #  primaryDataSize: 41,133,265
 #  primaryIndexSize: 152,196
 #  zoomLevels: 9
 #  chromCount: 1955
 #  basesCovered: 2,031,718,211
 #  meanDepth (of bases covered): 7.749018
 #  minDepth: 1.000000
 #  maxDepth: 1559.000000
 #  std of depth: 13.615893
 
 #############################################################################
 # ucscToINSDC and ucscToRefSeq table/track (DONE - 2018-11-08 - Hiram)
     # construct idKeys for the refseq sequence
     mkdir /hive/data/genomes/bosTau9/refseq/idKeys
     cd /hive/data/genomes/bosTau9/refseq/idKeys
     faToTwoBit ../GCF_002263795.1_ARS-UCD1.2_genomic.fna.gz bosTau9.refSeq.2bit
 
     time (doIdKeys.pl -buildDir=`pwd` \
         -twoBit=`pwd`/bosTau9.refSeq.2bit refseqBosTau9)  > do.log 2>&1 &
     # real    2m57.680s
 
     cat refseqBosTau9.keySignature.txt
     #  8eb392728eaf8d55db9d9cf05639cc0e
 
     # and the genbank sequence needs keys too:
     mkdir /hive/data/genomes/bosTau9/refseq/idKeysGenbank
     cd /hive/data/genomes/bosTau9/refseq/idKeysGenbank
     faToTwoBit /hive/data/outside/ncbi/genomes/genbank/vertebrate_mammalian/Bos_taurus/all_assembly_versions/GCA_002263795.2_ARS-UCD1.2/GCA_002263795.2_ARS-UCD1.2_genomic.fna.gz bosTau9.genbank.2bit
 
     time (doIdKeys.pl -buildDir=`pwd` \
         -twoBit=`pwd`/bosTau9.genbank.2bit genbankBosTau9)  > do.log 2>&1 &
     # real    3m3.732s
 
     cat genbankBosTau9.keySignature.txt
     #  c5f6bb39f5c7053fa10deecfa9ce4fc6
 
     mkdir /hive/data/genomes/bosTau9/bed/chromAlias
     cd /hive/data/genomes/bosTau9/bed/chromAlias
 
     join -t$'\t' ../idKeys/bosTau9.idKeys.txt \
         ../../refseq/idKeysGenbank/genbankBosTau9.idKeys.txt | cut -f2- \
           | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
             | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
                | sort -k1,1 -k2,2n > ucscToINSDC.bed
 
     join -t$'\t' ../idKeys/bosTau9.idKeys.txt \
         ../../refseq/idKeys/refseqBosTau9.idKeys.txt | cut -f2- \
           | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
             | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
                | sort -k1,1 -k2,2n > ucscToRefSeq.bed
 
     # should be same line counts throughout:
     wc -l * ../../chrom.sizes
     # 2210 ucscToINSDC.bed
     # 2211 ucscToRefSeq.bed
     # 2211 ../../chrom.sizes
 
     # need to find the accession for the INSDC equivalent to chrM:
     egrep chrM *
 # ucscToRefSeq.bed:chrM   0       16338   NC_006853.1
 
     # lookup that accession at NCBI Entrez: AY526085.1
     # and add to ucscToINSDC.bed:
     printf "chrM\t0\t16338\tAY526085.1\n" >> ucscToINSDC.bed
     # verify:
     grep chrM *
 # ucscToINSDC.bed:chrM    0       16338   AY526085.1
 # ucscToRefSeq.bed:chrM   0       16338   NC_006853.1
 
     export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1`
     echo $chrSize
     # 20
     # use the $chrSize in this sed
     sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
          | hgLoadSqlTab bosTau9 ucscToINSDC stdin ucscToINSDC.bed
      # should be the same for ucscToRefSeq:
     export chrSize=`cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1`
     echo $chrSize
     # 20
     sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
        | sed -e 's/INSDC/RefSeq/g;' \
          | hgLoadSqlTab bosTau9 ucscToRefSeq stdin ucscToRefSeq.bed
 
     # should be quiet for all OK
     checkTableCoords bosTau9
 
     # should cover 100% entirely:
     featureBits -countGaps bosTau9 ucscToINSDC
     # 2715853792 bases of 2715853792 (100.000%) in intersection
 
     featureBits -countGaps bosTau9 ucscToRefSeq
     # 2715853792 bases of 2715853792 (100.000%) in intersection
 
 #########################################################################
 # add chromAlias table (DONE - 2018-11-08 - Hiram)
     # after ucscToRefSeq and ucscToINSDC tables have been created
 
     mkdir /hive/data/genomes/bosTau9/bed/chromAlias
     cd /hive/data/genomes/bosTau9/bed/chromAlias
 
     hgsql -N -e 'select chrom,name from ucscToRefSeq;' bosTau9 \
         | sort -k1,1 > ucsc.refseq.tab
     hgsql -N -e 'select chrom,name from ucscToINSDC;' bosTau9 \
         | sort -k1,1 > ucsc.genbank.tab
 
     ### Adding Ensembl alias with v95 release, after idKeys made: 2019-01-16
     join -t$'\t' ../idKeys/bosTau9.idKeys.txt \
         ../../ens95/ensBosTau9.idKeys.txt | cut -f2- \
           | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
             | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
                | sort -k1,1 -k2,2n > ucscToEns.bed
     # Ensembl is missing a chrM sequence:
     wc -l *.bed
   2210 ucscToEns.bed
   2211 ucscToINSDC.bed
   2211 ucscToRefSeq.bed
     cut -f1,4 ucscToEns.bed | sort > ucsc.ensembl.tab
 
     ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
 	> bosTau9.chromAlias.tab
 
 for t in refseq genbank ensembl
 do
   c0=`cat ucsc.$t.tab | wc -l`
   c1=`grep $t bosTau9.chromAlias.tab | wc -l`
   ok="OK"
   if [ "$c0" -ne "$c1" ]; then
      ok="ERROR"
   fi
   printf "# checking $t: $c0 =? $c1 $ok\n"
 done
 # checking refseq: 2211 =? 2211 OK
 # checking genbank: 2211 =? 2211 OK
 # checking ensembl: 2210 =? 2210 OK
 
     hgLoadSqlTab bosTau9 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         bosTau9.chromAlias.tab
 
 #########################################################################
 # fixup search rule for assembly track/gold table (DONE - 2018-11-06 - Hiram)
     cd ~/kent/src/hg/makeDb/trackDb/cow/bosTau9
 
     # preview prefixes and suffixes:
     hgsql -N -e "select frag from gold;" bosTau9 \
       | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c | sed -e 's/^/#\t/;'
     #             1 NC_.1
     #          2210 NKLS.1
 
     # implies a rule: 'N[CK][LS0-9_]+(\.[0-9]+)?'
 
     # verify this rule will find them all and eliminate them all:
     hgsql -N -e "select frag from gold;" bosTau9 | wc -l
     # 2211
 
     hgsql -Ne "select frag from gold" bosTau9 \
         | egrep -e 'N[CK][LS0-9_]+(\.[0-9]+)?' | wc -l
     # 2211
 
     hgsql -Ne "select frag from gold" bosTau9 \
         | egrep -v -e 'N[CK][LS0-9_]+(\.[0-9]+)?' | wc -l
     # 0
 
     # hence, add to trackDb/cow/bosTau9/trackDb.ra
 searchTable gold
 shortCircuit 1
 termRegex N[CK][LS0-9_]+(\.[0-9]+)?
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
     # verify searches work in the position box
 
 #############################################################################
 # running repeat masker (DONE - 2018-11-06 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/repeatMasker
     cd /hive/data/genomes/bosTau9/bed/repeatMasker
     time  (doRepeatMasker.pl -buildDir=`pwd` \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -smallClusterHub=ku bosTau9) > do.log 2>&1 &
     # real    531m36.333s
 
     # had quite a few with this error:
 # RepeatMasker bug?: Undefined id, line 5255479 of input:
 #  1073  25.3  0.4  0.4  chrUn_NW_020191554v1   44876   45087   (19682) C  BTSAT2c        Satellite/centr       (11)  341      1   
 
     # get that list of items out of the do.log and remove them from
     # the bosTau9.sorted.fa.out to clean it up:
     mv bosTau9.sorted.fa.out bosTau9.sorted.fa.out.broken
     grep chrUn_NW do.log | cut -c24-59 | sort > grep.remove.list
     # there are 171 of these:
     wc -l grep.remove.list
     # 171
     grep -v -f grep.remove.list bosTau9.sorted.fa.out.broken \
 	> bosTau9.sorted.fa.out
     # verify 171 lines removed:
     wc -l bosTau9.sorted.fa.out bosTau9.sorted.fa.out.broken
     # 5620007 bosTau9.sorted.fa.out
     # 5620178 bosTau9.sorted.fa.out.broken
     #     171 difference
     mv bosTau9.fa.out bosTau9.fa.out.broken
     grep -v -f grep.remove.list bosTau9.fa.out.broken \
 	> bosTau9.fa.out
     wc -l bosTau9.fa.out.broken bosTau9.fa.out
     #	5620178 bosTau9.fa.out.broken
     #	5620007 bosTau9.fa.out
     #       171 difference
     # the last command of doCat.csh:
     time /cluster/bin/scripts/extractNestedRepeats.pl bosTau9.fa.out \
 	| sort -k1,1 -k2,2n > bosTau9.nestedRepeats.bed
 
     # continuing:
     time  (doRepeatMasker.pl -buildDir=`pwd` \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         -continue=mask -smallClusterHub=ku bosTau9) > mask.log 2>&1 &
     # real    17m45.760s
 
     egrep "bases|Total|masked" faSize.rmsk.txt \
 	| fold -w 75 -s  | sed -e 's/^/# /;'
 # 2715853792 bases (28162 N's 2715825630 real 1376420245 upper 1339405385 
 # lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (chrUn_NW_020192071v1) 
 # max 158534110 (chr1) median 21935
 # %49.32 masked total, %49.32 masked real
 
     egrep -i "versi|relea" do.log
     # RepeatMasker version open-4.0.7
     #    February 01 2017 (open-4-0-7) 1.331 version of RepeatMasker
     # CC    Dfam_Consensus RELEASE 20170127;                            *
     # CC    RepBase RELEASE 20170127;                                   *
 
     time featureBits -countGaps bosTau9 rmsk
     # 1339405686 bases of 2715853792 (49.318%) in intersection
     # real    0m31.962s
 
     # why is it different than the faSize above ?
     # because rmsk masks out some N's as well as bases, the faSize count above
     #   separates out the N's from the bases, it doesn't show lower case N's
 
     # faster way to get the same result on high contig count assemblies:
     time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' bosTau9 \
         | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total"
     # total 1339405686.000000
     # real    0m24.033s
 
 ##########################################################################
 # running simple repeat (DONE - 2018-11-06 - Hiram)
 
     mkdir /hive/data/genomes/bosTau9/bed/simpleRepeat
     cd /hive/data/genomes/bosTau9/bed/simpleRepeat
     # using -trf409 6 here since this genome is similar in size to human (human == 6)
     time (doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \
         -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
         -trf409 6 bosTau9) > do.log 2>&1 &
     # real    173m39.504s
 
     cat fb.simpleRepeat
     # 78768566 bases of 2715853792 (2.900%) in intersection
 
     bigBedInfo *.bb | sed -e 's/^/# /;'
 # version: 4
 # fieldCount: 16
 # hasHeaderExtension: yes
 # isCompressed: yes
 # isSwapped: 0
 # extraIndexCount: 0
 # itemCount: 545,256
 # primaryDataSize: 17,063,916
 # primaryIndexSize: 103,156
 # zoomLevels: 10
 # chromCount: 2051
 # basesCovered: 78,768,566
 # meanDepth (of bases covered): 6.563866
 # minDepth: 1.000000
 # maxDepth: 206.000000
 # std of depth: 9.700871
 
     # adding this trfMask to the other masking
     cd /hive/data/genomes/bosTau9
 
     # when using the Window Masker result:
 #    twoBitMask bed/windowMasker/bosTau9.cleanWMSdust.2bit \
 #       -add bed/simpleRepeat/trfMask.bed  bosTau9.2bit
     #   you can safely ignore the warning about fields >= 13
 
     # when using Rmsk results, add to rmsk after it is done:
     twoBitMask bosTau9.rmsk.2bit \
         -add bed/simpleRepeat/trfMask.bed bosTau9.2bit
     #   you can safely ignore the warning about fields >= 13
 
     twoBitToFa bosTau9.2bit stdout | faSize stdin > faSize.bosTau9.2bit.txt
     egrep "bases|Total|masked" faSize.bosTau9.2bit.txt \
 	| fold -w 75 -s  | sed -e 's/^/# /;'
 # 2715853792 bases (28162 N's 2715825630 real 1375606800 upper 1340218830 
 # lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (chrUn_NW_020192071v1) 
 # max 158534110 (chr1) median 21935
 # %49.35 masked total, %49.35 masked real
 
     # reset the symlink
     rm /gbdb/bosTau9/bosTau9.2bit
     ln -s `pwd`/bosTau9.2bit /gbdb/bosTau9/bosTau9.2bit
 
 #########################################################################
 # CREATE MICROSAT TRACK (DONE - 2018-11-08 - Hiram)
     ssh hgwdev
     mkdir /cluster/data/bosTau9/bed/microsat
     cd /cluster/data/bosTau9/bed/microsat
 
     awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
        ../simpleRepeat/simpleRepeat.bed > microsat.bed
 
     hgLoadBed bosTau9 microsat microsat.bed
     # Read 25219 elements of size 4 from microsat.bed
 
 ##########################################################################
 ## WINDOWMASKER (DONE - 2018-11-07 - Hiram)
 
     mkdir /hive/data/genomes/bosTau9/bed/windowMasker
     cd /hive/data/genomes/bosTau9/bed/windowMasker
     time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
         -dbHost=hgwdev bosTau9) > do.log 2>&1
     # real    95m24.111s
 
     # Masking statistics
     egrep "bases|Total|masked" faSize.bosTau9.cleanWMSdust.txt \
 	| fold -w 75 -s  | sed -e 's/^/# /;'
 # 2715853792 bases (28162 N's 2715825630 real 1580978410 upper 1134847220 
 # lower) in 2211 sequences in 1 files
 # Total size: mean 1228337.3 sd 10762990.9 min 1034 (chrUn_NW_020192071v1) 
 # max 158534110 (chr1) median 21935
 # %41.79 masked total, %41.79 masked real
 
     cat fb.bosTau9.rmsk.windowmaskerSdust.txt
     # 907805797 bases of 2715853792 (33.426%) in intersection
 
 #############################################################################
 # ncbiRefSeq (DONE - 2018-11-08 - Hiram)
 
     # can be run up after ucscToRefSeq table is constructed
     mkdir /hive/data/genomes/bosTau9/bed/ncbiRefSeq
     cd /hive/data/genomes/bosTau9/bed/ncbiRefSeq
 
     # adjust the name arguments
     time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -bigClusterHub=ku -dbHost=hgwdev \
       -fileServer=hgwdev -smallClusterHub=hgwdev-101 -workhorse=hgwdev \
       refseq vertebrate_mammalian Bos_taurus \
       GCF_002263795.1_ARS-UCD1.2 bosTau9) > do.log 2>&1 &
     # real    5m10.572s
 
     cat fb.ncbiRefSeq.bosTau9.txt
     # 80750008 bases of 2715853792 (2.973%) in intersection
 
 #############################################################################
 # cpgIslands - (DONE - 2018-11-08 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/cpgIslands
     cd /hive/data/genomes/bosTau9/bed/cpgIslands
     time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev -smallClusterHub=ku bosTau9) > do.log 2>&1 &
     # real    3m43.856s
 
     cat fb.bosTau9.cpgIslandExt.txt
     # 26618121 bases of 2715853792 (0.980%) in intersection
 
 ##############################################################################
 # genscan - (DONE - 2018-11-08 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/genscan
     cd /hive/data/genomes/bosTau9/bed/genscan
     time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -bigClusterHub=ku bosTau9) > do.log 2>&1 &
     # real    48m49.605s
 
     # one broken one finished with 2,000,000 window size:
     time (././runGsBig2M.csh chr13 000 gtf/000/chr13.gtf pep/000/chr13.pep subopt/000/chr13.bed) > lastOne.log 2>&1
     # real    28m54.073s
 
     time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
       -continue=makeBed -bigClusterHub=ku bosTau9) > makeBed.log 2>&1 &
 
     cat fb.bosTau9.genscan.txt
     # 56389215 bases of 2715853792 (2.076%) in intersection
 
     cat fb.bosTau9.genscanSubopt.txt
     # 51538764 bases of 2715853792 (1.898%) in intersection
 
     bigBedInfo bosTau9.genscan.bb | sed -e 's/^/# /;'
 # version: 4
 # fieldCount: 12
 # hasHeaderExtension: yes
 # isCompressed: yes
 # isSwapped: 0
 # extraIndexCount: 0
 # itemCount: 43,798
 # primaryDataSize: 2,587,464
 # primaryIndexSize: 37,772
 # zoomLevels: 7
 # chromCount: 804
 # basesCovered: 1,855,898,050
 # meanDepth (of bases covered): 1.000000
 # minDepth: 1.000000
 # maxDepth: 1.000000
 # std of depth: -nan
 
 #############################################################################
 # augustus gene track (DONE - 2018-11-08 - Hiram)
 
     mkdir /hive/data/genomes/bosTau9/bed/augustus
     cd /hive/data/genomes/bosTau9/bed/augustus
     time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \
      -species=human -dbHost=hgwdev -workhorse=hgwdev bosTau9) > do.log 2>&1 &
     # real    109m41.672s
 
     cat fb.bosTau9.augustusGene.txt
     # 52391476 bases of 2715853792 (1.929%) in intersection
 
      bigBedInfo  bosTau9.augustus.bb | sed -e 's/^/# /;'
 # version: 4
 # fieldCount: 20
 # hasHeaderExtension: yes
 # isCompressed: yes
 # isSwapped: 0
 # extraIndexCount: 0
 # itemCount: 30,862
 # primaryDataSize: 2,187,353
 # primaryIndexSize: 22,468
 # zoomLevels: 7
 # chromCount: 454
 # basesCovered: 1,220,657,991
 # meanDepth (of bases covered): 1.261447
 # minDepth: 1.000000
 # maxDepth: 5.000000
 # std of depth: 0.630592
 
 #############################################################################
 # lastz/chain/net swap human/hg38 (TBD - 2018-04-25 - Hiram)
     # original alignment
     cd /hive/data/genomes/hg38/bed/lastzBosTau9.2018-04-25
 
     cat fb.hg38.chainBosTau9Link.txt
     # 1388649593 bases of 3049335806 (45.539%) in intersection
     cat fb.hg38.chainSynBosTau9Link.txt
     # 1330693519 bases of 3049335806 (43.639%) in intersection
     cat fb.hg38.chainRBestBosTau9Link.txt
     # 1278396766 bases of 3049335806 (41.924%) in intersection
 
     # running the swap
     mkdir /hive/data/genomes/bosTau9/bed/blastz.hg38.swap
     cd /hive/data/genomes/bosTau9/bed/blastz.hg38.swap
     time (doBlastzChainNet.pl -verbose=2 \
         -swap /hive/data/genomes/hg38/bed/lastzBosTau9.2018-04-25/DEF \
         -chainMinScore=3000 -chainLinearGap=medium \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet) > swap.log 2>&1
     # real    104m31.748s
 
     cat fb.bosTau9.chainHg38Link.txt
     # 1319553403 bases of 2587515673 (50.997%) in intersection
     cat fb.bosTau9.chainSynHg38Link.txt
     # 1280196824 bases of 2587515673 (49.476%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
         bosTau9 hg38) > rbest.log 2>&1 &
     # real    638m15.603s
 
     cat fb.bosTau9.chainRBestHg38Link.txt 
     # 1279077824 bases of 2587515673 (49.433%) in intersection
 
 #############################################################################
 # lastz/chain/net swap mouse/mm10 (TBD - 2018-04-25 - Hiram)
 
     # alignment to mouse/mm10:
     cd /hive/data/genomes/mm10/bed/lastzBosTau9.2018-04-25
 
     cat fb.mm10.chainBosTau9Link.txt
     # 693504453 bases of 2652783500 (26.143%) in intersection
 
     cat fb.mm10.chainRBestBosTau9Link.txt
     # 657097998 bases of 2652783500 (24.770%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/bosTau9/bed/blastz.mm10.swap
     cd /hive/data/genomes/bosTau9/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzBosTau9.2018-04-25/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    63m12.935s
 
     cat fb.bosTau9.chainMm10Link.txt
     # 680117358 bases of 2587515673 (26.285%) in intersection
     cat fb.bosTau9.chainSynMm10Link.txt
     # 643562837 bases of 2587515673 (24.872%) in intersection
 
     time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \
 	bosTau9 mm10) > rbest.log 2>&1 &
     # real    437m1.637s
 
     cat fb.bosTau9.chainRBestMm10Link.txt
     # 656602300 bases of 2587515673 (25.376%) in intersection
 
 ##############################################################################
 # Create kluster run files (DONE - 2018-11-08 - Hiram)
 
     cd /hive/data/genomes/bosTau9
     # numerator is bosTau9 gapless bases "real" as reported by:
     featureBits -noRandom -noHap bosTau9 gap
     # 0 bases of 2628411261 (0.000%) in intersection
     #               ^^^
 
     # denominator is hg19 gapless bases as reported by:
     #   featureBits -noRandom -noHap hg19 gap
     #     234344806 bases of 2861349177 (8.190%) in intersection
     # 1024 is threshold used for human -repMatch:
     calc \(2628411261 / 2861349177 \) \* 1024
     # (2628411261 / 2861349177 ) * 1024 = 940.637778
 
     # ==> use -repMatch=900, the same value as was used for bosTau8
     cd /hive/data/genomes/bosTau9
     blat bosTau9.2bit /dev/null /dev/null -tileSize=11 \
         -makeOoc=jkStuff/bosTau9.11.ooc -repMatch=900
     #   Wrote 35432 overused 11-mers to jkStuff/bosTau9.11.ooc
     # bosTau8 at repMatch=900 was:
     #   Wrote 33613 overused 11-mers to jkStuff/bosTau8.11.ooc
 
     # no unbridged gaps so no need to worry about gaplift file:
     hgsql -Ne "select bridge from gap" bosTau9 | sort | uniq -c
     # no gaps of any sort
 
     hgsql -Ne "select count(*) from gap" bosTau9
     #    +---+
     #    | 0 |
     #    +---+
 
 ##############################################################################
 # LIFTOVER TO bosTau8 (DONE - 2018-11-08 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/bosTau9/bed/blat.bosTau8.2018-11-08
     cd /hive/data/genomes/bosTau9/bed/blat.bosTau8.2018-11-08
     time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
 	-ooc=/hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          bosTau9 bosTau8) > do.log 2>&1 &
     # real    1255m56.756s
 
     # verify the convert link on the test browser is now active
     # from bosTau9 to bosTau8
 
 ##############################################################################
 # LIFTOVER TO bosTau7 (DONE - 2018-11-08 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/bosTau9/bed/blat.bosTau7.2018-11-08
     cd /hive/data/genomes/bosTau9/bed/blat.bosTau7.2018-11-08
     time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
 	-ooc=/hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          bosTau9 bosTau7) > do.log 2>&1 &
     # real    831m15.041s
 
     # verify the convert link on the test browser is now active
     # from bosTau9 to bosTau7
 
 ##############################################################################
 # LIFTOVER TO bosTau6 (DONE - 2018-11-08 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/bosTau9/bed/blat.bosTau6.2018-11-08
     cd /hive/data/genomes/bosTau9/bed/blat.bosTau6.2018-11-08
     time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
 	-ooc=/hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          bosTau9 bosTau6) > do.log 2>&1 &
     # real    1236m43.784s
 
     # verify the convert link on the test browser is now active
     # from bosTau9 to bosTau6
 
 ##############################################################################
 # crispr 10K shoulders (DONE - 2018-11-16 - Hiram)
     mkdir  /hive/data/genomes/bosTau9/bed/crispr10K
     cd  /hive/data/genomes/bosTau9/bed/crispr10K
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
    -stop=load -buildDir=`pwd` -smallClusterHub=hgwdev-101 bosTau9 ncbiRefSeq) \
 	> do.log 2>&1
     # real    1192m19.444s
     # broke down, fixed, manually completed specScores
     time find tmp/outGuides -type f | xargs cut -f3-6 > ../specScores.tab
     # real    329m49.271s
     # effScores: real      1410m36.918s
     # offTargets: real     99m35.115s
     # load: real   132m24.530s
 
     # hive cleaning - 2021-04-26 - Hiram
     time (~/kent/src/hg/utils/automation/doCrispr.pl \
      -continue=cleanup -buildDir=`pwd` -smallClusterHub=hgwdev bosTau9 \
           -fileServer=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
             -workhorse=hgwdev) > cleanup.log 2>&1 &
 
 ##############################################################################
 # crispr whole genome (DONE - 2022-03-01 - Hiram)
     mkdir /hive/data/genomes/bosTau9/bed/crisprAll
     cd /hive/data/genomes/bosTau9/bed/crisprAll
 
     # the large shoulder argument will cause the entire genome to be scanned
     # this takes a while for a new genome to get the bwa indexing done
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 -stop=ranges \
     bosTau9 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > indexFa.log 2>&1
     # real    1m10.666s
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
     -continue=ranges bosTau9 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > ranges.log 2>&1
     # real    8554m11.613s
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
     -continue=effScores bosTau9 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > effScores.log 2>&1
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
     -continue=offTargets bosTau9 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > offTargets.log 2>&1
 
     time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
     -continue=load bosTau9 -tableName=crisprAll \
     -fileServer=hgwdev \
     -buildDir=`pwd` -smallClusterHub=hgwdev -bigClusterHub=ku \
       -workhorse=hgwdev) > load.log 2>&1
 
     cat guides/run.time | sed -e 's/^/# /;'
 # Completed: 100 of 100 jobs
 # CPU time in finished jobs:      12054s     200.90m     3.35h    0.14d  0.000 y
 # IO & Wait Time:                   282s       4.70m     0.08h    0.00d  0.000 y
 # Average job time:                 123s       2.06m     0.03h    0.00d
 # Longest finished job:             432s       7.20m     0.12h    0.01d
 # Submission to last job:           434s       7.23m     0.12h    0.01d
 
     cat specScores/run.time | sed -e 's/^/# /;'
 # Completed: 841413 of 841413 jobs
 # CPU time in finished jobs:   71934084s 1198901.39m 19981.69h  832.57d  2.281 y
 # IO & Wait Time:                     0s       0.00m     0.00h    0.00d  0.000 y
 # Average job time:                  85s       1.41m     0.02h    0.00d
 # Longest finished job:             170s       2.83m     0.05h    0.00d
 # Submission to last job:        203959s    3399.32m    56.66h    2.36d
 
     grep -c . effScores.tab
     # 288692962
     grep -c . specScores.tab 
     # 218717447
 
     cat effScores/run.time | sed -e 's/^/# /;'
 # Completed: 28864 of 28864 jobs
 # CPU time in finished jobs:   12570265s  209504.41m  3491.74h  145.49d  0.399 y
 # IO & Wait Time:                 45737s     762.29m    12.70h    0.53d  0.001 y
 # Average job time:                 437s       7.28m     0.12h    0.01d
 # Longest finished job:            7448s     124.13m     2.07h    0.09d
 # Submission to last job:         31650s     527.50m     8.79h    0.37d
 
     cat offTargets/run.time | sed -e 's/^/# /;'
 # Completed: 145438 of 145438 jobs
 # CPU time in finished jobs:    2306725s   38445.42m   640.76h   26.70d  0.073 y
 # IO & Wait Time:                901605s   15026.74m   250.45h   10.44d  0.029 y
 # Average job time:                  22s       0.37m     0.01h    0.00d
 # Longest finished job:             133s       2.22m     0.04h    0.00d
 # Submission to last job:         14396s     239.93m     4.00h    0.17d
 
 ##############################################################################
 # GENBANK AUTO UPDATE (DONE - 2018-11-08 - Hiram)
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # /cluster/data/genbank/data/organism.lst shows:
     # #organism      mrnaCnt   estCnt  refSeqCnt
     # Bos taurus	20115	1583423	13363
 
     # edit etc/genbank.conf to add bosTau9 just after bosTau8
 # bosTau9 (cow - Bos taurus - refseq GCF_002263795.1 ARS-UCD1.2 - taxId 9913)
 bosTau9.serverGenome = /hive/data/genomes/bosTau9/bosTau9.2bit
 bosTau9.clusterGenome = /hive/data/genomes/bosTau9/bosTau9.2bit
 bosTau9.ooc = /hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc
 bosTau9.lift = no
 bosTau9.perChromTables = no
 bosTau9.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 bosTau9.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 bosTau9.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 bosTau9.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 bosTau9.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 bosTau9.genbank.est.xeno.pslCDnaFilter    = ${finished.genbank.est.xeno.pslCDnaFilter}
 bosTau9.downloadDir = bosTau9
 # bosTau9.upstreamGeneTbl = refGene
 # defaults yes: genbank.mrna.native.load genbank.mrna.native.loadDesc
 # yes: genbank.est.native.load refseq.mrna.native.load
 # yes: refseq.mrna.native.loadDesc refseq.mrna.xeno.load
 # yes: refseq.mrna.xeno.loadDesc
 # defaults no: genbank.mrna.xeno.load genbank.mrna.xeno.loadDesc
 # no: genbank.est.native.loadDesc genbank.est.xeno.load
 # no: genbank.est.xeno.loadDesc
 
     # verify stated file paths do exist:
     grep bosTau9 etc/genbank.conf | egrep "Genome|ooc|lift" \
 	| awk '{print $NF}' | sort -u | xargs ls -og
 ls: cannot access no: No such file or directory
 -rw-rw-r-- 1 712534740 Nov  7 13:35 /hive/data/genomes/bosTau9/bosTau9.2bit
 -rw-rw-r-- 1    141736 Nov  8 13:13 /hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc
     # ls error on the file named 'no' is from the bosTau9.lift = no
 
     git commit -m 'adding bosTau9 refs #22425' etc/genbank.conf
     git push
 
     # add bosTau9 to:
     #   etc/hgwdev.dbs
     git commit -m 'adding bosTau9 refs #22425' etc/hgwdev.dbs
 
     git push
     # update /cluster/data/genbank/:
     make etc-update
 
 #############################################################################
 #  BLATSERVERS ENTRY (DONE - 2018-11-08 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
     ssh hgwdev
 
     hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
     VALUES ("bosTau9", "blat1c", "17908", "1", "0"); \
     INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
     VALUES ("bosTau9", "blat1c", "17909", "0", "1");' \
     hgcentraltest
     #   test it with some sequence
 
 ##############################################################################
 ## reset default position to the casein gene complex (milk production proteins)
 ##  (DONE - 2018-11-08 - Hiram)
 ## https://www.ncbi.nlm.nih.gov/pmc/articles/PMC332753/
 ## quote: The four genes reside on less than 200 kb of DNA in
 ##        the order CASAS1-CASB-CASAS2-CASK.
 
     ssh hgwdev
     hgsql -e 'update dbDb set defaultPos="chr6:85405597-85664387" where name="bosTau9";' hgcentraltest
 
 ##############################################################################
 # all.joiner update, downloads and in pushQ - (TBD - 2018-05-01 - Hiram)
     cd $HOME/kent/src/hg/makeDb/schema
     ~/kent/src/hg/utils/automation/verifyBrowser.pl bosTau9
 # 64 tables in database bosTau9 - Cow, Bos taurus
 # verified 62 tables in database bosTau9, 2 extra tables, 24 optional tables
 # NCBI RefSeq genes     10 optional tables
 # chainNetRBestHg38     3 optional tables
 # chainNetRBestMm10     3 optional tables
 # chainNetSynHg38       3 optional tables
 # chainNetSynMm10       3 optional tables
 # gapOverlap    1 optional tables
 # tandemDups    1 optional tables
 # 1     crispr10KRanges - extra table
 # 2     crispr10KTargets        - extra table
 # 9 genbank tables found
 # verified 29 required tables, 0 missing tables
 # hg38 chainNet to bosTau9 found 3 required tables
 # mm10 chainNet to bosTau9 found 3 required tables
 # hg38 chainNet RBest and syntenic to bosTau9 found 6 optional tables
 # mm10 chainNet RBest and syntenic to bosTau9 found 3 optional tables
 # liftOver to previous versions: 3, from previous versions: 3
 
     # fixup all.joiner until this is a clean output
     joinerCheck -database=bosTau9 -tableCoverage all.joiner
     joinerCheck -database=bosTau9 -times all.joiner
     joinerCheck -database=bosTau9 -keys all.joiner
 
     cd /hive/data/genomes/bosTau9
     # clean up obsolete trackDb work, assuming you have already checked in
     # these trackDb files into the source tree
     rm -fr TemporaryTrackDbCheckout
 
     time (makeDownloads.pl -workhorse=hgwdev bosTau9) > downloads.log 2>&1
 
     #   now ready for pushQ entry
     mkdir /hive/data/genomes/bosTau9/pushQ
     cd /hive/data/genomes/bosTau9/pushQ
   time (makePushQSql.pl -redmineList bosTau9) > bosTau9.pushQ.sql 2> stderr.out
     # real    9m34.930s
 
     # remove the tandemDups and gapOverlap from the file list:
     sed -i -e "/tandemDups/d" redmine.bosTau9.table.list
     sed -i -e "/Tandem Dups/d" redmine.bosTau9.releaseLog.txt
     sed -i -e "/gapOverlap/d" redmine.bosTau9.table.list
     sed -i -e "/Gap Overlaps/d" redmine.bosTau9.releaseLog.txt
 
     #   check for errors in stderr.out, some are OK, e.g.:
     # WARNING: bosTau9 does not have seq
     # WARNING: bosTau9 does not have extFile
 
     # add the path names to the listing files in the redmine issue
     # in the three appropriate entry boxes:
 
 /hive/data/genomes/bosTau9/pushQ/redmine.bosTau9.file.list
 /hive/data/genomes/bosTau9/pushQ/redmine.bosTau9.releaseLog.txt
 /hive/data/genomes/bosTau9/pushQ/redmine.bosTau9.table.list
 
-#########################################################################
+##############################################################################
+# LIFTOVER TO bosTau4 (DONE - 2022-12-06 - Hiram)
+    ssh hgwdev
+    mkdir /hive/data/genomes/bosTau9/bed/blat.bosTau4.2022-12-06
+    cd /hive/data/genomes/bosTau9/bed/blat.bosTau4.2022-12-06
+    doSameSpeciesLiftOver.pl -verbose=2 \
+        -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/bosTau9/bosTau9.2bit \
+        -targetSizes=/hive/data/genomes/bosTau9/chrom.sizes \
+        -query2Bit=/hive/data/genomes/bosTau4/bosTau4.2bit \
+        -querySizes=/hive/data/genomes/bosTau4/chrom.sizes \
+        -ooc=/hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc \
+         bosTau9 bosTau4
+    time (doSameSpeciesLiftOver.pl -verbose=2 \
+        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
+        -target2Bit=/hive/data/genomes/bosTau9/bosTau9.2bit \
+        -targetSizes=/hive/data/genomes/bosTau9/chrom.sizes \
+        -query2Bit=/hive/data/genomes/bosTau4/bosTau4.2bit \
+        -querySizes=/hive/data/genomes/bosTau4/chrom.sizes \
+        -ooc=/hive/data/genomes/bosTau9/jkStuff/bosTau9.11.ooc \
+         bosTau9 bosTau4) > doLiftOverToBosTau9.log 2>&1
+    # real    384m15.787s
+
+    # see if the liftOver menus function in the browser from bosTau9 to bosTau4
+
+##############################################################################