1519f260b2bc9fca15f37cd26496b7cd5e5b2383
gperez2
  Fri Oct 29 17:59:39 2021 -0700
oviAri3 vs. sheep GCF_002742125.1 lastz/chain/net run for user, refs #28409

diff --git src/hg/makeDb/doc/oviAri3.txt src/hg/makeDb/doc/oviAri3.txt
index 452ea78..18b263e 100644
--- src/hg/makeDb/doc/oviAri3.txt
+++ src/hg/makeDb/doc/oviAri3.txt
@@ -1,831 +1,932 @@
 # for emacs: -*- mode: sh; -*-
 
 #	DATE:	22-Aug-2012
 #	ORGANISM:	Ovis aries
 #	TAXID:	9940
 #	ASSEMBLY LONG NAME:	Oar_v3.1
 #	ASSEMBLY SHORT NAME:	Oar_v3.1
 #	ASSEMBLY SUBMITTER:	International Sheep Genome Consortium
 #	ASSEMBLY TYPE:	Haploid
 #	NUMBER OF ASSEMBLY-UNITS:	1
 #	ASSEMBLY ACCESSION:	GCA_000298735.1
 #	FTP-RELEASE DATE: 15-Oct-2012
 
 
 #	http://www.ncbi.nlm.nih.gov/assembly/457978/ ISGC Oar_v3.1
 #	http://www.ncbi.nlm.nih.gov/genome/83
 #	http://www.ncbi.nlm.nih.gov/bioproject/PRJNA179263 - RefSeq
 #	http://www.ncbi.nlm.nih.gov/assembly/GCF_000298735.1/ - RefSeq
 #	http://www.ncbi.nlm.nih.gov/bioproject/PRJNA169880
 #	http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AMGL01
 
 #	Assembly Method : SOAPdenovo v. 1.03
 #	Assembly Name   : Oar_v3.1
 #	Genome Coverage : 142x Illumina; 4x 454
 #	Sequencing Technology   : Illumina GAII; 454
 
 #	http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9940
 #	Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis
 
 # rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ovis_aries/Oar_v3.1/
 
 ##########################################################################
 # Download sequence (DONE - 2013-02-26 - Hiram)
     mkdir -p /hive/data/genomes/oviAri3/genbank
     cd /hive/data/genomes/oviAri3/genbank
 
     time rsync -a -P \
 rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ovis_aries/Oar_v3.1/ ./
 
     # verify the size of the sequence here:
     faSize Primary_Assembly/assembled_chromosomes/FASTA/chr*.fa.gz \
         Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
 # 2619037772 bases (84718522 N's 2534319250 real 2534319250 upper 0 lower)
 #    in 5697 sequences in 28 files
 # Total size: mean 459722.3 sd 7762987.3
 #    min 1001 (gi|406379182|gb|AMGL01128047.1|)
 #    max 275612895 (gi|406684590|gb|CM001582.1|) median 2901
 
 ##########################################################################
 # fixup names for UCSC standards (DONE - 2013-06-09 - Hiram)
     mkdir /hive/data/genomes/oviAri3/ucsc
     cd /hive/data/genomes/oviAri3/ucsc
 
     ########################  Assembled Chromosomes
     cat << '_EOF_' > ucscCompositeAgp.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 
 open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
         die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $accToChr{$acc} = $chrN;
 }
 close (FH);
 
 foreach my $acc (keys %accToChr) {
     my $chrN =  $accToChr{$acc};
     print "$acc $accToChr{$acc}\n";
     open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
     open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             $line =~ s/^$acc/chr${chrN}/;
             print UC $line;
         }
     }
     close (FH);
     close (UC);
     open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
     open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             printf UC ">chr${chrN}\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
 
     # << happy emacs
     chmod +x ucscCompositeAgp.pl
     time ./ucscCompositeAgp.pl
 
     ########################  Unplaced scaffolds
     # verify we don't have any .acc numbers different from .1
     zcat \
     ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
 	| cut -f1 | grep -v '^#' \
 	| sed -e 's/[A-Z0-9]*//i' | sort | uniq -c
     #   16286 .1
 
     cat << '_EOF_' > unplaced.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
 my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
 open (FH, "zcat $agpFile|") or die "can not read $agpFile";
 open (UC, ">chrUn.agp") or die "can not write to chrUn.agp";
 while (my $line = <FH>) {
     if ($line =~ m/^#/) {
         print UC $line;
     } else {
         $line =~ s/\.1//;
         printf UC "chrUn_%s", $line;
     }
 }
 close (FH);
 close (UC);
 
 open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
 open (UC, ">chrUn.fa") or die "can not write to chrUn.fa";
 while (my $line = <FH>) {
     if ($line =~ m/^>/) {
         chomp $line;
         $line =~ s/.*gb\|//;
         $line =~ s/\.1. .*//;
         printf UC ">chrUn_$line\n";
     } else {
         print UC $line;
     }
 }
 close (FH);
 close (UC);
 '_EOF_'
     # << happy emacs
     chmod +x unplaced.pl
     time ./unplaced.pl
 
     gzip chr*.agp chr*.fa
 
     # make sure none of the names got to be over 31 characers long:
     zcat chrUn.agp.gz | grep -v "^#" | cut -f1 | awk '{print length($1)}' | sort -rn | head -1
     # 18
 
     # verify nothing lost from original:
     time faSize *.fa.gz
     # real    0m32.541s
 
 # 2619037772 bases (84718522 N's 2534319250 real 2534319250 upper 0 lower)
 #    in 5697 sequences in 28 files
 # Total size: mean 459722.3 sd 7762987.3 min 1001 (chrUn_AMGL01128047)
 #    max 275612895 (chr1) median 2901
 
     # make sure none of the names got to be over 31 characers long:
     zcat *.agp.gz | grep -v "^#" | cut -f1 | awk '{print length($0)}' \
         | sort -rn | uniq -c | head
     #    2310 18
     #   13976 14
     #   99413 5
     #  140132 4
 
 ##########################################################################
 # Initial makeGenomeDb.pl (DONE - 2013-06-09 - Hiram)
     cd /hive/data/genomes/oviAri3
     cat << '_EOF_' > oviAri3.config.ra
 # Config parameters for makeGenomeDb.pl:
 db oviAri3
 clade mammal
 # genomeCladePriority 31
 scientificName Ovis aries
 commonName Sheep
 assemblyDate Aug. 2012
 assemblyLabel International Sheep Genome Consortium
 assemblyShortLabel ISGC Oar_v3.1
 orderKey 2369
 mitoAcc NC_001941
 fastaFiles /hive/data/genomes/oviAri3/ucsc/chr*.fa.gz
 agpFiles /hive/data/genomes/oviAri3/ucsc/chr*.agp.gz
 # qualFiles none
 dbDbSpeciesDir sheep
 photoCreditURL http://www.ars.usda.gov/is/graphics/photos/
 photoCreditName Agricultural Research Service
 ncbiGenomeId 83
 ncbiAssemblyId 457978
 ncbiAssemblyName Oar_v3.1
 ncbiBioProject 179263
 genBankAccessionID GCA_000298735.1
 taxId 9940
 '_EOF_'
     # << happy emacs
 
     # verify sequence and agp are OK
     time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
         -stop=agp oviAri3.config.ra > agp.log 2>&1
     # real 2m24.183s
 
     # verify no problem:
     tail -1 agp.log
     #  *** All done!  (through the 'agp' step)
 
     time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
         -continue=db oviAri3.config.ra > db.log 2>&1
     # real    19m28.374s
     #
     #	add the trackDb entries to the source tree, and the 2bit link:
     ln -s `pwd`/oviAri3.unmasked.2bit /gbdb/oviAri3/oviAri3.2bit
 
 #########################################################################
 # running repeat masker (DONE - 2013-06-09 - Hiram)
     mkdir /hive/data/genomes/oviAri3/bed/repeatMasker
     cd /hive/data/genomes/oviAri3/bed/repeatMasker
     time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
 	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
 	-smallClusterHub=encodek oviAri3 > do.log 2>&1 &
     # real    566m7.955s
 
     cat faSize.rmsk.txt
     # 2619054388 bases (84718522 N's 2534335866 real 1389945274 upper
     #  1144390592 lower) in 5698 sequences in 1 files
     # Total size: mean 459644.5 sd 7762308.2 min 1001 (chrUn_AMGL01128047)
     #  max 275612895 (chr1) median 2902
     # %43.69 masked total, %45.16 masked real
 
     egrep -i "versi|relea" do.log
     # RepeatMasker version open-4.0.2
     #    April 29 2013 (open-4-0-2) version of RepeatMasker
     # CC   RELEASE 20130422;
 
     time featureBits -countGaps oviAri3 rmsk
     # 1154400674 bases of 2619054388 (44.077%) in intersection
     # real    0m32.325s
 
     # why is it different than the faSize above ?
     # because rmsk masks out some N's as well as bases, the count above
     #	separates out the N's from the bases, it doesn't show lower case N's
 
 ##########################################################################
 # running simple repeat (DONE - 2013-06-05 - Hiram)
     mkdir /hive/data/genomes/oviAri3/bed/simpleRepeat
     cd /hive/data/genomes/oviAri3/bed/simpleRepeat
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
 	oviAri3 > do.log 2>&1 &
     # real    11m50.561s
 
     cat fb.simpleRepeat
     #   42019675 bases of 2534344180 (1.658%) in intersection
 
     # add to rmsk after it is done since RM is better masking that WM:
     cd /hive/data/genomes/oviAri3
     twoBitMask oviAri3.rmsk.2bit \
 	-add bed/simpleRepeat/trfMask.bed oviAri3.2bit
     #	you can safely ignore the warning about fields >= 13
 
     twoBitToFa oviAri3.2bit stdout | faSize stdin > faSize.oviAri3.2bit.txt
     cat faSize.oviAri3.2bit.txt
     # 2619054388 bases (84718522 N's 2534335866 real 1389211740 upper
     #   1145124126 lower) in 5698 sequences in 1 files
     # Total size: mean 459644.5 sd 7762308.2 min 1001 (chrUn_AMGL01128047)
     #   max 275612895 (chr1) median 2902
     # %43.72 masked total, %45.18 masked real
 
     rm /gbdb/oviAri3/oviAri3.2bit
     ln -s `pwd`/oviAri3.2bit /gbdb/oviAri3/oviAri3.2bit
 
 #########################################################################
 # Verify all gaps are marked, add any N's not in gap as type 'other'
 #	(DONE - 2013-06-09 - Hiram)
 
     mkdir /hive/data/genomes/oviAri3/bed/gap
     cd /hive/data/genomes/oviAri3/bed/gap
 
     time nice findMotif -motif=gattaca -verbose=4 \
 	-strand=+ ../../oviAri3.unmasked.2bit > findMotif.txt 2>&1
     #   real	0m19.891s
 
     grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
     time featureBits oviAri3 -not gap -bed=notGap.bed
     # 2534344180 bases of 2534344180 (100.000%) in intersection
     # real    0m12.828s
 
     # used to do this featureBits, but it is really really slow if there
     # are a log of contigs
     # time featureBits oviAri3 allGaps.bed notGap.bed -bed=new.gaps.bed
     # 8314 bases of 2534344180 (0.000%) in intersection
     #  real    3m33.473s
 
     # this is much faster:
     awk '{print $3-$2,$0}' notGap.bed | sort -rn > notGap.sizes.txt
     # largest contiguous sequence:
     head -1 notGap.sizes.txt | awk '{print $1}'
     # 383429
     # minimal coverage 1 base out of that largest sequence:
     echo 383429 | awk '{printf "%15.10f\n", 1/(2*$1)}' | sed -e 's/ //g'
     # 0.0000013040
     time bedIntersect -minCoverage=0.0000013040 allGaps.bed notGap.bed \
       test.new.gaps.bed
     # real    0m0.724s
     # number of bases in these new gaps:
     awk '{print $3-$2}' test.new.gaps.bed | ave stdin | grep total
     # total 8314.000000
 
     #	what is the highest index in the existing gap table:
     hgsql -N -e "select ix from gap;" oviAri3 | sort -n | tail -1
     #	25768
     cat << '_EOF_' > mkGap.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my $ix=`hgsql -N -e "select ix from gap;" oviAri3 | sort -n | tail -1`;
 chomp $ix;
 
 open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
 while (my $line = <FH>) {
     my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
     ++$ix;
     printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
         $chromEnd, $ix, $chromEnd-$chromStart;
 }
 close (FH);
 '_EOF_'
     # << happy emacs
     chmod +x ./mkGap.pl
     ./mkGap.pl > other.bed
     featureBits -countGaps oviAri3 other.bed
     #	8314 bases of 2619054388 (0.000%) in intersection
     wc -l other.bed
     #	1552
     hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
 	-noLoad oviAri3 otherGap other.bed
     #	starting with this many
     hgsql -e "select count(*) from gap;" oviAri3
     #	125067
     hgsql oviAri3 -e 'load data local infile "bed.tab" into table gap;'
     #	result count:
     hgsql -e "select count(*) from gap;" oviAri3
     #	126619
     # == 125067 + 1552
     # verify we aren't adding gaps where gaps already exist
     # this would output errors if that were true:
     gapToLift -minGap=1 oviAri3 nonBridged.lift -bedFile=nonBridged.bed
     # see example in danRer7.txt
 
     # are there *only* bridged gaps, no need for genbank lift file
     hgsql -N -e "select bridge from gap;" oviAri3 | sort | uniq -c
     #   126619 yes
 
 ##########################################################################
 ## WINDOWMASKER (DONE- 2013-06-09 - Hiram)
     mkdir /hive/data/genomes/oviAri3/bed/windowMasker
     cd /hive/data/genomes/oviAri3/bed/windowMasker
     time doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
 	-dbHost=hgwdev oviAri3 > do.log 2>&1 &
     # real    199m52.017s
 
     # Masking statistics
     cat faSize.oviAri3.cleanWMSdust.txt
     # 2619054388 bases (84718522 N's 2534335866 real 1554240396 upper
     #    980095470 lower) in 5698 sequences in 1 files
     # Total size: mean 459644.5 sd 7762308.2 min 1001 (chrUn_AMGL01128047)
     #    max 275612895 (chr1) median 2902
     # %37.42 masked total, %38.67 masked real
 
     # how much does this window masker and repeat masker overlap:
     time featureBits -countGaps oviAri3 rmsk windowmaskerSdust \
         > fb.oviAri3.rmsk.windowmaskerSdust.txt 2>&1
     cat fb.oviAri3.rmsk.windowmaskerSdust.txt
     #   737831652 bases of 2619054388 (28.172%) in intersection
 
 ########################################################################
 # cpgIslands - (DONE - 2013-06-10 - Hiram)
     mkdir /hive/data/genomes/oviAri3/bed/cpgIslands
     cd /hive/data/genomes/oviAri3/bed/cpgIslands
     time doCpgIslands.pl oviAri3 > do.log 2>&1 &
     #  real    14m42.645s
 
     cat fb.oviAri3.cpgIslandExt.txt
     #   19288346 bases of 2534335866 (0.761%) in intersection
 
 #########################################################################
 # genscan - (DONE - 2013-06-10 - Hiram)
     mkdir /hive/data/genomes/oviAri3/bed/genscan
     cd /hive/data/genomes/oviAri3/bed/genscan
     time doGenscan.pl oviAri3 > do.log 2>&1  &
     #  real    115m34.625s
 
     cat fb.oviAri3.genscan.txt
     #   53897560 bases of 2534335866 (2.127%) in intersection
     cat fb.oviAri3.genscanSubopt.txt
     #   52669785 bases of 2534335866 (2.078%) in intersection
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2013-06-10 - Hiram)
     # Use -repMatch=900, based on size -- for human we use 1024
     # use the "real" number from the faSize measurement,
     # hg19 is 2897316137, calculate the ratio factor for 1024:
     calc \( 2534335866 / 2897316137 \) \* 1024
     #  ( 2534335866 / 2897316137 ) * 1024 = 895.711688
     # round up to 900 (oviAri1 used 400)
 
     cd /hive/data/genomes/oviAri3
     time blat oviAri3.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/oviAri3.11.ooc -repMatch=900
     # Wrote 30413 overused 11-mers to jkStuff/oviAri3.11.ooc
     # real    1m22.326s
 
     #	oviAri1: Wrote 19704 overused 11-mers to jkStuff/oviAri1.11.ooc
 
     # there are *no* non-bridged gaps, therefore, no lift file for genbank
     hgsql -N -e "select bridge from gap;" oviAri3 | sort | uniq -c
     #    126619 yes
 
 #########################################################################
 # AUTO UPDATE GENBANK (DONE - 2013-06-10,19 - Hiram)
     # examine the file:
     /cluster/data/genbank/data/organism.lst
     # for your species to see what counts it has for:
 # organism       mrnaCnt estCnt  refSeqCnt
 # Ovis aries      3064    338574  795
 
     # to decide which "native" mrna or ests you want to specify in genbank.conf
 
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # edit etc/genbank.conf to add oviAri3 just after before oviAri1
 
 # oviAri3 (Sheep)
 oviAri3.serverGenome = /hive/data/genomes/oviAri3/oviAri3.2bit
 oviAri3.clusterGenome = /hive/data/genomes/oviAri3/oviAri3.2bit
 oviAri3.ooc = /hive/data/genomes/oviAri3/jkStuff/oviAri3.11.ooc
 oviAri3.lift = no
 oviAri3.perChromTables = no
 oviAri3.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 oviAri3.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 oviAri3.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 oviAri3.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 oviAri3.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 oviAri3.genbank.est.xeno.pslCDnaFilter    = ${finished.genbank.est.xeno.pslCDnaFilter}
 oviAri3.downloadDir = oviAri3
 oviAri3.refseq.mrna.native.load  = yes
 oviAri3.refseq.mrna.xeno.load = yes
 oviAri3.refseq.mrna.xeno.loadDesc  = yes
 oviAri3.genbank.mrna.xeno.load  = yes
 oviAri3.upstreamGeneTbl = refGene
 
     # end of section added to etc/genbank.conf
     git commit -m "adding oviAri3 Sheep refs #9409" etc/genbank.conf
     git push
     make etc-update
 
     ssh hgwdev			# used to do this on "genbank" machine
     screen -S oviAri3           # long running job managed in screen
     cd /cluster/data/genbank
     time ./bin/gbAlignStep -initial oviAri3 &
     #  logFile: var/build/logs/2013.06.10-09:12:43.oviAri3.initalign.log
     # real 127433.58
     # user 19976.10
     # sys 12191.65
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
 
     time ./bin/gbDbLoadStep -drop -initialLoad oviAri3 &
     #   logFile: var/dbload/hgwdev/logs/2013.06.18-10:58:46.dbload.log
     #   real 98m19.378s
 
     # enable daily alignment and update of hgwdev
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add oviAri3 to:
     vi etc/align.dbs etc/hgwdev.dbs
     git commit -m "Added oviAri3. refs #9409" etc/align.dbs etc/hgwdev.dbs
     git push
     make etc-update
 
 ############################################################################
 # construct liftOver from oviAri1 to oviAri3 (DONE - 2013-06-19 - Hiram)
     # documentation for this step is in oviAri1 - remember to do this
 
 #########################################################################
 # set default position to RHO gene displays  (DONE - 2013-07-09 - Hiram)
     hgsql -e \
 'update dbDb set defaultPos="chr19:50686000-50700183" where name="oviAri3";' \
 	hgcentraltest
 
 ############################################################################
 # downloads and pushQ entry (DONE - 2013-07-09 - Hiram)
     # after adding oviAri3 to the all.joiner file and verifying that
     #   joinerCheck is clean (i.e. run joinerCheck with -times and -keys flags
     #   to make sure there are no errors), can construct the downloads:
     cd /hive/data/genomes/oviAri3
     time makeDownloads.pl -workhorse=hgwdev oviAri3
     #   real    24m11.795s
 
     mkdir /hive/data/genomes/oviAri3/pushQ
     cd /hive/data/genomes/oviAri3/pushQ
     # Mark says don't let the transMap track get there
     time makePushQSql.pl oviAri3 2> stderr.txt > oviAri3.sql
     #  real    3m52.875s
 
     # check the stderr.txt for bad stuff, these kinds of warnings are OK:
 # WARNING: hgwdev does not have /gbdb/oviAri3/wib/gc5Base.wib
 # WARNING: hgwdev does not have /gbdb/oviAri3/wib/quality.wib
 # WARNING: hgwdev does not have /gbdb/oviAri3/bbi/quality.bw
 # WARNING: oviAri3 does not have seq
 # WARNING: oviAri3 does not have extFile
 # WARNING: oviAri3 does not have estOrientInfo
 
     scp -p oviAri3.sql hgwbeta:/tmp/
     ssh hgwbeta "hgsql qapushq < /tmp/oviAri3.sql"
 
 ############################################################################
 #  BLATSERVERS ENTRY (DONE - 2013-06-26 - Hiram)
 #	After getting a blat server assigned by the Blat Server Gods,
      ssh hgwdev
 
      hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  	VALUES ("oviAri3", "blat4c", "17850", "1", "0"); \
  	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  	VALUES ("oviAri3", "blat4c", "17851", "0", "1");' \
  	    hgcentraltest
      #	test it with some sequence
 
 ############################################################################
 # reset dbDb description to be like the previous nomLeu
 #       (DONE - 2013-03-21 - Hiram)
     hgsql hgcentraltest -e 'update dbDb set description = "Oct. 2012 (GGSC Nleu3.0/oviAri3)"" where name = "oviAri3";' \
           | hgsql -h genome-testdb hgcentraltest
 
 ############################################################################
 # SWAP hg19/Human chain/net (DONE - 2013-06-28 - Hiram)
     # original alignment
     cd /hive/data/genomes/hg19/bed/lastzOviAri3.2013-06-17
     cat fb.hg19.chainOviAri3Link.txt
     #	1356890439 bases of 2897316137 (46.833%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.hg19.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.hg19.swap
 
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
        /hive/data/genomes/hg19/bed/lastzOviAri3.2013-06-17/DEF \
        -swap -syntenicNet -workhorse=hgwdev -smallClusterHub=encodek \
        -bigClusterHub=swarm -chainMinScore=3000 -chainLinearGap=medium \
        > swap.log 2>&1
     #  real    107m13.938s
 
     cat fb.oviAri3.chainHg19Link.txt
     #  1316305922 bases of 2534335866 (51.939%) in intersection
 
     # set sym link to indicate this is the lastz for this genome:
     cd /hive/data/genomes/oviAri3/bed
     ln -s blastz.hg19.swap lastz.hg19
 
 #m############################################################################
 # fixup search rule for assembly track/gold table (DONE - 2013-08-06 - Hiram)
     hgsql -N -e "select frag from gold;" oviAri3 | sort | head -1
 AMGL01000001.1
     hgsql -N -e "select frag from gold;" oviAri3 | sort | tail -2
 AMGL01130764.1
 NC_001941
 
     # verify this rule will find them all or eliminate them all:
     hgsql -N -e "select frag from gold;" oviAri3 | wc -l
     # 130765
 
     hgsql -N -e "select frag from gold;" oviAri3 | egrep -e '[AN][CM][G_][L0]0[0-9]+(\.1)?' | wc -l
     # 130765
 
     hgsql -N -e "select frag from gold;" oviAri3 | egrep -v -e '[AN][CM][G_][L0]0[0-9]+(\.1)?' | wc -l
     # 0
 
     # hence, add to trackDb/sheep/oviAri3/trackDb.ra
 searchTable gold
 shortCircuit 1
 termRegex [AN][CM][G_][L0]0[0-9]+(\.1)?
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
 #########################################################################
 # Farm Animal Cytoband track (IN PROGRESS - 2014-02-24 - Pauline)
     mkdir /hive/data/genomes/oviAri3/bed/cytoBand
     cd /hive/data/genomes/oviAri3/bed/cytoBand
 
     wget http://www.animalgenome.org/QTLdb/tmp/QTL_OAR_3.1.ubed.txt.gz
     zcat QTL_OAR_3.1.ubed.txt.gz >> oviAri3.animalQtl.bed
 
 ##############################################################################
 # Farm Animal QTL track (DONE - 2014-05-30 - Pauline)
     mkdir /hive/data/genomes/oviAri3/bed/animalQtl
     cd /hive/data/genomes/oviAri3/bed/animalQtl
 
     wget http://www.animalgenome.org/QTLdb/tmp/QTL_OAR_3.1.ubed.txt.gz
 
     zcat QTL_OAR_3.1.ubed.txt.gz >> oviAri3.animalQtl.bed
     cut -f1-4 oviAri3.animalQtl.bed > oviAri3.animalQtl.bed4
     hgLoadBed oviAri3 animalQtl oviAri3.animalQtl.bed4
 
     checkTableCoords oviAri3 animalQtl
     # no results - yay!
 
     #added entries in trackDb for track and search
 
     #script to determine what to include in search rule:
 
 cat > findNames <<EOF
 #!/bin/sh
 #hgsql -N -e 'select name from animalQtl;' bosTau7 | sort >
 #/tmp/bosTau7.animalQtl.name.txt
 hgsql -N -e 'select name from animalQtl;' bosTau7  > /tmp/bosTau7.animalQtl.name.txt
 hgsql -N -e 'select name from animalQtl;' galGal4  >> /tmp/bosTau7.animalQtl.name.txt
 hgsql -N -e 'select name from animalQtl;' susScr3  >> /tmp/bosTau7.animalQtl.name.txt
 hgsql -N -e 'select name from animalQtl;' oviAri3  >> /tmp/bosTau7.animalQtl.name.txt
 export maxLen=`awk '{print length($0)}' /tmp/bosTau7.animalQtl.name.txt | sort -rn | head -1`
 echo "scan to column: $maxLen"
 
 export C=1
 while [ $C -le $maxLen ];
 do
 echo -n " $C: "
 sort -u /tmp/bosTau7.animalQtl.name.txt | awk '{ print substr($0,'$C',1) }' | sort -u | xargs echo | sed -e 's/ //g'
 C=`echo $C | awk '{print $1+1}'`
 done
 
 #rm -f /tmp/bosTau7.animalQtl.name.txt
 mv /tmp/bosTau7.animalQtl.name.txt .
 EOF
 
 #script output:
  1: 12346ABCDEFGHIJKLMNOPQRSTUVWYZcdfmpu
  2: -/0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYacdeimort
  3: %-01234568:ABCDEFGHIKLMNOPRSTUVWXYZ_abcdefnost
  4: %-/0123456789:ABCDEFGHIKLMNOPRSTUVWYZ_abdehilnqrt
  5: -0123456789:ABCDEFGHIJKLMNOPQRSTUVWYZ_abcdiklnorstw
  6: %-.0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_bcdhimnoprstw
  7: -0123456789:ABCDEFGHIKLMNOPRSTVW_abcent
  8: 0123456789:ABCDEFGHILMNPRSTV_abeflo
  9: 0123456789:BCEFMOPTW_adeklnt
  10: 0123456789:CHLcdfirt
  11: -0123456789:aln
  12: 0123456789git
  13: 0123456789:
  14: 0123456789
  15: 0123456789
  16: 0123456789
  17: 012346
  18: 023456
 
 #rule constructed based on script output - set termregex to this in trackDb
 #search stanza
 ^[a-zA-Z0-9]+[a-zA-Z0-9:/-]+[a-zA-Z0-9:/-_%]+
 
 ##############################################################################
 # cytoBandIdeo (DONE - 2014-11-19 - Kuhn)
 #
 
 makeCytoBandIdeo.csh oviAri.csh
 # load failed on hgLoadBed, but table and index made properly
 #
 # Can't start query:
 # LOAD DATA   INFILE '/hive/users/kuhn/tracks/sheep/oviAri3.cytoBand'  INTO TABLE cytoBandIdeo
 
 # loaded manually:
 mysql>  LOAD DATA LOCAL INFILE "oviAri3.cytoBand" INTO TABLE cytoBandIdeo;
 
 
 
 
 ##############################################################################
 # TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
 ##############################################################################
 
 ##############################################################################
 # construct liftOver to oviAri3 (DONE - 2014-12-23 - Hiram)
     screen -S oviAri1	# manage this longish running job in a screen
     mkdir /hive/data/genomes/oviAri3/bed/blat.oviAri1.2014-12-23
     cd /hive/data/genomes/oviAri3/bed/blat.oviAri1.2014-12-23
     # check it with -debug first to see if it is going to work:
     doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=ku \
 	-ooc=/hive/data/genomes/oviAri3/jkStuff/oviAri3.11.ooc \
 	-debug -dbHost=hgwdev -workhorse=hgwdev oviAri3 oviAri1
     # if that is OK, then run it:
     time (doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=ku \
 	-ooc=/hive/data/genomes/oviAri3/jkStuff/oviAri3.11.ooc \
 	-dbHost=hgwdev -workhorse=hgwdev oviAri3 oviAri1) > do.log 2>&1
     #	real    8m44.490s
 
     # verify this file exists:
     #	/gbdb/oviAri3/liftOver/oviAri3ToOviAri1.over.chain.gz
     # and try out the conversion on genome-test from oviAri3 to oviAri1
 
 
 ##############################################################################
 # swap LASTZ pig/susScr3 sheep/oviAri3 - (DONE - 2014-05-19 - Hiram)
     # orginal alignment
     cd /hive/data/genomes/susScr3/bed/lastzOviAri3.2014-05-19
     cat fb.susScr3.chainOviAri3Link.txt
     # 1453348100 bases of 2525294057 (57.552%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.susScr3.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.susScr3.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/susScr3/bed/lastzOviAri3.2014-05-19/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    155m31.793s
 
     cat fb.oviAri3.chainSusScr3Link.txt
     # 1311099603 bases of 2534335866 (51.733%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` oviAri3 susScr3) > rbest.log 2>&1
     # real    42m24.943s
 
 #########################################################################
 # SWAP LASTZ mouse/mm10 sheep/oviAri3 - (DONE - 2015-01-21 - Hiram)
     # the original alignment
     cd /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08
     cat fb.mm10.chainOviAri3Link.txt
     # 432006690 bases of 2652783500 (16.285%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.mm10.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.mm10.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/mm10/bed/lastzOviAri3.2015-01-08/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    31m27.481s
 
     cat fb.oviAri3.chainMm10Link.txt
     #422549165 bases of 2534335866 (16.673%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` oviAri3 mm10) > rbest.log 2>&1
     # real    16m45.956s
 
 #########################################################################
 # SWAP LASTZ cow/bosTau8 sheep/oviAri3 - (DONE - 2015-01-26 - Hiram)
     # original alignment
     cd /hive/data/genomes/bosTau8/bed/lastzOviAri3.2015-01-20
     cat fb.bosTau8.chainOviAri3Link.txt
     # 2236790379 bases of 2649307237 (84.429%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.bosTau8.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.bosTau8.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/bosTau8/bed/lastzOviAri3.2015-01-20/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     #  real    477m36.613s
 
     cat fb.oviAri3.chainBosTau8Link.txt
     # 2217800123 bases of 2534335866 (87.510%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` oviAri3 bosTau8) > rbest.log 2>&1
     # real    44m43.976s
 
 #########################################################################
 # SWAP LASTZ horse/equCab2 sheep/oviAri3 - (DONE - 2015-01-21 - Hiram)
     # original alignment
     cd /hive/data/genomes/equCab2/bed/lastzOviAri3.2015-01-20
     cat fb.equCab2.chainOviAri3Link.txt
     # 1466221911 bases of 2428790173 (60.368%) in intersection
 
     # and for the swap:
     mkdir /hive/data/genomes/oviAri3/bed/blastz.equCab2.swap
     cd /hive/data/genomes/oviAri3/bed/blastz.equCab2.swap
 
     time (doBlastzChainNet.pl -verbose=2 \
       /hive/data/genomes/equCab2/bed/lastzOviAri3.2015-01-20/DEF \
         -swap -chainMinScore=3000 -chainLinearGap=medium \
           -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
             -syntenicNet) > swap.log 2>&1
     # real    117m5.548s
 
 
     cat fb.oviAri3.chainEquCab2Link.txt
     # 1426475061 bases of 2534335866 (56.286%) in intersection
 
     time (doRecipBest.pl -buildDir=`pwd` oviAri3 equCab2) > rbest.log 2>&1
     # real    40m15.262s
 
 ############################################################################
 # construct liftOver to oviAri4 (DONE - 2018-04-30 - ChrisL)
     screen -S oviAri4	# manage this longish running job in a screen
     mkdir /hive/data/genomes/oviAri3/bed/blat.oviAri3.2018-04-30
     cd /hive/data/genomes/oviAri3/bed/blat.oviAri3.2018-08-30
 
     # check it with -debug first to see if it is going to work:
     doSameSpeciesLiftOver.pl -debug -verbose=2 -buildDir=`pwd` \
 	-ooc=/hive/data/genomes/oviAri3/jkStuff/oviAri3.11.ooc \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          oviAri3 oviAri4 > do.log 2>&1
 
     # if that is OK, then run it:
     time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
 	-ooc=/hive/data/genomes/oviAri3/jkStuff/oviAri3.11.ooc \
         -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
          oviAri3 oviAri4) > do.log 2>&1 &
 
     #	real    109m42.711s
 
     # verify this file exists:
     #	/gbdb/oviAri3/liftOver/oviAri3ToOviAri4.over.chain.gz
     # and try out the conversion on genome-test from oviAri1 to oviAri3
+
+##############################################################################
+# LASTZ Sheep OviAri3 vs. sheep GCF_002742125.1 (DONE - 2021-10-27 - Gerardo)
+
+# should be able to run this from anywhere, this time it was run from:
+    cd kent/src/hg/utils/automation
+
+  time (~/kent/src/hg/utils/automation/pairLastz.sh \
+	oviAri3 GCF_002742125.1_Oar_rambouillet_v1.0 mammal mammal) \
+	   > sheepLiftOver_20211027.log 2>&1 &
+  # check the total time
+grep -w real sheepLiftOver_20211027.log  | tail -1 | sed -e 's/^/    # /;'
+    # real      1128m46.020s
+
+  # this sheepLiftOver_20211027 log file happens to have a copy of the make doc, as well
+  # as the copy of the make doc left in the target assembly directory:
+# /hive/data/genomes/oviAri3/bed/lastzGCF_002742125.1.2021-10-27/makeDoc.txt
+
+    # this command outputs this makeDoc text:
+
+    cat kent/src/hg/utils/automation/sheepLiftOver_20211027.log
+
+##############################################################################
+# LASTZ Sheep OviAri3 vs. sheep GCF_002742125.1 
+#       (DONE - 2021-10-06 - Gerardo)
+
+    mkdir /hive/data/genomes/oviAri3/bed/lastzGCF_002742125.1.2021-10-27
+    cd /hive/data/genomes/oviAri3/bed/lastzGCF_002742125.1.2021-10-27
+
+    printf '# sheep GCF_002742125.1 vs. Sheep OviAri3
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: Sheep OviAri3
+SEQ1_DIR=/hive/data/genomes/oviAri3/oviAri3.2bit
+SEQ1_LEN=/hive/data/genomes/oviAri3/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: sheep GCF_002742125.1
+SEQ2_DIR=/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.2bit
+SEQ2_LEN=/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.chrom.sizes.txt
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/oviAri3/bed/lastzGCF_002742125.1.2021-10-27
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -verbose=2 `pwd`/DEF -syntenicNet \
+       -qAsmId GCF_002742125.1_Oar_rambouillet_v1.0 -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=3000 -chainLinearGap=medium) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	1128m46.020s
+
+    sed -e 's/^/    # /;' fb.oviAri3.chainGCF_002742125.1Link.txt
+    # 2501829064 bases of 2619054388 (95.524%) in intersection
+    sed -e 's/^/    # /;' fb.oviAri3.chainSynGCF_002742125.1Link.txt
+    # 2475978010 bases of 2619054388 (94.537%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+      -query2Bit="/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.2bit" \
+-querySizes="/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.chrom.sizes.txt" \
+        oviAri3 GCF_002742125.1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	116m28.947s
+
+    sed -e 's/^/    # /;' fb.oviAri3.chainRBest.GCF_002742125.1.txt
+    # 2474220941 bases of 2619054388 (94.470%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/asmHubs/allBuild/GCF/002/742/125/GCF_002742125.1_Oar_rambouillet_v1.0/trackData/blastz.oviAri3.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl -trackHub -noDbNameCheck -swap -verbose=2 \
+   -qAsmId GCF_002742125.1_Oar_rambouillet_v1.0 /hive/data/genomes/oviAri3/bed/lastzGCF_002742125.1.2021-10-27/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=3000 -chainLinearGap=medium) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	778m32.085s
+
+    sed -e 's/^/    # /;' fb.GCF_002742125.1.chainOviAri3Link.txt
+    # 2707531625 bases of 2869914396 (94.342%) in intersection
+    sed -e 's/^/    # /;' fb.GCF_002742125.1.chainSynOviAri3Link.txt
+    # 2631159342 bases of 2869914396 (91.681%) in intersection
+\    time (~/kent/src/hg/utils/automation/doRecipBest.pl -trackHub -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+   -target2bit="/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.2bit" \
+-targetSizes="/hive/data/genomes/asmHubs/GCF/002/742/125/GCF_002742125.1/GCF_002742125.1.chrom.sizes.txt" \
+   GCF_002742125.1 oviAri3) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	140m7.548s
+
+    sed -e 's/^/    # /;' fb.GCF_002742125.1.chainRBest.OviAri3.txt
+    # 2470704166 bases of 2869914396 (86.090%) in intersection