src/hg/makeDb/doc/dp3.txt 70415b51bd79ffbe93d0e64a49a73ec73493abf6

70415b51bd79ffbe93d0e64a49a73ec73493abf6
gperez2
  Tue Dec 6 21:55:51 2022 -0800
dp3 vs. droPer1 lastz/chain/net run for user, refs #2567

diff --git src/hg/makeDb/doc/dp3.txt src/hg/makeDb/doc/dp3.txt
index 83b1f4d..445fe58 100644
--- src/hg/makeDb/doc/dp3.txt
+++ src/hg/makeDb/doc/dp3.txt
@@ -1,1019 +1,1111 @@
 # for emacs: -*- mode: sh; -*-
 
 
 # Drosophila pseudoobscura -- 
 # 
 # Flybase release 1.0.  Partly chrom-based, partly linkage-group based,
 # but at 17 sequences total that's nicer than 759 scaffolds.  Also comes 
 # with gene annotations (predicted genes and putative Drosophila homologs 
 # based on tblastn).
 #
 # ftp://flybase.net/genomes/Drosophila_pseudoobscura/current/dpse_release1.0.txt
 #
 
 #  NOTE:  this doc may have genePred loads that fail to include
 #  the bin column.  Please correct that for the next build by adding
 #  a bin column when you make any of these tables:
 #
 #  mysql> SELECT tableName, type FROM trackDb WHERE type LIKE "%Pred%";
 #  +-------------+---------------------------------+
 #  | tableName   | type                            |
 #  +-------------+---------------------------------+
 #  | flyBaseGene | genePred flyBasePep             |
 #  | xenoRefGene | genePred xenoRefPep xenoRefMrna |
 #  | twinscan    | genePred twinscanPep            |
 #  | genewise    | genePred genewisePep            |
 #  | geneid      | genePred geneidPep              |
 #  | genscan     | genePred genscanPep             |
 #  +-------------+---------------------------------+
 
 
 # DOWNLOAD SEQUENCE (DONE 11/15/04 angie)
     ssh kksilo
     mkdir /cluster/store8/dp3
     cd /cluster/data
     ln -s /cluster/store8/dp3 dp3
     cd /cluster/data/dp3
     mkdir jkStuff bed
     mkdir downloads
     cd downloads
     wget ftp://flybase.net/genomes/Drosophila_pseudoobscura/current/fasta/dpse-all-chromosome-r1.03.fasta.gz
     zcat dpse-all-chromosome-r1.03.fasta.gz | faSize stdin
 #141079253 bases (6504941 N's 134574312 real) in 17 sequences in 1 files
 #Total size: mean 8298779.6 sd 7909506.4 min 374547 (XL_group3b) max 30711475 (2) median 6604477
 #N count: mean 382643.6 sd 405982.0
     zcat dpse-all-chromosome-r1.03.fasta.gz \
     | sed -e 's/^>/>chr/' \
     | faSplit byName stdin stubArg
     # Put chromosome files in our usual $c/$chr.fa hierarchy:
     foreach f (chr*.fa)
       set chr = $f:r
       set c = `echo $chr | sed -e 's/^chr//; s/_group.*$//;'`
       echo $chr to $c
       mkdir -p ../$c
       mv $f ../$c
     end
 
 
 # BREAK UP SEQUENCE INTO 5 MB CHUNKS AT CHUNKS OF N'S (DONE 11/15/04 angie)
     # Since we didn't get real AGP with gap info, we don't have enough info 
     # to help us break into ~5MB chunks at proper gap boundaries.  So just 
     # use runs of N's as gaps.
     ssh kksilo
     cd /cluster/data/dp3
     foreach f (?{,?}/chr*.fa)
       set chr = $f:t:r
       set c = $f:h
       faSplit gap -minGapSize=100 -lift=$c/$chr.lft \
         $c/$chr.fa 5000000 $c/${chr}_
     end
     # put chunks into usual "contig" dir structure
     foreach ctg (?{,?}/chr*_?{,?}.fa)
       mv $ctg $ctg:r/
     end
     # make liftAll.lft:
     cat ?{,?}/chr*.lft > jkStuff/liftAll.lft
 
 
 # RUN REPEAT MASKER (DONE 11/15/04 angie)
     # January ("March") '04 version of RepeatMasker and libs.
     #- Split sequence into 500kb chunks, at gaps if possible:
     ssh kksilo
     cd /cluster/data/dp3
     foreach c (?{,?})
       foreach d ($c/chr*_?{,?})
         cd $d
         echo "splitting $d"
         set contig = $d:t
         faSplit gap $contig.fa 500000 ${contig}_ -lift=$contig.lft \
           -minGapSize=100
         cd ../..
       end
     end
 
     # make the run directory, output directory, and job list
     cat << '_EOF_' > jkStuff/RMDrosophila
 #!/bin/csh -fe
 
 cd $1
 /bin/mkdir -p /tmp/dp3/$2
 /bin/cp $2 /tmp/dp3/$2/
 pushd /tmp/dp3/$2
 /cluster/bluearc/RepeatMasker/RepeatMasker -s -spec drosophila $2
 popd
 /bin/cp /tmp/dp3/$2/$2.out ./
 /bin/rm -fr /tmp/dp3/$2/*
 /bin/rmdir --ignore-fail-on-non-empty /tmp/dp3/$2
 /bin/rmdir --ignore-fail-on-non-empty /tmp/dp3
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod +x jkStuff/RMDrosophila
     mkdir RMRun RMOut
     cp /dev/null RMRun/RMJobs
     foreach d (?{,?}/chr*_?{,?})
       foreach f ( $d/chr*_?{,?}_?{,?}.fa )
         set chunk = $f:t
         echo ../jkStuff/RMDrosophila \
              /cluster/data/dp3/$d $chunk \
              '{'check in line+ /cluster/data/dp3/$f'}' \
            '{'check out line+ /cluster/data/dp3/$f.out'}' \
         >> RMRun/RMJobs
       end
     end
 
     # do the run
     ssh kk
     cd /cluster/data/dp3/RMRun
     para create RMJobs
     para try, check, push, check,...
 #Completed: 379 of 379 jobs
 #Average job time:                4774s      79.57m     1.33h    0.06d
 #Longest job:                     6961s     116.02m     1.93h    0.08d
 #Submission to last job:          7079s     117.98m     1.97h    0.08d
 
     #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
     ssh kksilo
     cd /cluster/data/dp3
     foreach d (*/chr*_?{,?})
       set chunk = $d:t
       echo $chunk
       liftUp $d/$chunk.fa.out $d/$chunk.lft warn $d/${chunk}_*.fa.out \
         > /dev/null
     end
 
     #- Lift pseudo-contigs to chromosome level
     foreach f (?{,?}/chr*.fa)
     set chr = $f:t:r
       echo lifting $chr
       cd $f:h
       liftUp $chr.fa.out $chr.lft warn \
         `awk '{print $2 "/" $2 ".fa.out";}' $chr.lft` \
         > /dev/null
       cd ..
     end
 
     # Load the .out files into the database with:
     ssh hgwdev
     hgLoadOut dp3 /cluster/data/dp3/*/chr*.fa.out
     featureBits dp3 rmsk
 #7517385 bases of 134584124 (5.586%) in intersection
     # Compare to previous assembly:
     featureBits dp2 rmsk
 #6707169 bases of 129770350 (5.168%) in intersection
 
 
 # CREATING DATABASE (DONE 11/15/04 angie)
     # Create the database.
     ssh hgwdev
     # Make sure there is at least 5 gig free for the database
     df -h /var/lib/mysql
 #/dev/sdc1             1.8T  646G 1014G  39% /var/lib/mysql
     hgsql '' -e 'create database dp3'
     # Copy all the data from the table "grp" 
     # in an existing database to the new database
     hgsql dp3 -e 'create table grp (PRIMARY KEY(NAME)) select * from hg17.grp'
 
 
 # SIMPLE REPEATS (TRF) (DONE 11/15/04 angie)
     ssh kksilo
     mkdir -p /cluster/data/dp3/bed/simpleRepeat
     cd /cluster/data/dp3/bed/simpleRepeat
     mkdir trf
     cp /dev/null jobs.csh
     foreach d (/cluster/data/dp3/?{,?}/chr*_?{,?})
       set f = $d/$d:t.fa
       set fout = $f:t:r.bed
       echo "trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \
       >> jobs.csh
     end
     csh -fe jobs.csh \
     |& egrep -v '^(Removed|Tandem|Copyright|Loading|Allocating|Initializing|Computing|Scanning|Freeing)' \
     > jobs.log &
     # check on this with
     tail -f jobs.log
     wc -l jobs.csh
     ls -1 trf | wc -l
 #     55
     # Lift chunk results to one file for all chroms:
     liftUp simpleRepeat.bed ../../jkStuff/liftAll.lft warn trf/*.bed > /dev/null
 
     # Load this into the database as so
     ssh hgwdev
     hgLoadBed dp3 simpleRepeat \
       /cluster/data/dp3/bed/simpleRepeat/simpleRepeat.bed \
       -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
     featureBits dp3 simpleRepeat
 #3280684 bases of 134584124 (2.438%) in intersection
     # Compare to previous assembly:
     featureBits dp2 simpleRepeat
 #3105979 bases of 129770350 (2.393%) in intersection
 
 
 # FILTER SIMPLE REPEATS (TRF) INTO MASK (DONE 11/15/04 angie)
     # make a filtered version of the trf output: 
     # keep trf's with period <= 12:
     ssh kksilo
     cd /cluster/data/dp3/bed/simpleRepeat
     mkdir trfMask
     foreach f (trf/*.bed)
       awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
     end
     mkdir trfMaskChrom
     foreach f (../../?{,?}/chr*.fa)
       set chr = $f:t:r
       liftUp trfMaskChrom/$chr.bed ../../jkStuff/liftAll.lft warn \
         trfMask/$chr*.bed
     end
 
 
 # MASK FA USING REPEATMASKER AND FILTERED TRF FILES (DONE 11/15/04 angie)
     ssh kksilo
     cd /cluster/data/dp3
     echo repeat- and trf-masking chunks
     foreach d (?{,?}/chr*_?{,?})
       set f = $d/$d:t.fa
       maskOutFa -soft $f $f.out $f
       maskOutFa -softAdd $f bed/simpleRepeat/trfMask/$d:t.bed $f
     end
     echo repeat- and trf-masking chromosomes
     foreach f (?{,?}/chr*.fa)
       maskOutFa -soft $f $f.out $f
       maskOutFa -softAdd $f bed/simpleRepeat/trfMaskChrom/$f:t:r.bed $f
     end
 
 
 # STORE SEQUENCE AND ASSEMBLY INFORMATION (DONE 11/15/04 angie)
     ssh kksilo
     cd /cluster/data/dp3
     # Make a .2bit file:
     faToTwoBit ?{,?}/chr*.fa dp3.2bit
     # Make chromInfo.tab.
     mkdir bed/chromInfo
     twoBitInfo dp3.2bit stdout \
     | awk '{printf "%s\t%s\t/gbdb/dp3/dp3.2bit\n", $1, $2;}' \
     > bed/chromInfo/chromInfo.tab
 
     # Make symbolic links from /gbdb/dp3 to the 2bit.
     ssh hgwdev
     mkdir -p /gbdb/dp3
     ln -s /cluster/data/dp3/dp3.2bit /gbdb/dp3/
     # Load up chromInfo table
     hgsql dp3 < $HOME/kent/src/hg/lib/chromInfo.sql
     hgsql dp3 -e 'load data local infile \
       "/cluster/data/dp3/bed/chromInfo/chromInfo.tab" into table chromInfo'
     hgsql dp3 -N -e 'select chrom,size from chromInfo' \
       > /cluster/data/dp3/chrom.sizes
     wc -l /cluster/data/dp3/chrom.sizes
 #     17 /cluster/data/dp3/chrom.sizes
 
 
 # MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE (DONE 11/16/04 angie)
     # Warning: genome and organism fields must correspond
     # with defaultDb values
     echo 'INSERT INTO dbDb \
         (name, description, nibPath, organism, \
              defaultPos, active, orderKey, genome, scientificName, \
              htmlPath, hgNearOk, hgPbOk, sourceName) values \
         ("dp3", "Nov. 2004", "/gbdb/dp3", "D. pseudoobscura", \
              "chr4_group3:5603001-5614000", 1, 56, \
              "D. pseudoobscura", \
              "Drosophila pseudoobscura", "/gbdb/dp3/html/description.html", \
              0, 0, "FlyBase Release 1.03");' \
       | hgsql -h genome-testdb hgcentraltest
     echo 'update defaultDb set name = "dp3" where genome = "D. pseudoobscura"' \
       | hgsql -h genome-testdb hgcentraltest
 
     # Make trackDb table so browser knows what tracks to expect:
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/trackDb
     cvs up -d -P
 
     # Edit trackDb/makefile to add dp3 to the DBS variable.
     mkdir drosophila/dp3
     # Create a simple drosophila/dp3/description.html file.
     cvs add drosophila/dp3
     cvs add drosophila/dp3/description.html
     make update DBS=dp3 ZOO_DBS=
 
     # go public on genome-test
     cvs ci makefile
     cvs ci drosophila/dp3
     mkdir /gbdb/dp3/html
     # in a clean, updated tree's kent/src/hg/makeDb/trackDb:
     make alpha
 
 
 # EXTRACTING GAP INFO FROM BLOCKS OF NS (DONE 11/15/04 angie)
     ssh kksilo
     cd /cluster/data/dp3
     cat ?{,?}/chr*.fa \
     | faGapSizes stdin \
         -niceSizes=5,10,20,25,30,40,50,100,250,500,1000,10000,100000
     # None of those round numbers seem overrepresented, so just use 
     # hgFakeAgp's default -minContigGap of 25.
     foreach f (?{,?}/chr*.fa)
       hgFakeAgp $f stdout \
       | awk '$5 == "N" {print;}' > $f:r.gap
     end
     ssh hgwdev
     hgLoadGap dp3 /cluster/data/dp3
 
 
 # PRODUCING GENSCAN PREDICTIONS (DONE 11/16/04 angie)
     # Run on small cluster -- genscan needs big mem.
     ssh kksilo
     mkdir /cluster/data/dp3/bed/genscan
     cd /cluster/data/dp3/bed/genscan
     # Check out hg3rdParty/genscanlinux to get latest genscan:
     cvs co hg3rdParty/genscanlinux
     # Make 3 subdirectories for genscan to put their output files in
     mkdir gtf pep subopt
     # Make hard-masked chunks
     foreach d (/cluster/data/dp3/?{,?}/chr*_?{,?})
       set f = $d/$d:t.fa
       maskOutFa $f hard $f.masked
     end
     # Generate a list file, scaffolds.list, of all the hard-masked scaffolds 
     # that *do not* consist of all-N's (which would cause genscan to blow up)
     cp /dev/null contigs.list
     foreach d (/cluster/data/dp3/?{,?}/chr*_?{,?})
       set f = $d/$d:t.fa.masked
       egrep '[ACGT]' $f > /dev/null
       if ($status == 0) echo $f >> contigs.list
     end
     cat << '_EOF_' > gsub
 #LOOP
 gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
 #ENDLOOP
 '_EOF_'
     # << this line keeps emacs coloring happy
     gensub2 contigs.list single gsub jobList
     ssh kki
     cd /cluster/data/dp3/bed/genscan
     para create jobList
     para try, check, push, check, ...
 #Completed: 54 of 55 jobs
 #Crashed: 1 jobs
 #Average job time:                  95s       1.58m     0.03h    0.00d
 #Longest job:                      232s       3.87m     0.06h    0.00d
 #Submission to last job:           431s       7.18m     0.12h    0.00d
 
     # If there are crashes, diagnose with "para problems".  
     # If a job crashes due to genscan running out of memory, re-run it 
     # manually with "-window=1200000" instead of "-window=2400000".
     # OK, 1200000 ran out of mem too... try 1000000.
     ssh kolossus
     cd /cluster/data/dp3/bed/genscan
     gsBig /cluster/data/dp3/XR/chrXR_group8_1/chrXR_group8_1.fa.masked gtf/chrXR_group8_1.fa.gtf -trans=pep/chrXR_group8_1.fa.pep -subopt=subopt/chrXR_group8_1.fa.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1000000
     
     # Convert these to chromosome level files as so:
     ssh kksilo
     cd /cluster/data/dp3/bed/genscan
     liftUp genscan.gtf ../../jkStuff/liftAll.lft warn gtf/*.gtf
     liftUp genscanSubopt.bed ../../jkStuff/liftAll.lft warn subopt/*.bed
     cat pep/*.pep > genscan.pep
 
     # Load into the database as so:
     ssh hgwdev
     cd /cluster/data/dp3/bed/genscan
     ldHgGene -gtf dp3 genscan genscan.gtf
     hgPepPred dp3 generic genscanPep genscan.pep
     hgLoadBed dp3 genscanSubopt genscanSubopt.bed
 
 # MYTOUCH FIX - jen - 2006-01-24
   sudo mytouch dp3 genscanPep 0501071300.00
 
 # PUT SEQUENCE ON /ISCRATCH FOR BLASTZ (DONE 11/15/04 angie)
     # OK, for this we still need nibs so build those...
     ssh kksilo
     cd /cluster/data/dp3
     mkdir nib
     foreach f (*/chr*.fa)
       faToNib -softMask $f nib/$f:t:r.nib
     end
     ssh kkr1u00
     mkdir /iscratch/i/dp3
     cp -pR /cluster/data/dp3/nib /iscratch/i/dp3/
     iSync
 
 
 # MAKE DOWNLOADABLE FILES (DONE 11/16/04 angie)
     ssh kksilo
     mkdir /cluster/data/dp3/zips
     cd /cluster/data/dp3
     # Oops, need to make hard-masked chr*.fa.masked before we can zip them:
     foreach f (*/chr*.fa)
       maskOutFa $f hard $f.masked
     end
     zip -j zips/chromOut.zip */chr*.fa.out
     zip -j zips/chromFa.zip */chr*.fa
     zip -j zips/chromFaMasked.zip */chr*.fa.masked
     cd bed/simpleRepeat
     zip ../../zips/chromTrf.zip trfMaskChrom/chr*.bed
     cd ../..
     foreach f (zips/*.zip)
       echo $f
       unzip -t $f | tail -1
     end
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/dp3
     cd /usr/local/apache/htdocs/goldenPath/dp3
     mkdir bigZips database
     # Create README.txt files in bigZips/ and database/ to explain the files.
     cd bigZips
     cp -p /cluster/data/dp3/zips/*.zip .
     md5sum *.zip > md5sum.txt
 
 
 # SWAP DM1-DP3 BLASTZ (DONE 11/16/04 angie)
     ssh kksilo
     mkdir -p /cluster/data/dp3/bed/blastz.dm1.swap.2004-11-16/axtChrom
     ln -s blastz.dm1.swap.2004-11-16 /cluster/data/dp3/bed/blastz.dm1
     cd /cluster/data/dp3/bed/blastz.dm1
     set aliDir = /cluster/data/dm1/bed/blastz.dp3
     cp $aliDir/S1.len S2.len
     cp $aliDir/S2.len S1.len
     mkdir unsorted axtChrom
     cat $aliDir/axtChrom/chr*.axt \
     | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
     | axtSplitByTarget stdin unsorted
     # Sort the shuffled .axt files.
     foreach f (unsorted/*.axt)
       echo sorting $f:t:r
       axtSort $f axtChrom/$f:t
     end
     du -sh $aliDir/axtChrom unsorted axtChrom
 #296M    /cluster/data/dm1/bed/blastz.dp3/axtChrom
 #297M    unsorted
 #296M    axtChrom
     rm -r unsorted
 
 
 # CHAIN MELANOGASTER BLASTZ (DONE 11/17/04 angie)
     # Run axtChain on little cluster
     ssh kki
     cd /cluster/data/dp3/bed/blastz.dm1
     mkdir -p axtChain/run1
     cd axtChain/run1
     mkdir chain
     ls -1S /cluster/data/dp3/bed/blastz.dm1/axtChrom/*.axt \
       > input.lst
     cat << '_EOF_' > gsub
 #LOOP
 doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain}
 #ENDLOOP
 '_EOF_'
     # << this line makes emacs coloring happy
 
     cat << '_EOF_' > doChain
 #!/bin/csh -ef
 axtChain -verbose=0 $1 \
   /iscratch/i/dp3/nib \
   /iscratch/i/dm1/nib stdout \
 | chainAntiRepeat /iscratch/i/dp3/nib /iscratch/i/dm1/nib stdin $2
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod a+x doChain
     gensub2 input.lst single gsub jobList
     para create jobList
     para try, check, push, check...
 #Completed: 17 of 17 jobs
 #Average job time:                  10s       0.17m     0.00h    0.00d
 #Longest job:                       18s       0.30m     0.01h    0.00d
 #Submission to last job:            26s       0.43m     0.01h    0.00d
 
     # now on the fileserver, sort chains
     ssh kksilo
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     chainMergeSort run1/chain/*.chain > all.chain
     chainSplit chain all.chain
     rm run1/chain/*.chain
 
     # Load chains into database
     ssh hgwdev
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain/chain
     cat *.chain | hgLoadChain -tIndex dp3 chainDm1 stdin
 
 
 # NET MELANOGASTER BLASTZ (DONE 11/17/04 angie)
     ssh kksilo
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     chainPreNet all.chain ../S1.len ../S2.len stdout \
     | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
     | netSyntenic stdin noClass.net
 
     # Add classification info using db tables:
     ssh hgwdev
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     netClass -noAr noClass.net dp3 dm1 melanogaster.net
 
     # Make a 'syntenic' subset:
     ssh kksilo
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     rm noClass.net
     netFilter -syn melanogaster.net > melanogasterSyn.net
 
     # Load the nets into database 
     ssh hgwdev
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     netFilter -minGap=10 melanogaster.net |  hgLoadNet dp3 netDm1 stdin
     netFilter -minGap=10 melanogasterSyn.net \
     | hgLoadNet dp3 netSyntenyDm1 stdin
 
 
 # MAKE AXTNET (DONE 11/17/04 angie)
     ssh kksilo
     cd /cluster/data/dp3/bed/blastz.dm1/axtChain
     netSplit melanogaster.net net
     cd ..
     mkdir axtNet
     foreach f (axtChain/net/*)
       set chr = $f:t:r
       netToAxt $f axtChain/chain/$chr.chain /cluster/data/dp3/nib \
         /cluster/data/dm1/nib stdout \
       | axtSort stdin axtNet/$chr.axt
     end
 
 
 # MAKE VSDM1 DOWNLOADABLES (DONE 11/17/04 angie)
     ssh kksilo
     mkdir /cluster/data/dp3/zips/axtNet
     gzip -c \
       /cluster/data/dp3/bed/blastz.dm1/axtChain/all.chain \
       > /cluster/data/dp3/zips/melanogaster.chain.gz
     gzip -c \
       /cluster/data/dp3/bed/blastz.dm1/axtChain/melanogaster.net \
       > /cluster/data/dp3/zips/melanogaster.net.gz
     foreach f (/cluster/data/dp3/bed/blastz.dm1/axtNet/*.axt)
       gzip -c $f > /cluster/data/dp3/zips/axtNet/$f:t.gz
     end
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/dp3/vsDm1
     cd /usr/local/apache/htdocs/goldenPath/dp3/vsDm1
     mv /cluster/data/dp3/zips/mel*.gz .
     mv /cluster/data/dp3/zips/axtNet .
     md5sum *.gz */*.gz > md5sum.txt
     # Make a README.txt which explains the files & formats.
 
 
 # MAKE 11.OOC FILE FOR BLAT (DONE 11/16/04 angie)
     # Use -repMatch=75 (based on size -- for human we use 1024, and 
     # mosquito size is ~5% of human => 50 -- but I worry it doesn't scale 
     # linearly so be conservative and require more matches to be "overused")
     ssh kkr1u00
     mkdir /cluster/data/dp3/bed/ooc
     cd /cluster/data/dp3/bed/ooc
     ls -1 /cluster/data/dp3/nib/*.nib > nib.lst
     mkdir /cluster/bluearc/dp3
     blat nib.lst /dev/null /dev/null -tileSize=11 \
       -makeOoc=/cluster/bluearc/dp3/11.ooc -repMatch=75
 #Wrote 6790 overused 11-mers to /cluster/bluearc/dp3/11.ooc
     cp -p /cluster/bluearc/dp3/*.ooc /iscratch/i/dp3/
     iSync
 
 
 # AUTO UPDATE GENBANK MRNA RUN  (DONE 1/28/05 angie)
     ssh hgwdev
     # Update genbank config and source in CVS:
     cd ~/kent/src/hg/makeDb/genbank
     cvsup .
 
     # Edit etc/genbank.conf and add these lines:
 # dp3 (D. pseudoobscura)
 dp3.genome = /iscratch/i/dp3/nib/*.nib
 dp3.lift = /cluster/data/dp3/jkStuff/liftAll.lft
 dp3.refseq.mrna.native.load = no
 dp3.refseq.mrna.xeno.load = yes
 dp3.refseq.mrna.xeno.pslReps = -minCover=0.15 -minAli=0.75 -nearTop=0.005
 dp3.genbank.mrna.xeno.load = yes
 dp3.genbank.est.xeno.load = no
 dp3.downloadDir = dp3
 
     cvs ci etc/genbank.conf
     # Since D. pseudoobscura is not a new species for us, 
     # don't need to edit src/lib/gbGenome.c.  
     # Edit src/align/gbBlat to add /iscratch/i/dp3/11.ooc
     cvs diff src/align/gbBlat
     make
     cvs ci src/align/gbBlat
 
     # Install to /cluster/data/genbank:
     make install-server
 
     ssh eieio
     cd /cluster/data/genbank
     # This is an -initial run, (xeno) RefSeq only:
     nice bin/gbAlignStep -srcDb=refseq -type=mrna -initial dp3 &
     tail -f [its logfile]
     # Load results:
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad dp3
     featureBits dp3 xenoRefGene
 #14849904 bases of 134584124 (11.034%) in intersection
     # Clean up:
     rm -rf work/initial.dp3
 
     # This is an -initial run, mRNA only:
     nice bin/gbAlignStep -srcDb=genbank -type=mrna -initial dp3 &
     tail -f [its logfile]
     # Load results:
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad dp3
     featureBits dp3 mrna
 #28204 bases of 134584124 (0.021%) in intersection
     # Still pretty paltry, but enough so that it would be a shame to not 
     # have them.  
     featureBits dp3 xenoMrna
 #16380659 bases of 134584124 (12.171%) in intersection
     # Clean up:
     rm -rf work/initial.dp3
 
 # ADDED 4/22/05
     # This is an -initial run, EST only:
     nice bin/gbAlignStep -srcDb=genbank -type=est -initial dp3 &
     tail -f [its logfile]
     # Load results:
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad dp3
     featureBits dp3 intronEst
 #154814 bases of 134584124 (0.115%) in intersection
     featureBits dp3 est
 #235099 bases of 134584124 (0.175%) in intersection
     # Clean up:
     rm -rf work/initial.dp3
 
 
 # MAKE GCPERCENT (DONE 11/15/04 angie)
     ssh hgwdev
     mkdir /cluster/data/dp3/bed/gcPercent
     cd /cluster/data/dp3/bed/gcPercent
     # create and load gcPercent table
     hgGcPercent dp3 ../../nib
 
 
 # MAKE HGCENTRALTEST BLATSERVERS ENTRY (TODO 11/?/04 angie)
     ssh hgwdev
     echo 'insert into blatServers values("dp3", "blat?", "177??", 1, 0); \
           insert into blatServers values("dp3", "blat?", "177??", 0, 1);' \
       | hgsql -h genome-testdb hgcentraltest
 
 
 # MAKE Drosophila Proteins track  (DONE braney 11/19/04)
     ssh kksilo
     mkdir -p /cluster/data/dp3/blastDb
     cd /cluster/data/dp3/blastDb
     ln -s ../*/*/*.fa .
     for i in *.fa; do formatdb -i $i -p F 2> /dev/null; done
     rm *.fa *.log
 
     ssh kkr1u00
     mkdir -p /iscratch/i/dp3/blastDb
     cp /cluster/data/dp3/blastDb/* /iscratch/i/dp3/blastDb
     (iSync) 2>&1 > sync.out
     
     mkdir -p /cluster/data/dp3/bed/tblastn.dm1FB
     cd /cluster/data/dp3/bed/tblastn.dm1FB
     ls -1S /iscratch/i/dp3/blastDb/*.nsq | sed "s/\.nsq//" > bug.lst
     exit
 
     # back to kksilo
     cd /cluster/data/dp3/bed/tblastn.dm1FB
     mkdir fbfa
     # calculate a reasonable number of jobs
     calc `wc /cluster/data/dm1/bed/blat.dm1FB/dm1FB.psl | awk "{print \\\$1}"`/\(150000/`wc bug.lst | awk "{print \\\$1}"`\)
     # 18735/(150000/434) = 54.206600
     split -l 54 /cluster/data/dm1/bed/blat.dm1FB/dm1FB.psl fbfa/fb
     cd fbfa
     for i in *; do pslxToFa $i $i.fa; rm $i; done
     cd ..
     ls -1S fbfa/*.fa > fb.lst
     mkdir blastOut
     for i in `cat fb.lst`; do  mkdir blastOut/`basename $i .fa`; done
     cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
 #ENDLOOP
 '_EOF_'
     cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/iscratch/i/blast/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
 if /cluster/bin/i386/blastToPsl $f.1 $f.2
 then
 	liftUp -nosort -type=".psl" -nohead $f.3 subChr.lft warn $f.2
 	liftUp -nosort -type=".psl" -nohead $f.4 ../../jkStuff/liftAll.lft warn $f.3
 	liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /iscratch/i/dm1/protein.lft warn $f.4
 
         mv $3.tmp $3
         rm -f $f.1 $f.2 
         exit 0
     fi
 fi
 rm -f $f.1 $f.2 $3.tmp 
 exit 1
 '_EOF_'
 
     chmod +x blastSome
     gensub2 bug.lst fb.lst blastGsub blastSpec
 
     ssh kk
     cd /cluster/data/dp3/bed/tblastn.dm1FB
     para create blastSpec
     para try, push
 
 # Completed: 144699 of 144699 jobs
 # CPU time in finished jobs:    1808325s   30138.75m   502.31h   20.93d  0.057 y
 # IO & Wait Time:                550220s    9170.33m   152.84h    6.37d  0.017 y
 # Average job time:                  16s       0.27m     0.00h    0.00d
 # Longest job:                     1069s      17.82m     0.30h    0.01d
 # Submission to last job:         25184s     419.73m     7.00h    0.29d
 
     cat << '_EOF_' > chainGsub
 #LOOP
 chainSome $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainSome
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=25000 stdin ../c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainSome
 
     ls -1dS `pwd`/blastOut/fb?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
     para create chainSpec
 
     # should run this on the mini-cluster or with my shove script
     # so you can limit the number of jobs starting to 3 or 4
     para try, push...
 
 # Completed: 347 of 347 jobs
 # CPU time in finished jobs:      11953s     199.21m     3.32h    0.14d  0.000 y
 # IO & Wait Time:                  4385s      73.09m     1.22h    0.05d  0.000 y
 # Average job time:                  47s       0.78m     0.01h    0.00d
 # Longest job:                     1500s      25.00m     0.42h    0.02d
 # Submission to last job:          2224s      37.07m     0.62h    0.03d
 
     exit
     # back to kksilo
     cd /cluster/data/dp3/bed/tblastn.dm1FB/blastOut
     for i in fb??
     do 
 	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
 	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
 	echo $i
     done
     cat u.*.psl m60* |  sort -T /tmp -k 14,14 -k 16,16n -k 17,17n  | uniq  > ../blastDm1FB.psl
 
     ssh hgwdev
     cd /cluster/data/dp3/bed/tblastn.dm1FB
 
     hgLoadPsl dp3 blastDm1FB.psl
 
 # End tblastn
 
 
 # FLYBASE ANNOTATIONS (DONE 1/12/05 angie)
     ssh kksilo
     mkdir /cluster/data/dp3/bed/flybase
     cd /cluster/data/dp3/bed/flybase
     foreach f (../../?{,?}/chr*.fa)
       set c = `echo $f:t:r | sed -e 's/^chr//'`
       wget ftp://flybase.net/genomes/Drosophila_pseudoobscura/current/gff/dpse-$c-r1.03.gff.gz
     end
     zcat *.gff.gz > flybase.gff3
     # What data sources are represented in this file?
     grep -v '^#' flybase.gff3 | awk '{print $2 "\t" $3;}' | sort | uniq -c
 #   9946 .       CDS
 #     17 .       chromosome
 #      2 .       chromosome_arm
 # 120844 .       exon
 #  12197 .       gene
 #     15 .       golden_path
 #   2650 .       golden_path_fragment
 #  30310 .       intron
 #   9946 .       mRNA
 #  12101 .       orthologous_region
 #   2905 .       scaffold
 #   1170 .       syntenic_region
 #  27838 blastn:na_dbEST.dpse    match
 #  34576 blastz  match
 #  18179 genewise        gene
 #  18179 genewise        mRNA
 #  67141 genewise        match
 #   8327 genewise        match_part
 #  17182 genscan gene
 #  17182 genscan mRNA
 #  46479 genscan match
 #   4847 genscan match_part
 #  18330 twinscan        gene
 #  18330 twinscan        mRNA
 #  45317 twinscan        match
 #   4603 twinscan        match_part
     # What keywords are defined in the 9th field?
     grep -v '^#' flybase.gff3 \
     | awk '{print $9;}' | perl -wpe 's/=[^;]+;/\n/g; s/=.*$//;' \
     | sort | uniq -c
 #  39402 Dbxref		[gene, mRNA, CDS, intron only]
 # 548613 ID		[everybody has this one.]
 #  76012 Name		[gene, intron only]
 # 412944 Parent		[gene, mRNA, CDS, exon, intron, match, match_part only]
 # 116103 object_oid	[gene, match only]
 #   2666 species	[chromosome*, golden_path* only]
 #  11493 to_name	[orthologous_region only]
 #  13271 to_species	[orthologous_region, syntenic_region only]
 
     # Supposedly this will be the stable standard, but for now I wrote 
     # a one-shot perl script:
     extractGenes.pl flybase.gff3
 
     # Get predicted proteins (for main annotations only)
     wget ftp://flybase.net/genomes/Drosophila_pseudoobscura/current/fasta/dpse-all-translation-r1.03.fasta.gz
     zcat dpse-all-translation-r1.03.fasta.gz \
     | perl -wpe 's/^(>\w+)-PA/$1-RA/' > flybasePep.fa
 
     ssh hgwdev
     cd /cluster/data/dp3/bed/flybase
     ldHgGene -gtf dp3 flyBaseGene flyBase.gtf
     hgPepPred dp3 generic flyBasePep flybasePep.fa
     ldHgGene -gtf dp3 twinscan twinscan.gtf
     ldHgGene -gtf dp3 genewise genewise.gtf
     ldHgGene -gtf dp3 fbGenscan genscan.gtf
     featureBits dp3 -enrichment fbGenscan genscan
 #fbGenscan 18.578%, genscan 18.793%, both 17.662%, cover 95.07%, enrich 5.06x
     # Their genscan and ours are similar enough that we should drop one.  
     # In featureBits -enrichment flyBaseGene, ours does a hair better so 
     # drop FlyBase's.
     hgsql dp3 -e 'drop table fbGenscan'
     # Cross-referencing info for both coding and noncoding:
     hgsql dp3 < $HOME/kent/src/hg/lib/flyBase2004Xref.sql
     hgsql dp3 -e 'load data local infile "flyBase2004Xref.tab" \
       into table flyBase2004Xref'
 
 
 # SWAP CHAINS FROM DM2, BUILD NETS ETC. (DONE 5/23/05 angie) 
     ssh kksilo
     mkdir /cluster/data/dp3/bed/blastz.dm2.swap
     cd /cluster/data/dp3/bed/blastz.dm2.swap
     doBlastzChainNet.pl -swap /cluster/data/dm2/bed/blastz.dp3/DEF \
       -workhorse kkr7u00 >& do.log &
     tail -f do.log
     # Add {chain,net}Dm2 to trackDb.ra if necessary.
 
 
 # MAKE Drosophila Proteins track  (DONE 06-30-05 braney)
     ssh kk
     mkdir -p /cluster/data/dp3/bed/tblastn.dm2FB
     cd /cluster/data/dp3/bed/tblastn.dm2FB
     ls -1S /iscratch/i/dp3/blastDb/*.nsq | sed "s/\.nsq//" > target.lst
     mkdir fbfa
     # calculate a reasonable number of jobs (didn't actually do this for dp3)
     calc `wc /cluster/data/dm2/bed/blat.dm2FB/dm2FB.psl | awk "{print \\\$1}"`/\(164630/`wc target.lst | awk "{print \\\$1}"`\)
 # 18929/(164630/417) = 47.946261
     split -l 50 /cluster/data/dm2/bed/blat.dm2FB/dm2FB.psl fbfa/fb
     cd fbfa
     for i in *; do pslxToFa $i $i.fa; rm $i; done
     cd ..
     ls -1S fbfa/*.fa > fb.lst
     rm -rf /cluster/bluearc/dp3/bed/tblastn.dm2FB/blastOut
     mkdir -p /cluster/bluearc/dp3/bed/tblastn.dm2FB/blastOut
     ln -s /cluster/bluearc/dp3/bed/tblastn.dm2FB/blastOut
     for i in `cat fb.lst`; do  mkdir blastOut/`basename $i .fa`; done
     tcsh
     cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
 #ENDLOOP
 '_EOF_'
     cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/iscratch/i/blast/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
 if /cluster/bin/i386/blastToPsl $f.1 $f.2
 then
         liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/dp3/jkStuff/subChr.lft warn $f.2
         liftUp -nosort -type=".psl" -nohead $f.4 /cluster/data/dp3/jkStuff/liftAll.lft warn $f.3
         liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/dm2/bed/blat.dm2FB/protein.lft warn $f.4
         mv $3.tmp $3
         rm -f $f.1 $f.2 $f.3
         exit 0
     fi
 fi
 rm -f $f.1 $f.2 $3.tmp $f.3 $f.8
 exit 1
 '_EOF_'
 
     chmod +x blastSome
     gensub2 target.lst fb.lst blastGsub blastSpec
 
     ssh kk
     cd /cluster/data/dp3/bed/tblastn.dm2FB
     para create blastSpec
     para push
 # Completed: 143641 of 143641 jobs
 # CPU time in finished jobs:    1055248s   17587.47m   293.12h   12.21d  0.033 y
 # IO & Wait Time:                546039s    9100.65m   151.68h    6.32d  0.017 y
 # Average job time:                  11s       0.19m     0.00h    0.00d
 # Longest finished job:             107s       1.78m     0.03h    0.00d
 # Submission to last job:         20041s     334.02m     5.57h    0.23d
 
     ssh kki
     cd /cluster/data/dp3/bed/tblastn.dm2FB
     tcsh
 
     cat << '_EOF_' > chainGsub
 #LOOP
 chainSome $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainSome
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=20000 stdin ../c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainSome
 
     ls -1dS `pwd`/blastOut/fb?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
 
     para create chainSpec
     para push
 
 # Completed: 379 of 379 jobs
 # CPU time in finished jobs:      14757s     245.95m     4.10h    0.17d  0.000 y
 # IO & Wait Time:                  9660s     161.00m     2.68h    0.11d  0.000 y
 # Average job time:                  64s       1.07m     0.02h    0.00d
 # Longest finished job:            2472s      41.20m     0.69h    0.03d
 # Submission to last job:          4737s      78.95m     1.32h    0.05d
 
     ssh kkstore01
     cd /cluster/data/dp3/bed/tblastn.dm2FB/blastOut
     for i in fb??
     do 
 	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
 	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
 	echo $i
     done
 
     sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/dp3/bed/tblastn.dm2FB/blastDm2FB.psl
     cd ..
 
     ssh hgwdev
     cd /cluster/data/dp3/bed/tblastn.dm2FB
     hgLoadPsl dp3 blastDm2FB.psl
     exit
 
     # back to kksilo
     rm -rf blastOut
 
 # End tblastn
 
 
 # GENEID PREDICTIONS FROM IMIM (DONE 7/26/05 angie)
     ssh hgwdev
     mkdir /cluster/data/dp3/bed/geneid
     cd /cluster/data/dp3/bed/geneid
     foreach chr (`awk '{print $1;}' ../../chrom.sizes`)
       wget http://genome.imim.es/genepredictions/D.pseudoobscura/golden_path_200411/geneidv1.2/$chr.gtf
     end
     ldHgGene -gtf -genePredExt dp3 geneid *.gtf
 
 
 ###########################################################################
 # SWAP/CHAIN/NET DM3 (DONE 6/7/07 angie)
     ssh kkstore04
     mkdir /cluster/data/dp3/bed/blastz.dm3.swap
     cd /cluster/data/dp3/bed/blastz.dm3.swap
     doBlastzChainNet.pl -swap /cluster/data/dm3/bed/blastz.dp3/DEF >& do.log &
     tail -f do.log
     ln -s blastz.dm3.swap /cluster/data/dp3/bed/blastz.dm3
+###########################################################################
+# LASTZ D. pseudoobscura Dp3 vs. D. persimilis DroPer1 (DONE - 2022-11-21 - Gerardo)
+
+# should be able to run this from anywhere, this time it was run from:
+    cd kent/src/hg/utils/automation
+
+  time (~/kent/src/hg/utils/automation/pairLastz.sh \
+	dp3 droPer1 other other) \
+	   > dp3.droPer1_20221121.log 2>&1 &
+  # check the total time
+grep -w real  dp3.droPer1_20221121.log  | tail -1 | sed -e 's/^/    # /;'
+    # real      419m30.782s
 
+    # this command outputs this makeDoc text:
+
+    cat /hive/data/genomes/dp3/bed/lastzDroPer1.2022-11-21/makeDoc.txt
+###########################################################################
+# LASTZ D. pseudoobscura Dp3 vs. D. persimilis DroPer1
+#    (DONE - 2022-11-21 - Gerardo)
+
+    mkdir /hive/data/genomes/dp3/bed/lastzDroPer1.2022-11-21
+    cd /hive/data/genomes/dp3/bed/lastzDroPer1.2022-11-21
+
+    printf '# D. persimilis DroPer1 vs. D. pseudoobscura Dp3
+BLASTZ=/cluster/bin/penn/lastz-distrib-1.04.03/bin/lastz
+
+# TARGET: D. pseudoobscura  dp3
+SEQ1_DIR=/hive/data/genomes/dp3/dp3.2bit
+SEQ1_LEN=/hive/data/genomes/dp3/chrom.sizes
+SEQ1_CHUNK=20000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=40
+
+# QUERY: D. persimilis  droPer1
+SEQ2_DIR=/hive/data/genomes/droPer1/droPer1.2bit
+SEQ2_LEN=/hive/data/genomes/droPer1/chrom.sizes
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=100
+
+BASE=/hive/data/genomes/dp3/bed/lastzDroPer1.2022-11-21
+TMPDIR=/dev/shm
+
+' > DEF
+
+    time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -verbose=2 `pwd`/DEF -syntenicNet \
+        -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+        -chainMinScore=5000 -chainLinearGap=loose) > do.log 2>&1
+    grep -w real do.log | sed -e 's/^/    # /;'
+    # real	328m6.727s
+
+    sed -e 's/^/    # /;' fb.dp3.chainDroPer1Link.txt
+    # 129266273 bases of 134584124 (96.049%) in intersection
+    sed -e 's/^/    # /;' fb.dp3.chainSynDroPer1Link.txt
+    # 114329591 bases of 134584124 (84.950%) in intersection
+
+    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
+       \
+       \
+        dp3 droPer1) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	3m59.362s
+
+    sed -e 's/^/    # /;' fb.dp3.chainRBest.DroPer1.txt
+    # 123131513 bases of 134584124 (91.490%) in intersection
+
+    ### and for the swap
+
+    cd /hive/data/genomes/droPer1/bed/blastz.dp3.swap
+
+   time (~/kent/src/hg/utils/automation/doBlastzChainNet.pl  -swap -verbose=2 \
+    /hive/data/genomes/dp3/bed/lastzDroPer1.2022-11-21/DEF -swapDir=`pwd` \
+  -syntenicNet -workhorse=hgwdev -smallClusterHub=hgwdev -bigClusterHub=ku \
+    -chainMinScore=5000 -chainLinearGap=loose) > swap.log 2>&1
+
+    grep -w real swap.log | sed -e 's/^/    # /;'
+    # real	71m46.993s
+
+    sed -e 's/^/    # /;' fb.droPer1.chainDp3Link.txt
+    # 165484841 bases of 175583556 (94.248%) in intersection
+    sed -e 's/^/    # /;' fb.droPer1.chainSynDp3Link.txt
+    # 123700833 bases of 175583556 (70.451%) in intersection
+\    time (~/kent/src/hg/utils/automation/doRecipBest.pl  -load -workhorse=hgwdev -buildDir=`pwd` \
+    \
+    \
+   droPer1 dp3) > rbest.log 2>&1
+
+    grep -w real rbest.log | sed -e 's/^/    # /;'
+    # real	15m37.284s
+
+    sed -e 's/^/    # /;' fb.droPer1.chainRBest.Dp3.txt
+    # 124103933 bases of 175583556 (70.681%) in intersection