src/hg/makeDb/doc/loxAfr1.txt 1.8

1.8 2009/07/21 21:01:44 markd
added transmap update blurb
Index: src/hg/makeDb/doc/loxAfr1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/loxAfr1.txt,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 1000000 -r1.7 -r1.8
--- src/hg/makeDb/doc/loxAfr1.txt	4 Dec 2008 00:59:07 -0000	1.7
+++ src/hg/makeDb/doc/loxAfr1.txt	21 Jul 2009 21:01:44 -0000	1.8
@@ -1,1048 +1,1057 @@
 # for emacs: -*- mode: sh; -*-
 
 
 # Elephant May 2005 Broad Assembly
 #
 
 
 # DOWNLOAD SEQUENCE 
     ssh kkstore01
     mkdir /cluster/store9/loxAfr1
     cd /cluster/data
     ln -s /cluster/store9/loxAfr1 loxAfr1
     cd /cluster/data/loxAfr1
     mkdir jkStuff bed
     mkdir broad
     cd broad
     # ftp'ed with password from Broad
     # -rw-rw-r--  1 braney protein   81083988 May 20 20:08 assembly.agp
     # -rw-rw-r--  1 braney protein  799096831 May 20 20:30 assembly.bases.gz
     # -rw-rw-r--  1 braney protein 1340880552 May 20 21:07 assembly.quals.gz
     # -rw-rw-r--  1 braney protein  408350312 May 21 09:05 unplaced.fasta.gz
     # -rw-rw-r--  1 braney protein 1258465506 May 21 09:40 unplaced.qual.gz
 
     gunzip assembly.bases.gz 
     faSize assembly.bases
 # 2295548473 bases (0 N's 2295548473 real 2295548473 upper 0 lower) in 918949 sequences in 1 files
 # Total size: mean 2498.0 sd 2280.1 min 600 (contig_719849) max 181008 (contig_82) median 1929
 # N count: mean 0.0 sd 0.0
 # U count: mean 2498.0 sd 2280.1
 # L count: mean 0.0 sd 0.0
 
     ssh kolossus
     cd /cluster/data/loxAfr1/broad
     /cluster/bin/x86_64/agpAllToFaFile assembly.agp assembly.bases ../loxAfr1.fa
     cd ..
     faSize loxAfr1.fa
 # 3707398801 bases (1411850328 N's 2295548473 real 2295548473 upper 0 lower) in 233134 sequences in 1 files
 # Total size: mean 15902.4 sd 26684.7 min 601 (scaffold_233133) max 615624 (scaffold_0) median 5715
 # N count: mean 6056.0 sd 12052.4
 # U count: mean 9846.5 sd 16481.1
 # L count: mean 0.0 sd 0.0
 
     /cluster/bin/scripts/agpToLift < assembly.agp > ../jkStuff/assembly.lft
 
 # PARTITION SCAFFOLDS FOR REPEATMASKER RUN 
     # glom the tiny scaffolds up into ~50k collections 
     ssh kkstore01
     cd /cluster/data/loxAfr1
     mkdir chunks50k
     faSplit about broad/assembly.bases 50000 chunks50k/chunk_
     cd chunks50k
     for i in 0 1 2 3 4 5 6 7 8 9; do mkdir $i; mv *$i.fa $i; done
 
 # RUN REPEAT MASKER 
     # make the run directory, output directory, and job list
     ssh kkstore01
     cd /cluster/data/loxAfr1
     tcsh
     cat << '_EOF_' > jkStuff/RMElephant
 #!/bin/csh -fe
 
 cd $1
 /bin/mkdir -p /tmp/loxAfr1/$2
 /bin/cp $3 /tmp/loxAfr1/$2/
 pushd /tmp/loxAfr1/$2
 /cluster/bluearc/RepeatMasker/RepeatMasker -s -species elephant $2
 popd
 /bin/cp /tmp/loxAfr1/$2/$2.out ./
 /bin/rm -fr /tmp/loxAfr1/$2/*
 /bin/rmdir --ignore-fail-on-non-empty /tmp/loxAfr1/$2
 /bin/rmdir --ignore-fail-on-non-empty /tmp/loxAfr1
 '_EOF_'
     # << this line makes emacs coloring happy
     chmod +x jkStuff/RMElephant
 
     mkdir RMRun RMOut
     cd RMOut
     mkdir 0 1 2 3 4 5 6 7 8 9
     cd ../chunks50k
     for i in */*.fa
     do
 	e=`dirname $i`
 	d="/cluster/data/loxAfr1"
 	c=`basename $i`
     echo "../jkStuff/RMElephant $d/RMOut/$e $c  {check in line+ $d/chunks50k/$i} {check out line+ $d/RMOut/$e/$c.out}"
     done > ../RMRun/RMJobs
 
     # do the run
     ssh kk
     cd /cluster/data/loxAfr1/RMRun
     para create RMJobs
     para try, check, push, check,...
 # Completed: 44022 of 44022 jobs
 # CPU time in finished jobs:   20439985s  340666.42m  5677.77h  236.57d  0.648 y
 # IO & Wait Time:                171349s    2855.81m    47.60h    1.98d  0.005 y
 # Average job time:                 468s       7.80m     0.13h    0.01d
 # Longest finished job:            2125s      35.42m     0.59h    0.02d
 # Submission to last job:         44073s     734.55m    12.24h    0.51d
 
     # Lift up the split-scaffold .out's to scaffold .out's
     ssh kkstore01
     cd /cluster/data/loxAfr1/RMOut
     for i in 0 1 2 3 4 5 6 7 8 9; do echo $i; liftUp -nohead $i.out ../jkStuff/assembly.lft warn $i/*.fa.out>/dev/null; done
     head -3 0.out > loxAfr1.out
     for i in 0 1 2 3 4 5 6 7 8 9; do tail +4 $i.out; done >> loxAfr1.out
     # Load the .out files into the database with:
     ssh hgwdev
     hgLoadOut loxAfr1 /cluster/data/loxAfr1/RMOut/loxAfr1.out
     # Lots of strange perc messages
     # Loading up table loxAfr1_rmsk
     # note: 6 records dropped due to repStart > repEnd
 
     hgsql loxAfr1 -e 'rename table loxAfr1_rmsk to rmsk'
     # Fix up the indices too:
     hgsql loxAfr1 -e 'drop index bin       on rmsk;  drop index genoStart on rmsk;  drop index genoEnd   on rmsk; \
                   create index bin       on rmsk (genoName(16), bin); \
                   create index genoStart on rmsk (genoName(16), genoStart); \
                   create index genoEnd   on rmsk (genoName(16), genoEnd);'
 
 
 # EXTRACTING GAP INFO FROM BLOCKS OF NS (DONE 11/5/04 angie)
     ssh kkstore01
     mkdir /cluster/data/loxAfr1/bed/fakeAgp
     cd /cluster/data/loxAfr1/bed/fakeAgp
     faGapSizes ../../downloads/scaffolds.fasta \
         -niceSizes=5,10,20,25,30,40,50,100,250,500,1000,10000,100000
     # Wow, all block of N's seem to be exactly 100bp long.  
     # hgFakeAgp's default -minContigGap of 25 will be fine.  
     hgFakeAgp ../../downloads/scaffolds.fasta fake.agp
     ssh hgwdev
     hgLoadGap -unsplit loxAfr1 /cluster/data/loxAfr1/bed/fakeAgp/fake.agp
 
 
 # SIMPLE REPEATS (TRF) 
     ssh kkstore01
     mkdir /cluster/data/loxAfr1/bed/simpleRepeat
     cd /cluster/data/loxAfr1/bed/simpleRepeat
     nice trfBig -trf=/cluster/bin/i386/trf ../../loxAfr1.fa \
       /dev/null -bedAt=simpleRepeat.bed -tempDir=/tmp \
     |& egrep -v '^(Removed|Tandem|Copyright|Loading|Allocating|Initializing|Computing|Scanning|Freeing)' \
     > trf.log &
     # check on this with
     tail -f trf.log
 
     # Load this into the database as so
     ssh hgwdev
     hgLoadBed loxAfr1 simpleRepeat /cluster/data/loxAfr1/bed/simpleRepeat/simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
 
 
 # FILTER SIMPLE REPEATS (TRF) INTO MASK 
     # make a filtered version of the trf output: 
     # keep trf's with period <= 12:
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/simpleRepeat
     awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed
 
 
 # MASK FA USING REPEATMASKER AND FILTERED TRF FILES 
     ssh kolossus
     cd /cluster/data/loxAfr1
     /cluster/bin/x86_64/maskOutFa -soft loxAfr1.fa bed/simpleRepeat/trfMask.bed loxAfr1.simple.fa
     /cluster/bin/x86_64/maskOutFa -softAdd loxAfr1.simple.fa RMOut/loxAfr1.out loxAfr1.masked.fa
     mv loxAfr1.fa loxAfr1.unmasked.fa
     # Now clean up the unmasked split scaffolds to avoid confusion later.
     rm -r chunks500k scaffoldsSplit.fa jkStuff/scaffoldsSplit.lft
 
 
 # CREATING DATABASE
     # Create the database.
     ssh hgwdev
     # Make sure there is at least 5 gig free for the database
     df -h /var/lib/mysql
 Filesystem            Size  Used Avail Use% Mounted on
 # /dev/sdc1             1.8T  915G  746G  56% /var/lib/mysql
     hgsql '' -e 'create database loxAfr1'
 
 # STORE SEQUENCE AND ASSEMBLY INFORMATION 
     # Translate to 2bit
     ssh kkstore01
     cd /cluster/data/loxAfr1
     /cluster/bin/x86_64/faToTwoBit loxAfr1.masked.fa loxAfr1.2bit
     # Make chromInfo.tab.
     mkdir bed/chromInfo
     twoBitInfo loxAfr1.2bit stdout | awk '{printf "%s\t%s\t/gbdb/loxAfr1/loxAfr1.2bit\n",$1,$2;}' \
 	| sort > bed/chromInfo/chromInfo.tab
 
     # Make symbolic a link from /gbdb/loxAfr1 to the 2bit.
     ssh hgwdev
     mkdir -p /gbdb/loxAfr1
     ln -s /cluster/data/loxAfr1/loxAfr1.2bit /gbdb/loxAfr1/
     # Load chromInfo table.
     hgsql loxAfr1 < $HOME/kent/src/hg/lib/chromInfo.sql
     hgsql loxAfr1 -e 'load data local infile "/cluster/data/loxAfr1/bed/chromInfo/chromInfo.tab" into table chromInfo'
     # Make chrom.sizes from chromInfo contents and check scaffold count.
     hgsql loxAfr1 -N -e 'select chrom,size from chromInfo' > /cluster/data/loxAfr1/chrom.sizes
     wc -l /cluster/data/loxAfr1/chrom.sizes
 # 233134 /cluster/data/loxAfr1/chrom.sizes
 
 # CREATING GRP TABLE FOR TRACK GROUPING 
     # Copy all the data from the table "grp" 
     # in an existing database to the new database
     ssh hgwdev
     hgsql loxAfr1 -e 'create table grp (PRIMARY KEY(NAME)) select * from hg17.grp'
 
 
 # MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE (DONE 11/4/04 angie)
     # Warning: genome and organism fields must correspond
     # with defaultDb values
     echo 'INSERT INTO dbDb \
         (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, \
              htmlPath, hgNearOk, hgPbOk, sourceName) values \
         ("loxAfr1", "May. 2005", "/gbdb/loxAfr1", "Elephant", \
              "", 1, 57,  "Elephant", "Loxodonta africana", "/gbdb/loxAfr1/html/description.html", \
              0, 0, "Broad May 2005");' \
       | hgsql -h genome-testdb hgcentraltest
     echo 'INSERT INTO defaultDb (genome, name) values ("Elephant", "loxAfr1");' \
       | hgsql -h genome-testdb hgcentraltest
 
     # Make trackDb table so browser knows what tracks to expect:
     ssh hgwdev
     cd ~/kent/src/hg/makeDb/trackDb
     cvs up -d -P
 
     # Edit trackDb/makefile to add loxAfr1 to the DBS variable.
     mkdir -p rabbit/loxAfr1
     # Create a simple rabbit/loxAfr1/description.html file.
     cvs add drosophila/loxAfr1
     cvs add drosophila/loxAfr1/description.html
     make update DBS=loxAfr1 ZOO_DBS=
 
     # go public on genome-test
     cvs ci makefile
     cvs ci rabbit/loxAfr1
     mkdir /gbdb/loxAfr1/html
     # in a clean, updated tree's kent/src/hg/makeDb/trackDb:
     make alpha
 
 
 # PUT SEQUENCE ON /ISCRATCH FOR BLASTZ 
     # First, agglomerate small scaffolds into chunks of ~100k median 
     # (many scaffolds are larger than that) so we don't have too many 
     # files for one dir, but keep a reasonably low job run time:
     ssh kkstore01
     cd /cluster/data/loxAfr1
     mkdir chunksUnsplit
     faSplit about scaffolds.fa 100000 chunksUnsplit/chunk_
     ssh kkr1u00
     mkdir /iscratch/i/loxAfr1
     cp -pR /cluster/data/loxAfr1/chunksUnsplit /iscratch/i/loxAfr1/
     cp -p /cluster/data/loxAfr1/loxAfr1.2bit /iscratch/i/loxAfr1/
     iSync
 
 
 # PRODUCING GENSCAN PREDICTIONS (DONE 11/4/04 angie)
     ssh kkstore01
     # Make hard-masked scaffolds and split up for processing:
     cd /cluster/data/loxAfr1
     maskOutFa scaffolds.fa hard scaffolds.fa.masked
     mkdir chunksUnsplitMasked
     faSplit about scaffolds.fa.masked 100000 chunksUnsplitMasked/chunk_
     mkdir /cluster/data/loxAfr1/bed/genscan
     cd /cluster/data/loxAfr1/bed/genscan
     # Check out hg3rdParty/genscanlinux to get latest genscan:
     cvs co hg3rdParty/genscanlinux
     # Make 3 subdirectories for genscan to put their output files in
     mkdir gtf pep subopt
     ls -1S ../../chunksUnsplitMasked/chunk*.fa > chunks.list
     cat << '_EOF_' > gsub
 #LOOP
 gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
 #ENDLOOP
 '_EOF_'
     # << this line keeps emacs coloring happy
     gensub2 chunks.list single gsub jobList
     ssh kki
     cd /cluster/data/loxAfr1/bed/genscan
     para create jobList
     para try, check, push, check, ...
 #Completed: 463 of 463 jobs
 #Average job time:                  12s       0.21m     0.00h    0.00d
 #Longest job:                      317s       5.28m     0.09h    0.00d
 #Submission to last job:           445s       7.42m     0.12h    0.01d
 
     # If there are crashes, diagnose with "para problems".  
     # If a job crashes due to genscan running out of memory, re-run it 
     # manually with "-window=1200000" instead of "-window=2400000".
     
     # Concatenate scaffold-level results:
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/genscan
     cat gtf/*.gtf > genscan.gtf
     cat subopt/*.bed > genscanSubopt.bed
     cat pep/*.pep > genscan.pep
     # Clean up
     rm -r /cluster/data/loxAfr1/chunksUnsplitMasked
 
     # Load into the database as so:
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/genscan
     # Reloaded without -genePredExt 1/6/05:
     ldHgGene -gtf loxAfr1 genscan genscan.gtf
     hgPepPred loxAfr1 generic genscanPep genscan.pep
     hgLoadBed loxAfr1 genscanSubopt genscanSubopt.bed
 
 
 # MAKE DOWNLOADABLE FILES (DONE 11/4/04 angie)
 # RECREATE BIGZIPS DOWNLOADS AND README FILE (DONE, 2006-05-05, hartera)
 # ADDED LIFTOVER CHAIN FILES FOR DOWNLOAD AND ADDED md5sum.txt FILE FOR 
 # THESE LIFTOVER DOWNLOADS AND CREATED CORRECT md5sum.txt FOR vsMm7 
 # DOWNLOADS (DONE, 2006-05-23, hartera)
     ssh kkstore01
     mkdir /cluster/data/loxAfr1/zips
     cd /cluster/data/loxAfr1
     zip -j zips/scaffoldOut.zip RMOut/scaffolds.fa.out
     zip -j zips/scaffoldFa.zip scaffolds.fa
     zip -j zips/scaffoldFaMasked.zip scaffolds.fa.masked
     zip -j zips/scaffoldTrf.zip bed/simpleRepeat/trfMask.bed
     foreach f (zips/*.zip)
       echo $f
       unzip -t $f | tail -1
     end
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/loxAfr1
     cd /usr/local/apache/htdocs/goldenPath/loxAfr1
     mkdir bigZips database
     # Create README.txt files in bigZips/ and database/ to explain the files.
     cd bigZips
     cp -p /cluster/data/loxAfr1/zips/*.zip .
     md5sum *.zip > md5sum.txt
     
     # Add more bigZips downloads. Some of the above downloads don't exist 
     # anymore in bigZips in /usr/local/apache/htdocs/goldenPath/... on hgwdev
     # (2006-05-05, hartera)
     ssh kkstore04
     mkdir /cluster/data/loxAfr1/bigZips
     cd /cluster/data/loxAfr1
     # soft-masked scaffolds sequences
     cp -p loxAfr1.masked.fa.gz ./bigZips/scaffoldSoftMask.fa.gz
     # assembly agp file
     cp -p ./broad/assembly.agp ./bigZips/
     # Simple Repeats
     cp -p ./bed/simpleRepeat/trfMask.bed ./bigZips/trf.bed
     # RepeatMasker output
     cp -p ./RMOut/loxAfr1.out.gz ./bigZips/rmsk.out
     # unmasked scaffolds sequences
     cp -p loxAfr1.unmasked.fa.gz ./bigZips/scaffold.fa.gz
     cd bigZips
     gzip assembly.agp
     gzip trf.bed
     gzip rmsk.out
     # check integrity of files
     foreach f (*.gz)
       echo $f
       gunzip -t $f | tail -1
     end
     md5sum *.gz > md5sum.txt
  
     # link the *.gz and *.txt files to hgwdev:/usr/local/apache/....
     ssh hgwdev
     set gp=/usr/local/apache/htdocs/goldenPath/loxAfr1
     rm -r $gp/bigZips
     mkdir -p $gp/bigZips
     ln -s /cluster/data/loxAfr1/bigZips/{*.gz,*.txt} $gp/bigZips
     # copy over README.txt and edit for bigZips
     # Move over loxAfr1ToMm7.over.chain.gz from hgwdevold and then
     # add this and loxAfr1ToHg18.over.chain.gz to goldenPath liftOver dir.
     # Add liftOver md5sum.txt. loxAfr1ToMm7.over.chain.gz is in the md5sum.txt
     # for the vsMm7 directory which is wrong so recreate this md5sum.txt file
     # (hartera, 2006-05-23)
     ssh hgwdev
     set gp=/usr/local/apache/htdocs/goldenPath/loxAfr1
     # copy over from hgwdevold
     cd /cluster/data/loxAfr1/bed/liftOver
     scp \
     hgwdevold:/usr/local/apache/htdocs/goldenPath/loxAfr1/liftOver/loxAfr1ToMm7.over.chain.gz .
     cd $gp/liftOver
     ln -s /cluster/data/loxAfr1/bed/liftOver/*.gz $gp/liftOver
     # create md5sum.txt
     md5sum *.gz > md5sum.txt
     # recreate md5sum.txt for vsMm7 files
     cd $gp/vsMm7
     rm md5sum.txt
     md5sum *.gz > md5sum.txt
     # check that vsMm7 README.txt is correct.
 
 # QA NOTE on bigZips above (10/26/07 rhead): a user found that rmsk.out.gz was
 # corrupted. It had been gzipped twice.  Gunzipped the file
 # /cluster/store9/loxAfr1/bigZips/rmsk.out.gz, then renamed it with the .gz
 # extension.  Also updated the md5sum.txt file.
  
 # SWAP DM1-DROANA1 BLASTZ (DONE 11/4/04 angie)
     ssh kkstore01
     mkdir /cluster/data/loxAfr1/bed/blastz.dm1.swap.2004-11-03
     ln -s blastz.dm1.swap.2004-11-03 /cluster/data/loxAfr1/bed/blastz.dm1
     cd /cluster/data/loxAfr1/bed/blastz.dm1
     set aliDir = /cluster/data/dm1/bed/blastz.loxAfr1
     cp $aliDir/S1.len S2.len
     cp $aliDir/S2.len S1.len
     # With 11k scaffolds, we don't want a directory with one file per 
     # scaffold.  So just make one .axt with everything -- not too huge 
     # anyway, given these little insect genomes.
     cat $aliDir/axtChrom/chr*.axt \
     | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
     | axtSort stdin dm1.axt
     du -sh $aliDir/axtChrom dm1.axt
 #389M    /cluster/data/dm1/bed/blastz.loxAfr1/axtChrom
 #389M    dm1.axt
 
 
 # CHAIN MELANOGASTER BLASTZ (DONE 11/4/04 angie)
     # Run axtChain on kolossus (one big dm1.axt input)
     ssh kolossus
     mkdir /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     axtChain -verbose=0 ../dm1.axt /cluster/data/loxAfr1/loxAfr1.2bit \
       /cluster/data/dm1/nib stdout \
     | chainAntiRepeat /cluster/data/loxAfr1/loxAfr1.2bit \
       /cluster/data/dm1/nib stdin stdout \
     | chainMergeSort stdin > all.chain
     # Load chains into database
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     hgLoadChain -tIndex loxAfr1 chainDm1 all.chain
 
 
 # NET MELANOGASTER BLASTZ (DONE 11/4/04 angie)
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     chainPreNet all.chain ../S1.len ../S2.len stdout \
     | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
     | netSyntenic stdin noClass.net
 
     # Add classification info using db tables:
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     netClass -noAr noClass.net loxAfr1 dm1 melanogaster.net \
     |& g -v "table gap doesn't exist"
 
     # Make a 'syntenic' subset:
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     rm noClass.net
     netFilter -syn melanogaster.net > melanogasterSyn.net
 
     # Load the nets into database 
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     netFilter -minGap=10 melanogaster.net |  hgLoadNet loxAfr1 netDm1 stdin
     netFilter -minGap=10 melanogasterSyn.net \
     | hgLoadNet loxAfr1 netSyntenyDm1 stdin
 
 
 # MAKE AXTNET (DONE 11/4/04 angie)
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     netToAxt melanogaster.net all.chain /cluster/data/loxAfr1/loxAfr1.2bit \
         /cluster/data/dm1/nib stdout \
       | axtSort stdin melanogasterNet.axt
 
 
 # MAKE VSDM1 DOWNLOADABLES (DONE 11/4/04 angie)
     ssh kkstore01
     cd /cluster/data/loxAfr1/bed/blastz.dm1/axtChain
     nice gzip *.{chain,net,axt}
     ssh hgwdev
     mkdir /usr/local/apache/htdocs/goldenPath/loxAfr1/vsDm1
     cd /usr/local/apache/htdocs/goldenPath/loxAfr1/vsDm1
     cp -p /cluster/data/loxAfr1/bed/blastz.dm1/axtChain/all.chain.gz \
       melanogaster.chain.gz
     cp -p /cluster/data/loxAfr1/bed/blastz.dm1/axtChain/melanogaster.net.gz .
     cp -p /cluster/data/loxAfr1/bed/blastz.dm1/axtChain/melanogasterNet.axt.gz .
     # Make a README.txt which explains the files & formats.
     md5sum *.gz */*.gz > md5sum.txt
 
 
 # MAKE 11.OOC FILE FOR BLAT (DONE 11/4/04 angie)
     # Use -repMatch=100 (based on size -- for human we use 1024, and 
     # fly size is ~4.4% of human judging by gapless dm1 genome size from 
     # featureBits -- we would use 45, but bump that up a bit to be more 
     # conservative).
     ssh kkr1u00
     mkdir /cluster/bluearc/loxAfr1
     blat /cluster/data/loxAfr1/loxAfr1.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=/cluster/bluearc/loxAfr1/11.ooc -repMatch=100
 #Wrote 9721 overused 11-mers to /cluster/bluearc/loxAfr1/11.ooc
     cp -p /cluster/bluearc/loxAfr1/*.ooc /iscratch/i/loxAfr1/
     iSync
 
 
 # GET GENBANK mRNA AND EST COUNTS 
     # Go to the latest GenBank full release dir and get an idea of how
     # many mRNAs and ESTs there are to align.
     ssh eieio
     cd /cluster/data/genbank/data/processed/genbank.144.0/full
     awk '$4 == "Elephant" {print $4 " " $5;}' mrna.gbidx | sort | uniq -c
 #      9 Drosophila ananassae
 #      1 Drosophila mojavensis
 #     33 Drosophila virilis
     # Wow, questionable whether we should have a native mRNA track here.
     awk '$4 == "Drosophila" {print $4 " " $5;}' est*.gbidx | sort | uniq -c
 # 382439 Drosophila melanogaster
 #   4105 Drosophila simulans
 #    779 Drosophila yakuba
     # And a native EST track isn't even a possibility for the new flies 
     # at this point!  
 
 
 # AUTO UPDATE GENBANK MRNA RUN (DONE 11/16/04 angie)
     ssh hgwdev
 
     # Update genbank config and source in CVS:
     cd ~/kent/src/hg/makeDb/genbank
     cvsup .
 
     # Edit etc/genbank.conf and add these lines (note scaffold-browser settings):
 # loxAfr1 (D. ananassae)
 loxAfr1.genome = /iscratch/i/loxAfr1/loxAfr1.2bit
 loxAfr1.mondoTwoBitParts = 1000
 loxAfr1.lift = no
 loxAfr1.refseq.mrna.native.load = no
 loxAfr1.refseq.mrna.xeno.load = yes
 loxAfr1.refseq.mrna.xeno.pslReps = -minCover=0.15 -minAli=0.75 -nearTop=0.005
 loxAfr1.genbank.mrna.xeno.load = yes
 # GenBank has no D. ananassae ESTs at this point... that may change.
 loxAfr1.genbank.est.native.load = no
 loxAfr1.genbank.est.xeno.load = no
 loxAfr1.downloadDir = loxAfr1
 loxAfr1.perChromTables = no
 
     cvs ci etc/genbank.conf
     # Since D. ananassae is a new species for us, edit src/lib/gbGenome.c.  
     # Pick some other browser species, & monkey-see monkey-do.  
     cvs diff src/lib/gbGenome.c
     make
     cvs ci src/lib/gbGenome.c
     # Edit src/align/gbBlat to add /iscratch/i/loxAfr1/11.ooc
     cvs diff src/align/gbBlat
     make
     cvs ci src/align/gbBlat
 
     # Install to /cluster/data/genbank:
     make install-server
 
     ssh `fileServer /cluster/data/genbank/`
     cd /cluster/data/genbank
     # This is an -initial run, (xeno) RefSeq only:
     nice bin/gbAlignStep -srcDb=refseq -type=mrna -initial loxAfr1 &
     tail -f [its logfile]
     # Load results:
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad loxAfr1
     featureBits loxAfr1 xenoRefGene
 #16520385 bases of 165766797 (9.966%) in intersection
     # Clean up:
     rm -rf work/initial.loxAfr1
 
     # This is an -initial run, mRNA only:
     nice bin/gbAlignStep -srcDb=genbank -type=mrna -initial loxAfr1 &
     tail -f [its logfile]
     # Load results:
     ssh hgwdev
     cd /cluster/data/genbank
     nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad loxAfr1
     featureBits loxAfr1 all_mrna
 #19602 bases of 165766797 (0.012%) in intersection
     featureBits loxAfr1 xenoMrna
 #17295487 bases of 165766797 (10.434%) in intersection
     # Clean up:
     rm -rf work/initial.loxAfr1
 
 
 # MAKE GCPERCENT (DONE 11/4/04 angie)
     ssh hgwdev
     mkdir /cluster/data/loxAfr1/bed/gcPercent
     cd /cluster/data/loxAfr1/bed/gcPercent
     # create and load gcPercent table
     hgGcPercent loxAfr1 /cluster/data/loxAfr1
 
 
 # MAKE HGCENTRALTEST BLATSERVERS ENTRY (DONE 12/?/04 heather)
     ssh hgwdev
     echo 'insert into blatServers values("loxAfr1", "blat14", "17780", 1, 0); \
           insert into blatServers values("loxAfr1", "blat14", "17781", 0, 1);' \
       | hgsql -h genome-testdb hgcentraltest
 
 
 # MAKE Drosophila Proteins track  (DONE braney 11/17/04)
     ssh kkstore01
     mkdir -p /cluster/data/loxAfr1/blastDb
     cd /cluster/data/loxAfr1/blastDb
     faSplit sequence ../scaffolds.fa 400 x
     for i in *.fa; do formatdb -i $i -p F 2> /dev/null; done
     rm *.fa *.log
 
     ssh kkr1u00
     mkdir -p /iscratch/i/loxAfr1/blastDb
     cp /cluster/data/loxAfr1/blastDb/* /iscratch/i/loxAfr1/blastDb
     (iSync) 2>&1 > sync.out
     
     mkdir -p /cluster/data/loxAfr1/bed/tblastn.dm1FB
     cd /cluster/data/loxAfr1/bed/tblastn.dm1FB
     ls -1S /iscratch/i/loxAfr1/blastDb/*.nsq | sed "s/\.nsq//" > bug.lst
     exit
 
     # back to kkstore01
     cd /cluster/data/loxAfr1/bed/tblastn.dm1FB
     mkdir fbfa
     # calculate a reasonable number of jobs
     calc `wc /cluster/data/dm1/bed/blat.dm1FB/dm1FB.psl | awk "{print \\\$1}"`/\(150000/`wc bug.lst | awk "{print \\\$1}"`\)
     # 18735/(150000/396) = 49.460400
     split -l 49 /cluster/data/dm1/bed/blat.dm1FB/dm1FB.psl fbfa/fb
     cd fbfa
     for i in *; do pslxToFa $i $i.fa; rm $i; done
     cd ..
     ls -1S fbfa/*.fa > fb.lst
     mkdir blastOut
     for i in `cat fb.lst`; do  mkdir blastOut/`basename $i .fa`; done
     cat << '_EOF_' > blastGsub
 #LOOP
 blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
 #ENDLOOP
 '_EOF_'
     cat << '_EOF_' > blastSome
 #!/bin/sh
 BLASTMAT=/iscratch/i/blast/data
 export BLASTMAT
 g=`basename $2`
 f=/tmp/`basename $3`.$g
 for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
 do
 if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
 then
         mv $f.8 $f.1
         break;
 fi
 done
 if test -f  $f.1
 then
 if /cluster/bin/i386/blastToPsl $f.1 $f.2
 then
 	liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /iscratch/i/dm1/protein.lft warn $f.2
 
         mv $3.tmp $3
         rm -f $f.1 $f.2 
         exit 0
     fi
 fi
 rm -f $f.1 $f.2 $3.tmp 
 exit 1
 '_EOF_'
 
     chmod +x blastSome
     gensub2 bug.lst fb.lst blastGsub blastSpec
 
     ssh kk
     cd /cluster/data/loxAfr1/bed/tblastn.dm1FB
     para create blastSpec
     para try, push
 
 # Completed: 151668 of 151668 jobs
 # CPU time in finished jobs:    2932565s   48876.08m   814.60h   33.94d  0.093 y
 # IO & Wait Time:                694006s   11566.77m   192.78h    8.03d  0.022 y
 # Average job time:                  24s       0.40m     0.01h    0.00d
 # Longest job:                     2721s      45.35m     0.76h    0.03d
 # Submission to last job:         73860s    1231.00m    20.52h    0.85d
 
     cat << '_EOF_' > chainGsub
 #LOOP
 chainSome $(path1)
 #ENDLOOP
 '_EOF_'
 
     cat << '_EOF_' > chainSome
 (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=25000 stdin ../c.`basename $1`.psl)
 '_EOF_'
     chmod +x chainSome
 
     ls -1dS `pwd`/blastOut/fb?? > chain.lst
     gensub2 chain.lst single chainGsub chainSpec
     para create chainSpec
 
     # should run this on the mini-cluster or with my shove script
     # so you can limit the number of jobs starting to 3 or 4
     para try, push...
 
 # Completed: 383 of 383 jobs
 # CPU time in finished jobs:        327s       5.44m     0.09h    0.00d  0.000 y
 # IO & Wait Time:                  8218s     136.97m     2.28h    0.10d  0.000 y
 # Average job time:                  22s       0.37m     0.01h    0.00d
 # Longest job:                       54s       0.90m     0.01h    0.00d
 # Submission to last job:           674s      11.23m     0.19h    0.01d
 
     exit
     # back to kkstore01
     cd /cluster/data/loxAfr1/bed/tblastn.dm1FB/blastOut
     for i in fb??
     do 
 	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
 	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
 	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
 	echo $i
     done
     sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/loxAfr1/bed/tblastn.dm1FB/blastDm1FB.psl
 
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/tblastn.dm1FB
 
     hgLoadPsl loxAfr1 blastDm1FB.psl
 
 # End tblastn
 
 
 # SWAP CHAINS FROM DM2, BUILD NETS ETC. (DONE 3/2/05 angie)
     ssh kkstore01
     mkdir /cluster/data/loxAfr1/bed/blastz.dm2.swap
     cd /cluster/data/loxAfr1/bed/blastz.dm2.swap
     doBlastzChainNet.pl -swap /cluster/data/dm2/bed/blastz.loxAfr1/DEF \
       >& do.log &
     tail -f do.log
     # Add {chain,net}Dm2 to trackDb.ra if necessary.
     # Add /usr/local/apache/htdocs/goldenPath/loxAfr1/vsDm2/README.txt
 
 
 #########################################################################
 #	BLASTZ NOTE:  with the advent of Angie's script to run the
 #	blastz process through to chains and nets loaded into the
 #	database and download files prepared, it is now a juggling act
 #	to see which klusters are available.  The particular options to
 #	the script to make it go to one kluster or another are to be
 #	determined at run-time.  The typical run-times listed here will
 #	be a factor in your choice of kluster to operate on.
 #########################################################################
 # BLASTZ HUMAN Hg17 
     ssh kk
     mkdir /cluster/data/loxAfr1/bed/blastz.hg17
     cd /cluster/data/loxAfr1/bed/blastz.hg17
     cat << '_EOF_' > DEF
 # mouse vs. human
 export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/home/angie/schwartzbin:/cluster/home/kent/bin/i386
 
 ALIGN=blastz-run
 BLASTZ=blastz
 BLASTZ_H=2000
 BLASTZ_ABRIDGE_REPEATS=1
 
 # TARGET: Mouse
 SEQ1_DIR=/panasas/store/loxAfr1/nib
 # not used
 SEQ1_RMSK=/panasas/store/loxAfr1/rmsk
 # not used
 SEQ1_FLAG=-rodent
 SEQ1_SMSK=/panasas/store/loxAfr1/linSpecRep.notInHuman
 SEQ1_IN_CONTIGS=0
 SEQ1_CHUNK=10000000
 SEQ1_LAP=10000
 
 # QUERY: Human
 SEQ2_DIR=/scratch/hg/hg17/bothMaskedNibs
 # RMSK not currently used
 SEQ2_RMSK=
 # FLAG not currently used
 SEQ2_FLAG=
 SEQ2_SMSK=/scratch/hg/hg17/linSpecRep.notInMouse
 SEQ2_IN_CONTIGS=0
 SEQ2_CHUNK=30000000
 SEQ2_LAP=0
 
 BASE=/cluster/data/loxAfr1/bed/blastzHg17.2005_03_14
 
 DEF=$BASE/DEF
 RAW=$BASE/raw
 CDBDIR=$BASE
 SEQ1_LEN=$BASE/S1.len
 SEQ2_LEN=$BASE/S2.len
 '_EOF_'
     # << keep emacs coloring happy
 
     cp /cluster/data/loxAfr1/chrom.sizes ./S1.len
     sort -rn +1 /cluster/data/hg17/chrom.sizes > S2.len
     #	establish a screen to control this job
     screen
     time /cluster/bin/scripts/doBlastzChainNet.pl `pwd`/DEF > \
 	blast.run.out 2>&1 &
     #	real    993m28.547s
     #	user    0m0.198s
     #	sys     0m0.171s
     #	detach from screen session: Ctrl-a Ctrl-d
     #	to reattach to this screen session:
     ssh kksilo
     screen -d -r
     #	STARTED - 2005-03-17 21:25
     #	FINISHED - 2005-03-18 14:00
 # Completed: 45347 of 45347 jobs
 # CPU time in finished jobs:   16921981s  282033.02m  4700.55h  195.86d  0.537 y
 # IO & Wait Time:               2381711s   39695.18m   661.59h   27.57d  0.076 y
 # Average job time:                 426s       7.09m     0.12h    0.00d
 # Longest running job:                0s       0.00m     0.00h    0.00d
 # Longest finished job:            9568s     159.47m     2.66h    0.11d
 # Submission to last job:         58695s     978.25m    16.30h    0.68d
 
 # Completed: 331 of 331 jobs
 # CPU time in finished jobs:        272s       4.54m     0.08h    0.00d  0.000 y
 # IO & Wait Time:                  1145s      19.08m     0.32h    0.01d  0.000 y
 # Average job time:                   4s       0.07m     0.00h    0.00d
 # Longest job:                       24s       0.40m     0.01h    0.00d
 # Submission to last job:           265s       4.42m     0.07h    0.00d
 
     #	The kki batch doChainRun.csh appears to have failed
     #	due to underlying changes in the location of hg17 items
     #	fixup the symlinks which are in a state of flux today, then,
     #	to recover:
     ssh kki
     cd /cluster/data/loxAfr1/bed/blastzHg17.2005_03_14/axtChain/run
     rm -fr chain
     time ./doChainRun.csh
     #	real    22m47.917s
     #	user    0m0.380s
     #	sys     0m0.630s
 # Completed: 40 of 40 jobs
 # CPU time in finished jobs:       6373s     106.22m     1.77h    0.07d  0.000 y
 # IO & Wait Time:                   552s       9.20m     0.15h    0.01d  0.000 y
 # Average job time:                 173s       2.89m     0.05h    0.00d
 # Longest job:                      662s      11.03m     0.18h    0.01d
 # Submission to last job:          1200s      20.00m     0.33h    0.01d
 
     #	That was the last part of the chainRun step, can now continue:
     ssh kksilo
     cd /cluster/data/loxAfr1/bed/blastzHg17.2005_03_14
     time /cluster/bin/scripts/doBlastzChainNet.pl -continue chainMerge `pwd`/DEF > chainMerge.run.out 2>&1 &
     # STARTED - 2005-03-18 15:00
     # FINISHED 2005-03-18 16:33
 
     #	checking the numbers for sanity:
     ssh hgwdev
     #	expect ~ 2m30 seconds for chain measurement
     time featureBits loxAfr1 chainHg17
     #	2596946329 bases of 2597150411 (99.992%) in intersection
     time featureBits mm5 chainHg17
     #	2507720521 bases of 2615483787 (95.880%) in intersection
 
 
     #	expect ~ 2m30s seconds for net measurement
     time featureBits loxAfr1 netHg17
     #	2579747741 bases of 2597150411 (99.330%) in intersection
     time featureBits mm5 netHg17
     #	2504056038 bases of 2615483787 (95.740%) in intersection
 
     ssh kolossus
     #	expect ~ 20-22 minutes for the chainLink measurement
     HGDB_CONF=~/.hg.conf.read-only /usr/bin/time --portability \
 	featureBits loxAfr1 chainHg17Link
     #	966916309 bases of 2597150411 (37.230%) in intersection
     HGDB_CONF=~/.hg.conf.read-only /usr/bin/time --portability \
 	featureBits mm5 chainHg17Link
     #	1025750185 bases of 2615483787 (39.218%) in intersection
 
     #	swap results to place loxAfr1 alignments onto Hg17
     time /cluster/bin/scripts/doBlastzChainNet.pl -swap `pwd`/DEF > \
 	swap.run.out 2>&1 &
     #	STARTED - 2005-03-29 - 15:58
     #	FINI - 2005-03-29 - 18:48
     #	real    171m26.172s
     #	user    0m2.270s
     #	sys     0m0.870s
 
     ssh kolossus
     time HGDB_CONF=~/.hg.conf.read-only featureBits hg17 chainMm6Link
     #	969459954 bases of 2866216770 (33.824%) in intersection
     time HGDB_CONF=~/.hg.conf.read-only featureBits hg17 chainMm5Link
     #	1020106336 bases of 2866216770 (35.591%) in intersection
 
     #	A measurement script to do all featureBits combinations:
     cd /cluster/data/loxAfr1/jkStuff
     cat << '_EOF_' >  netChainCheck.sh
 #!/bin/sh
 
 usage()
 {
 echo "usage: netChainCheck.sh <db0> <db1> <targetDb>"
 echo "    does: featureBits <db0> net<targetDb>"
 echo "          featureBits <db1> net<targetDb>"
 echo "    as well as the chain and chainLink tables,"
 echo "    and on the targetDb:"
 echo "          featureBits <targetDb> net<db0>"
 echo "          featureBits <targetDb> net<db1>"
 echo "    and the chain and chainLink tables."
 echo -e "\texample: netChainCheck.sh loxAfr1 mm5 fr1"
 }
 
 doOne() 
 {   
 db=$1
 tbl=$2
 echo "    featureBits $db $tbl"
 echo -en "    #\t"
 time featureBits $db $tbl
 }
 
 ucFirstLetter()
 {
 ucString="$1"
 fc=`echo "${ucString}" | sed -e "s/\(.\).*/\1/"`
 rest=`echo "${ucString}" | sed -e "s/.\(.*\)/\1/"`
 FC=`echo "${fc}" | tr '[a-z]' '[A-Z]'`
 echo "${FC}${rest}"
 }
 
 if [ "$#" -ne 3 ]; then
     usage
     exit 255
 fi
 
 db0=$1
 db1=$2
 targetDb=$3
 
 targetDB=`ucFirstLetter "${targetDb}"`
 DB0=`ucFirstLetter "${db0}"`
 DB1=`ucFirstLetter "${db1}"`
 
 export db0 db1 targetDb targetDB DB0 DB1
 # echo "${db0} ${db1} ${targetDb} ${targetDB} ${DB0} ${DB1}"
 
 doOne "${db0}" net${targetDB}
 doOne "${db1}" net${targetDB}
 doOne "${db0}" chain${targetDB}
 doOne "${db1}" chain${targetDB}
 doOne "${db0}" chain${targetDB}Link
 doOne "${db1}" chain${targetDB}Link
 doOne ${targetDb} net${DB0}
 doOne ${targetDb} net${DB1}
 doOne ${targetDb} chain${DB0}
 doOne ${targetDb} chain${DB1}
 doOne ${targetDb} chain${DB0}Link
 doOne ${targetDb} chain${DB1}Link
 '_EOF_'
     # << keep emacs coloring happy
 
 # CHAIN  AND NET SWAPPED HUMAN BLASTZ (WORKING 12/22/05 kate)
 #  Working in Brian's blastz dir (not doced).
 #  This procedure follows conventions in doBlastzNetChain.pl,
 #  so it can be used to complete the processing
     ssh kki
     cd /cluster/data/loxAfr1/bed/zb.hg17
     ln -s `pwd` /cluster/data/hg17/bed/blastz.loxAfr1 
     mkdir -p axtChain/run/chain
     ls -1S /cluster/data/loxAfr1/bed/zb.hg17/axtChrom/*.axt.gz | \
         sed 's/.gz//' >  axtChain/run/input.lst
     cd axtChain/run
 
 cat > chain.csh << 'EOF'
 #!/bin/csh -ef
 zcat $1 | \
 axtChain -verbose=0 -minScore=3000 -linearGap=medium stdin \
     /cluster/data/hg17/nib /cluster/data/loxAfr1/loxAfr1.2bit stdout | \
 chainAntiRepeat /cluster/data/hg17/nib /cluster/data/loxAfr1/loxAfr1.2bit \
           stdin $2
 # delay so parasol check out will succeed on output file
 sleep 5
 'EOF'
     # << happy emacs
 cat > gsub << 'EOF'
 #LOOP
 chain.csh {check in exists $(path1).gz} {check out line+ chain/$(root1).chain}
 #ENDLOOP
 'EOF'
     # << happy emacs
     chmod a+x chain.csh
     gensub2 input.lst single gsub jobList
     para create jobList; para try
     para check 
     para push
     # wait for completion
     para time >  run.time
 
     # finish pipeline
     cd /cluster/data/hg17/bed/blastz.loxAfr1
     /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
         -continue=chainMerge \
 	-bigClusterHub=pk \
 	`pwd`/DEF >& blastz.out &
 
 ############################################################################
 ## create missing hg18 nets     (DONE 2006-05-12 markd)
 ## hg18 chains exist, but are not documented in this file.
    # create the net filee
     ssh hgwdev
     cd /cluster/data/loxAfr1/bed/blastz.hg18.swap/axtChain
     nice netClass -verbose=0 -noAr noClass.net loxAfr1 hg18 loxAfr1.hg18.net 
     nice gzip loxAfr1.hg18.net
 
 ############################################################################
 ##  BLASTZ swap from mm9 alignments (2007-11-11 - markd)
     ssh hgwdev
     mkdir /cluster/data/loxAfr1/bed/blastz.mm9.swap
     cd /cluster/data/loxAfr1/bed/blastz.mm9.swap
     ln -s blastz.mm9.swap ../blastz.mm9
     /cluster/bin/scripts/doBlastzChainNet.pl \
         -swap /cluster/data/mm9/bed/blastz.loxAfr1/DEF >& swap.out&
 
     # fb.loxAfr1.chainMm9Link.txt:
     #   468255296 bases of 2295548473 (20.398%) in intersection
 
 ############################################################################
 # TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20
 
 see doc/builds.txt for specific details.
 ############################################################################
 ############################################################################
 # TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded
 by a single Makefile. This is available from:
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30
 
 see doc/builds.txt for specific details.
 ############################################################################
 #  loxAfr1 - Elephant - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
     ssh kkr14u07
     cd /hive/data/genomes/loxAfr1
     cat << '_EOF_' > loxAfr1.ensGene.ra
 # required db variable
 db loxAfr1
 # do we need to translate geneScaffold coordinates
 geneScaffolds yes
 # ignore genes that do not properly convert to a gene pred, and contig
 #	names that are not in the UCSC assembly
 skipInvalid yes
 # ignore the one gene that has an invalid structure from Ensembl:
 # 13002: ENSLAFT00000000586 no exonFrame on CDS exon 1
 '_EOF_'
 #  << happy emacs
 
     doEnsGeneUpdate.pl -ensVersion=51 loxAfr1.ensGene.ra
     ssh hgwdev
     cd /hive/data/genomes/loxAfr1/bed/ensGene.51
     featureBits loxAfr1 ensGene
     # 21666114 bases of 2295548473 (0.944%) in intersection
 
  *** All done!  (through the 'makeDoc' step)
  *** Steps were performed in /hive/data/genomes/loxAfr1/bed/ensGene.51
 
 ############################################################################
+############################################################################
+# TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)
+
+vertebrate-wide transMap alignments were built  Tracks are created and loaded
+by a single Makefile. This is available from:
+   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
+
+see doc/builds.txt for specific details.
+############################################################################