src/hg/makeDb/doc/gasAcu1.txt 1.39

1.39 2009/09/24 20:55:31 hiram
Finish tetNig2 chain/net
Index: src/hg/makeDb/doc/gasAcu1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/gasAcu1.txt,v
retrieving revision 1.38
retrieving revision 1.39
diff -b -B -U 4 -r1.38 -r1.39
--- src/hg/makeDb/doc/gasAcu1.txt	20 Sep 2009 17:16:43 -0000	1.38
+++ src/hg/makeDb/doc/gasAcu1.txt	24 Sep 2009 20:55:31 -0000	1.39
@@ -1504,8 +1504,14 @@
 	~/kent/src/hg/lib/mafSummary.sql multiz8waySummary.tab
     #	real    0m4.525
 
 #########################################################################
+# Adding automatic generation of upstream files (DONE - 2009-08-13 - Hiram)
+    # edit src/hg/makeDb/genbank/genbank.conf to add:
+gasAcu1.upstreamGeneTbl = ensGene
+gasAcu1.upstreamMaf = multiz8way /hive/data/genomes/gasAcu1/bed/multiz8way/species.lst
+
+#########################################################################
 # MULTIZ8WAY DOWNLOADABLES (DONE - 2007-01-05 - Hiram)
 ##  re-done with new chrUn.maf 2007-01-13 - Hiram
 ##  re-done with fr2 in place of fr1 - 2007-02-03 - Hiram
     # Annotated MAF is now documented, so use anno/maf for downloads/
@@ -2685,8 +2691,96 @@
    svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01
 
 see doc/builds.txt for specific details.
 ############################################################################
+# BLASTZ/CHAIN/NET TetNig2 (DONE - 2009-08-10,09-15 - Hiram)
+    #	create a contigs-only sequence so the alignment is run on gasAcu1 contigs
+    mkdir /hive/data/genomes/gasAcu1/nonBridged
+    cd /hive/data/genomes/gasAcu1/nonBridged
+    gapToLift -verbose=2 gasAcu1 gasAcu1.contigs.lift \
+	-bedFile=gasAcu1.contigs.bed
+#       chrom count: 23
+# WARNING: gap at end of chromosome not telomere at
+#		chrUn:62549211-62550211, type: clone
+#       found 16945 gaps
+#       bed output requested to gasAcu1.contigs.bed
+#       no gaps on chrom: chrM, size: 15742
+    ~/kent/src/hg/utils/lft2BitToFa.pl ../gasAcu1.2bit gasAcu1.contigs.lift \
+	| gzip -c > gasAcu1.contigs.fa.gz
+    #	make sure nothing was destroyed:
+    faCount *.fa.gz > faCount.contigs.txt 2>&1
+    twoBitToFa ../gasAcu1.2bit stdout | faCount stdin > faCount.2bit.txt 2>&1
+    tail -1 faCount.contigs.txt
+# total   461441448       123670916       99610982        99564587
+#	123781376       14813587        14615136
+    tail -1 faCount.2bit.txt
+# total   463354448       123670916       99610982        99564587
+#	123781376       16726587        14615136
+
+    #	only the total size and N count are different
+    faToTwoBit gasAcu1.contigs.fa.gz gasAcu1.contigs.2bit
+    twoBitInfo gasAcu1.contigs.2bit stdout | sort -k2nr > gasAcu1.contigs.sizes
+    cp -p gasAcu1.contigs.2bit gasAcu1.contigs.sizes gasAcu1.contigs.lift \
+	/hive/data/staging/data/gasAcu1
+
+
+    mkdir /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+    cd /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+
+    cat << '_EOF_' > DEF
+# Stickleback vs. Tetraodon
+
+# TARGET: Stickleback gasAcu1, chunk large enough to run largest piece
+SEQ1_DIR=/scratch/data/gasAcu1/gasAcu1.2bit
+SEQ1_LEN=/scratch/data/gasAcu1/chrom.sizes
+SEQ1_CTGDIR=/scratch/data/gasAcu1/gasAcu1.contigs.2bit
+SEQ1_CTGLEN=/scratch/data/gasAcu1/gasAcu1.contigs.sizes
+SEQ1_LIFT=/scratch/data/gasAcu1/gasAcu1.contigs.lift
+SEQ1_CHUNK=22000000
+SEQ1_LAP=10000
+SEQ1_LIMIT=50
+
+# QUERY: Tetraodon TetNig2 - single chunk big enough to run single largest item
+SEQ2_DIR=/scratch/data/tetNig2/tetNig2.2bit
+SEQ2_LEN=/scratch/data/tetNig2/chrom.sizes
+SEQ2_CTGDIR=/scratch/data/tetNig2/tetNig2.contigs.2bit
+SEQ2_CTGLEN=/scratch/data/tetNig2/tetNig2.contigs.sizes
+SEQ2_LIFT=/scratch/data/tetNig2/tetNig2.contigs.lift
+SEQ2_CHUNK=20000000
+SEQ2_LAP=0
+SEQ2_LIMIT=50
+
+BASE=/hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10
+TMPDIR=/scratch/tmp
+'_EOF_'
+    # << this line keeps emacs coloring happy
+
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	`pwd`/DEF \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> do.log 2>&1 &
+    #	about 72 minutes
+    #	forgot to specify -tRepeats/-qRepeats above, finishing the net load manually:
+    cd /hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10/axtChain
+    netClass -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+        -verbose=0 -noAr noClass.net gasAcu1 tetNig2 gasAcu1.tetNig2.net
+    netFilter -minGap=10 gasAcu1.tetNig2.net \
+	| hgLoadNet -verbose=0 gasAcu1 netTetNig2 stdin
+    cd ..
+    featureBits gasAcu1 chainTetNig2Link >&fb.gasAcu1.chainTetNig2Link.txt
+    cat fb.gasAcu1.chainTetNig2Link.txt
+    #	134497679 bases of 446627861 (30.114%) in intersection
+
+    mkdir /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    cd /hive/data/genomes/tetNig2/bed/blastz.gasAcu1.swap
+    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
+	/hive/data/genomes/gasAcu1/bed/lastzTetNig2.2009-08-10/DEF \
+	-swap -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
+	-noLoadChainSplit -chainMinScore=2000 -chainLinearGap=medium \
+	-workhorse=hgwdev -smallClusterHub=memk -bigClusterHub=swarm \
+	> swap.log 2>&1 &
+
 ############################################################################
 # TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)
 
 vertebrate-wide transMap alignments were built  Tracks are created and loaded