src/hg/makeDb/doc/pyrAer1.txt 1.3

1.3 2009/11/25 18:29:19 hiram
change defaultViewLimits to viewLimits
Index: src/hg/makeDb/doc/pyrAer1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/pyrAer1.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/makeDb/doc/pyrAer1.txt	26 Jul 2006 16:59:58 -0000	1.2
+++ src/hg/makeDb/doc/pyrAer1.txt	25 Nov 2009 18:29:19 -0000	1.3
@@ -1,322 +1,322 @@
 # for emacs: -*- mode: sh; -*-
 
  
 # This file describes building the browser database for the archaeal
 # species Pyrobaculum aerophilum.
 
 # DOWNLOAD SEQUENCE FROM GENBANK (DONE 01/07/04)
 
     ssh eieio
     mkdir /cluster/store5/archae/pyrAer1
     ln -s /cluster/store5/archae/pyrAer1 /cluster/data/pyrAer1
     cd /cluster/data/pyrAer1
     wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Pyrobaculum_aerophilum/NC_003364.fna
     mv NC_003364.fna chr1.fa
     # Edit header of chr1.fa to '> pyrAer1'
 
 # CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 01/12/04)
 
     ssh hgwdev
     echo 'create database pyrAer1' | hgsql ''
     cd /cluster/data/pyrAer1
     hgNibSeq pyrAer1 /cluster/data/pyrAer1/nib chr1.fa
     faSize -detailed chr1.fa > chrom.sizes
     mkdir -p /gbdb/pyrAer1/nib
     echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
 	    | hgsql pyrAer1
     echo 'INSERT INTO dbDb \
         (name, description, nibPath, organism, \
                 defaultPos, active, orderKey, genome, scientificName, \
                 htmlPath, hgNearOk) values \
         ("pyrAer1", "Pyrobaculum aerophilum", "/gbdb/pyrAer1/nib", "P. aerophilum", \
                "chr1:500000-550000", 1, 85, "Archae", \
                 "Pyrobaculum aerophilum", "/gbdb/pyrAer1/html/description.html", \
                 0);' \
       | hgsql -h genome-testdb hgcentraltest
     echo 'INSERT INTO defaultDb (genome, name) values ("Archae", "pyrAer1");' \
       | hgsql -h genome-testdb hgcentraltest
 
     cd ~/kent/src/hg/makeDb/trackDb
     # edit the trackDb makefile
 
     # add the trackDb directories
     mkdir -p archae/pyrAer1
     cvs add archae
     cvs add archae/pyrAer1
     cvs commit
 
 # GC PERCENT TRACK (DONE 01/12/04)
 
     ssh hgwdev
     mkdir -p /cluster/data/pyrAer1/bed/gcPercent
     cd /cluster/data/pyrAer1/bed/gcPercent
     hgsql pyrAer1 < ~/kent/src/hg/lib/gcPercent.sql
     hgGcPercent -win=2000 pyrAer1 ../../nib
     # Edit ~/kent/src/hg/makeDb/trackDb/archae/trackDb.ra and add a GC percent
     # entry that uses 2,000 base windows instead (simply cut and paste the
     # entry from ~/kent/src/hg/makeDb/trackDb/trackDb.ra and adjust it).
 
 # GC 20 BASE WIGGLE TRACK (DONE 8/20)
 
     mkdir /cluster/data/pyrAer1/bed/gc20Base
     cd /cluster/data/pyrAer1/bed/gc20Base
     mkdir wigData20 dataLimits20
     hgGcPercent -chr=chr -file=stdout -win=20 -overlap=19 pyrAer1 ../../nib | grep -w GC | \
     awk '
 {
     bases = $3 - $2
     perCent = $5/10.0
     printf "%d\t%.1f\n", $2+1, perCent
 }' | wigAsciiToBinary -dataSpan=1 -chrom=chr \
 	-wibFile=wigData20/gc20Base_1 -name=1 stdin > dataLimits20/chr
     hgLoadWiggle pyrAer1 gc20Base wigData20/*.wig
     mkdir /gbdb/pyrAer1/wib
     ln -s `pwd`/wigData20/*.wib /gbdb/pyrAer1/wib
     #	the trackDb entry
 track gc20Base
 shortLabel GC Percent
 longLabel GC Percent in 20 base windows
 group map
 priority 1.7
 visibility hide
 autoScaleDefault Off
 maxHeightPixels 128:36:16
 graphTypeDefault Bar
 gridDefault OFF
 windowingFunction Mean
 color 0,128,255
 altColor 255,128,0
-defaultViewLimits 30:70
+viewLimits 30:70
 type wig 0 100    
 
 # CONTIG TRACK 
     # reformat is a Todd Lowe program
     for num in `seq 746 946`; do   
        curl -o ${num}.gbk "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?txt=on&val=AE009${num}.1"
        reformat fasta ${num}.gbk >> contigs.fa
     done
     blat /cluster/data/pyrAer1/chr1.fa contigs.fa -minIdentity=100 contigs.psl
     perfectBlatBed4 contigs.psl contigs.bed
     mkdir /cluster/data/pyrAer1/bed/pyrAer1Contigs
     cp contigs.bed /cluster/data/pyrAer1/bed/pyrAer1Contigs
     cd /cluster/data/pyrAer1/bed/pyrAer1Contigs
     hgLoadBed pyrAer1 pyrAer1Contigs contigs.bed
     # the trackDb entry: 
 track pyrAer1Contigs
 shortLabel Contigs
 longLabel Contigs deposited in Genbank
 group map
 priority 0.5
 visibility pack
 url http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=$$
 type bed 4 .
 
 
 # TANDEM REPEAT MASKER (NOT DONE YET)
 
     ssh hgwdev
     mkdir -p /cluster/data/pyrAer1/bed/simpleRepeat
     cd /cluster/data/pyrAer1
     trfBig chr1.fa /dev/null -bedAt=/cluster/data/pyrAer1/bed/simpleRepeat/chr1.bed
     cd /cluster/data/pyrAer1/bed/simpleRepeat
     hgLoadBed pyrAer1 simpleRepeat *.bed -sqlTable=~kent/src/hg/lib/simpleRepeat.sql
     # Weird: the hgLoadBed command above seg faults.  Need to ask Hiram or somebody else.
 
 # DESCRIPTION PAGE (DONE 01/12/04)
 
     ssh hgwdev
     # Write ~/kent/src/hg/makeDb/trackDb/archae/pyrAer1/description.html
     chmod a+r ~/kent/src/hg/makeDb/trackDb/archae/pyrAer1/description.html
     # Check it in.
     mkdir /gbdb/pyrAer1/html
     ln -s /cluster/data/pyrAer1/html/description.html /gbdb/pyrAer1/html/
 
 # GENBANK PROTEIN-CODING GENES (DONE 01/14/04)
 
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/genbank
     cd /cluster/data/pyrAer1/genbank
     wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Pyrobaculum_aerophilum/NC_003364.gbk
     mv NC_003364.gbk pyrAer1.gbk
     # Create 3 files to assist parsing of the GenBank file
     # 1. for a bed file
     echo 'chr1
 start
 end
 gene
 1000
 strand' > pyrAer1-params-bed.txt
     # 2. for the peptide parts
     echo 'gene
 translation' > pyrAer1-params-pep.txt
     # 3. for the other gene information
     echo 'gene
 gene
 product
 note
 protein_id
 db_xref
 EC_number
 pseudo' > pyrAer1-params-xra.txt
     # Now extract the genes and information:
     gbArchaeGenome pyrAer1.gbk pyrAer1-params-bed.txt pyrAer1-genbank-cds.bed
     gbArchaeGenome pyrAer1.gbk pyrAer1-params-pep.txt pyrAer1-genbank-cds.pep
     gbArchaeGenome pyrAer1.gbk pyrAer1-params-xra.txt pyrAer1-genbank-cds.xra
     hgsql pyrAer1 < ~/kent/src/hg/lib/pepPred.sql
     hgsql pyrAer1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table pepPred to gbProtCodePep | hgsql pyrAer1
     echo rename table minGeneInfo to gbProtCodeXra | hgsql pyrAer1
     echo load data local infile \'pyrAer1-genbank-cds.pep\' into table gbProtCodePep | hgsql pyrAer1
     echo load data local infile \'pyrAer1-genbank-cds.xra\' into table gbProtCodeXra | hgsql pyrAer1
 csh
 /cluster/bin/scripts/tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' pyrAer1-genbank-cds.bed | bedToGenePred stdin tmp.gp
 # Below, substr($1,4,4) must be edited so that it returns a numeric field.
 /cluster/bin/scripts/tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,4,4),name2,"cmpl","cmpl",0}' tmp.gp  > tmp2.gp
 # The join delimiter between the quotes must be a hard tab: type ctrl-V, then press tab.
 join -t "     " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.2 1.13 1.14 1.15  tmp2.gp pyrAer1-genbank-cds.xra > pyrAer1.gp
 ldHgGene pyrAer1 refSeq pyrAer1.gp -predTab -genePredExt
 
 # GENBANK rRNA GENES (NOT QUITE DONE)
     ssh hgwdev
     cd /cluster/data/pyrAer1/genbank
     gbArchaeGenome -kind=rRNA pyrAer1.gbk pyrAer1-params-bed.txt pyrAer1-rrnas.bed
     echo 'gene
 product
 NA' > pyrAer1-params-rrna-xra.txt
     gbArchaeGenome -kind=rRNA pyrAer1.gbk pyrAer1-params-rrna-xra.txt pyrAer1-rrnas-xra.txt
     hgLoadBed pyrAer1 gbRRNA pyrAer1-rrnas.bed
     hgsql pyrAer1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table minGeneInfo to gbRRNAXra | hgsql pyrAer1
     echo load data local infile \'pyrAer1-rrnas-xra.txt\' into table gbRRNAXra | hgsql pyrAer1
 
 # COG STUFF
     # Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
     # and save as cogpage.txt
     awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt | sed -e 's/\[//' -e 's/\]//' > cogs.txt
     rm cogpage.txt
     # Now we have the basic list of cogs and the letter code for each one.
     
 
 # TODD LOWE tRNA GENES (DONE 01/15/04)
 
     # This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
     # Lowe.  See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
     # **Showing the tRNAscan-SE instructions would be nice in the future.
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/bed/loweTrnaGene
     cd /cluster/data/pyrAer1/bed/loweTrnaGene
     hgLoadBed -tab pyrAer1 loweTrnaGene pyrAer1-lowe-trnas.bed -sqlTable=~/kent/src/hg/lib/loweTrnaGene.sql
 
 # TODD LOWE snoRNA GENES (DONE 01/16/04)
     # This is a bed 6 file created by hand.
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/bed/loweSnoGene
     cd /cluster/data/pyrAer1/bed/loweSnoGene
     hgLoadBed -tab pyrAer1 loweSnoGene pyrAer1-snos.bed
 
 # TIGR GENES (DONE 02/09/04)
     # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
     # and fill out the web form as follows:
     #   - Pick "Retrieve attributes for the specified DNA feature within a specific 
     #     organism and/or a specific role category".
     #       * Pick "Pyrobaculum aerophilum IM2", and "Primary and TIGR annotation ORFs" 
     #         from the 1st and 3rd box.
     #       * Select everything from "Choose TIGR Annotation Gene Attributes"
     #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
     #       * Select everything from "Choose Other Gene Attributes"
     #   - Click submit, and click save as tab-delimited file.
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/bed/tigrCmrORFs
     cp pyrAer1-tigr.tab /cluster/data/pyrAer1/bed/tigrCmrORFs
     cd /cluster/data/pyrAer1/bed/tigrCmrORFs
     tigrCmrToBed pyrAer1-tigr.tab pyrAer1-tigr.bed
     hgLoadBed -tab pyrAer1 tigrCmrGene pyrAer1-tigr.bed -sqlTable=~/kent/src/hg/lib/tigrCmrGene.sql
     echo RENAME TABLE tigrCmrGene to tigrCmrORFs | hgsql pyrAer1
 
 # Lowe Lab Microarrays (DONE 02/09/04)
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/bed/llaPaePrintA
     cp Pae-arrays.{bed,exps} /cluster/data/pyrAer1/bed/llaPaePrintA
     cd /cluster/data/pyrAer1/bed/llaPaePrintA
     hgLoadBed pyrAer1 llaPaePrintA Pae-arrays.bed
     hgsql pyrAer1 < ~/kent/src/hg/lib/expRecord.sql
     echo RENAME TABLE expRecord to llaPaePrintAExps | hgsql pyrAer1
     echo LOAD DATA LOCAL INFILE \'Pae-arrays.exps\' INTO TABLE llaPaePrintAExps | hgsql pyrAer1
     # for now I'm using both hgFixed and the pyrAer1 database for this... that will change.
     hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
     echo RENAME TABLE expRecord to llaPaePrintAExps | hgsql hgFixed
     echo LOAD DATA LOCAL INFILE \'Pae-arrays.exps\' INTO TABLE llaPaePrintAExps | hgsql hgFixed
 
 # AORF TRACK
     # This is another hand-created file, provided originally by Sorel Fitz-Gibbon and massaged to be a bed.
     ssh hgwdev
     mkdir /cluster/data/pyrAer1/bed/primAORF
     cp aorfs.bed /cluster/data/pyrAer1/bed/primAORF
     cd /cluster/data/pyrAer1/bed/primAORF
     hgLoadBed pyrAer1 primAORF aorfs.bed
 
 
 # CHANGE "chr1" to "chr"
 
     ssh hgwdev
     mv /cluster/data/pyrAer1/nib/chr1.nib /cluster/data/pyrAer1/nib/chr.nib
     rm /gbdb/pyrAer1/nib/chr1.nib
     ln -s /cluster/data/pyrAer1/nib/chr.nib /gbdb/pyrAer1/nib/chr.nib
     # a quick script, changeCh.sh, that replaces chr1 with chr in the file given as $1
 
 #!/bin/bash
 sed 's/chr1/chr/g' $1 > /tmp/whatever
 mv /tmp/whatever $1
 
     cd /cluster/data/pyrAer1/bed 
     find -name '*.bed' | xargs -n 1 changeCh.sh
     # Now change the DB
     cd /tmp
     hgsqldump pyrAer1 | sed 's/chr1/chr/g' > paeNew.sql
     echo drop database pyrAer1 | hgsql test
     echo create database pyrAer1 | hgsql test
     hgsql pyrAer1 < paeNew.sql
     rm paeNew.sql
     echo 'update dbDb set defaultPos="chr:550000-580000" where name="pyrAer1"' | hgsql -h genome-testdb hgcentraltest
 
 # Pae promoter track
 
     cd /cluster/data/pyrAer1/wiggle
     mkdir promoterScanPos
     mkdir promoterScanNeg
     cd promoterScanPos
     cp prom.pos.gz .
     wigAsciiToBinary -chrom=chr -wibFile=promoterScanPos prom.pos.gz
     hgLoadWiggle pyrAer1 promoterScanPos promoterScanPos.wig
     cd ../promoterScanNeg
     cp prom.neg.gz .
     wigAsciiToBinary -chrom=chr -wibFile=promoterScanNeg prom.neg.gz
     hgLoadWiggle pyrAer1 promoterScanNeg promoterScanNeg.wig
     cd /gbdb/pyrAer1/wib  
     ln -s /cluster/data/pyrAer1/wiggle/promoterScanPos/promoterScanPos.wib promoterScanPos.wib
     ln -s /cluster/data/pyrAer1/wiggle/promoterScanNeg/promoterScanNeg.wib promoterScanNeg.wib
 
 # shine DG track
 
     cd /cluster/data/pyrAer1/wiggle
     mkdir shineDGPos
     mkdir shineDGNeg
     cd shineDGPos
     cp shine.pos.gz .
     wigAsciiToBinary -chrom=chr -wibFile=shineDGPos shine.pos.gz
     hgLoadWiggle pyrAer1 shineDGPos shineDGPos.wig
     cd ../shineDGNeg
     cp shine.neg.gz .
     wigAsciiToBinary -chrom=chr -wibFile=shineDGNeg shine.neg.gz
     hgLoadWiggle pyrAer1 shineDGNeg shineDGNeg.wig
     cd /gbdb/pyrAer1/wib  
     ln -s /cluster/data/pyrAer1/wiggle/shineDGPos/shineDGPos.wib shineDGPos.wib
     ln -s /cluster/data/pyrAer1/wiggle/shineDGNeg/shineDGNeg.wib shineDGNeg.wib
 
 # RNA genes
 
 cd /cluster/data/pyrAer1/bed
 mkdir rnaGenes
 cp paeAllRNA.bed .
 cp ~/kent/src/hg/lib/rnaGenes.sql .
 hgLoadBed -sqlTable=rnaGenes.sql pyrAer1 rnaGenes paeAllRNA.bed