src/hg/makeDb/doc/methTher1.txt 1.3

1.3 2009/11/25 21:48:41 hiram
change autoScaleDefault to autoScale
Index: src/hg/makeDb/doc/methTher1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/methTher1.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/makeDb/doc/methTher1.txt	26 Jul 2006 16:59:56 -0000	1.2
+++ src/hg/makeDb/doc/methTher1.txt	25 Nov 2009 21:48:41 -0000	1.3
@@ -1,288 +1,288 @@
 # for emacs: -*- mode: sh; -*-
 
  
 # This file describes building the browser database for the archaeal
 # species Methanothermobacter thermoautotrophicus str. Delta H (methTher1).
 #
 # If this is the first time you are building your own hgwdev browser, you
 # first need to cd ~/kent/src/ and then run make.
 
 # DOWNLOAD SEQUENCE FROM GENBANK (DONE 10/05)
 
     mkdir /cluster/store5/archae/methTher1
     ln -s /cluster/store5/archae/methTher1 /cluster/data/methTher1
     cd /cluster/data/methTher1
     cp /projects/lowelab/db/Bacteria/Methanobacterium_thermoautotrophicum/Meth_ther.* .
     mv Meth_ther.fa methTher1.fa
     grep ">" methTher1.fa
     # Edit header of methTher1.fa seqs to '>chr'  -- no plasmids to worry about
    
     faToTwoBit methTher1.fa methTher1.2bit 
 
     mkdir /gbdb/methTher1
     ln -s /cluster/data/methTher1/methTher1.2bit /gbdb/methTher1/methTher1.2bit
 
 # CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 10/05)
 
     ssh hgwdev
     echo 'create database methTher1' | hgsql ''
     cd /cluster/data/methTher1
 
     faSize -detailed methTher1.fa > chrom.sizes
     echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" | hgsql methTher1
     echo 'INSERT INTO dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, htmlPath, hgNearOk) values ("methTher1", "Aug 1997", "/gbdb/methTher1", "Methanothermobacter thermoautotrophicus", "chr:500000-550000", 1, 240, "Methanothermobacter thermoautotrophicus","Methanothermobacter thermoautotrophicus str. Delta H", "/gbdb/methTher1/html/description.html", 0);' | hgsql hgcentraltest
     echo 'INSERT INTO defaultDb (genome, name) values ("Methanothermobacter thermoautotrophicus", "methTher1");' | hgsql hgcentraltest
     echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Methanothermobacter thermoautotrophicus", "archaea",85);'  | hgsql hgcentraltest
 
 # CREATE CHROMINFO TABLE (DONE 10/05)
   ssh hgwdev
   cd /cluster/data/methTher1
 
    cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
    hgsql methTher1 < chromInfo.sql
    echo "load data local infile 'chrom.sizes' into table chromInfo" | hgsql methTher1
    echo "update chromInfo set fileName = '/gbdb/methTher1/methTher1.2bit'" | hgsql methTher1
 
     cd ~/kent/src/hg/makeDb/trackDb
 
     # add the trackDb directories
     mkdir -p archae/methTher1
     cvs add archae/methTher1
     cvs commit archae/methTher1
 
     make DBS=methTher1
 
 # GC20BASE (DONE 10/13/05), kpollard
     mkdir -p /cluster/data/methTher1/bed/gc20Base
     cd /cluster/data/methTher1/bed/gc20Base
     hgGcPercent -wigOut -doGaps -file=stdout -win=10 -overlap=15 methTher1 /cluster/data/methTher1/ | wigEncode stdin gc20Base.wig gc20Base.wib
 
     cd /cluster/data/methTher1/bed/gc20Base
     mkdir /gbdb/methTher1/wib
     ln -s `pwd`/gc20Base.wib /gbdb/methTher1/wib
     hgLoadWiggle -pathPrefix=/gbdb/methTher1/wib methTher1 gc20Base gc20Base.wig
     #	verify index is correct:
     hgsql methTher1 -e "show index from gc20Base;"
     #	should see good numbers in Cardinality column
 
 
 # TANDEM REPEAT MASKER (DONE 10/13/05), kpollard
 
     ssh hgwdev
     mkdir -p /cluster/data/methTher1/bed/simpleRepeat
     cd /cluster/data/methTher1
     trfBig methTher1.fa /dev/null -bedAt=/cluster/data/methTher1/bed/simpleRepeat/chr.bed
     cd /cluster/data/methTher1/bed/simpleRepeat
     hgLoadBed methTher1 simpleRepeat *.bed -sqlTable=/cluster/home/lowe/kent/src/hg/lib/simpleRepeat.sql
 
 # MULTIZ (DONE 10/13/05), kpollard
 
     cd /cluster/data/methTher1/bed
     mkdir conservation
     cd conservation
     cp /cluster/data/pyrFur2/bed/conservation/HoxD55.q .
     cp /cluster/data/methJann1/bed/conservation/methTher1.chr .
     cp /cluster/data/methJann1/bed/conservation/metMar1.chr .
     cp /cluster/data/methJann1/bed/conservation/methJann1.chr .
     cp /cluster/data/methJann1/bed/conservation/methKand1.chr .
     cp /cluster/data/methJann1/bed/conservation/methTher1.chr.nib .
     cp /cluster/data/methJann1/bed/conservation/metMar1.chr.nib .
     cp /cluster/data/methJann1/bed/conservation/methJann1.2bit .
     cp /cluster/data/methJann1/bed/conservation/methKand1.chr.nib .
     faSize -detailed *.chr > chrom.sizes
 
     #blastz
     blastz methTher1.chr metMar1.chr Q=HoxD55.q > methTher1-metMar1.lav
     blastz methTher1.chr methJann1.chr Q=HoxD55.q > methTher1-methJann1.lav
     blastz methTher1.chr methKand1.chr Q=HoxD55.q > methTher1-methKand1.lav
 
     lavToAxt methTher1-metMar1.lav . . methTher1-metMar1.axt
     lavToAxt methTher1-methJann1.lav . methJann1.2bit methTher1-methJann1.axt
     lavToAxt methTher1-methKand1.lav . . methTher1-methKand1.axt
 
     axtBest methTher1-metMar1.axt methTher1.chr -winSize=500 -minScore=5000 methTher1-metMar1-best.axt
     axtBest methTher1-methJann1.axt methTher1.chr -winSize=500 -minScore=5000 methTher1-methJann1-best.axt
     axtBest methTher1-methKand1.axt methTher1.chr -winSize=500 -minScore=5000 methTher1-methKand1-best.axt
 
     axtToMaf methTher1-metMar1-best.axt chrom.sizes chrom.sizes methTher1-metMar1.maf
     axtToMaf methTher1-methJann1-best.axt chrom.sizes chrom.sizes methTher1-methJann1.maf
     axtToMaf methTher1-methKand1-best.axt chrom.sizes chrom.sizes methTher1-methKand1.maf
 
     #multiz
     #delete extra header lines from maf files
     multiz methTher1-metMar1.maf methTher1-methJann1.maf - > methTher1-metMar1-methJann1.maf
     multiz methTher1-methKand1.maf methTher1-metMar1-methJann1.maf - > methTher1-metMar1-methJann1-methKand1.maf
 
     #phyloHMM
     /cluster/bin/phast/msa_view -i MAF -M methTher1.chr -o SS methTher1-metMar1-methJann1-methKand1.maf > methTher1.ss
     /cluster/bin/phast/phyloFit -i SS methTher1.ss -t "(methKand1,(methTher1,(metMar1,methJann1)))" 
     /cluster/bin/phast/msa_view -i SS methTher1.ss --summary-only
     #insert GC content into the following command
     /cluster/bin/phast/phastCons methTher1.ss phyloFit.mod --gc 0.4634 \
 	--target-coverage 0.7 --estimate-trees meth-tree \
 	--expected-lengths 25 --no-post-probs --ignore-missing \
 	--nrates 1,1
     /cluster/bin/phast/phastCons methTher1.ss \
 	meth-tree.cons.mod,meth-tree.noncons.mod \
 	--target-coverage 0.7 --expected-lengths 25 \
 	--viterbi methTher1-elements.bed --score \
 	--require-informative 0 --seqname chr > cons.dat
     wigEncode cons.dat phastCons.wig phastCons.wib
     draw_tree phyloFit.mod > methTher1-tree.ps
     #check that tree is similar to tree from metMar1
 
     #move data
     mkdir wib
     mv phastCons.wib wib/phastCons.wib
     mv phastCons.wig wib/phastCons.wig
     ln -s /cluster/data/methTher1/bed/conservation/wib/phastCons.wib /gbdb/methTher1/wib
     mkdir -p /gbdb/methTher1/pwMaf
     mkdir -p otherSpp/metMar1 otherSpp/methJann1 otherSpp/methKand1
     mv methTher1-metMar1.maf otherSpp/metMar1/chr.maf
     mv methTher1-methJann1.maf otherSpp/methJann1/chr.maf
     mv methTher1-methKand1.maf otherSpp/methKand1/chr.maf
     ln -s /cluster/data/methTher1/bed/conservation/otherSpp/metMar1 /gbdb/methTher1/pwMaf/metMar1_pwMaf
     ln -s /cluster/data/methTher1/bed/conservation/otherSpp/methJann1 /gbdb/methTher1/pwMaf/methJann1_pwMaf
     ln -s /cluster/data/methTher1/bed/conservation/otherSpp/methKand1 /gbdb/methTher1/pwMaf/methKand1_pwMaf
     mkdir multiz
     mv methTher1-metMar1-methJann1-methKand1.maf multiz/chr.maf
     ln -s /cluster/data/methTher1/bed/conservation/multiz /gbdb/methTher1/multizMtMmMjMk
 
     #load
     hgLoadWiggle methTher1 phastCons /cluster/data/methTher1/bed/conservation/wib/phastCons.wig
     hgLoadMaf -warn methTher1 multizMtMmMjMk
     hgLoadMaf -warn methTher1 metMar1_pwMaf -pathPrefix=/gbdb/methTher1/pwMaf/metMar1_pwMaf
     hgLoadMaf -warn methTher1 methJann1_pwMaf -pathPrefix=/gbdb/methTher1/pwMaf/methJann1_pwMaf
    hgLoadMaf -warn methTher1 methKand1_pwMaf -pathPrefix=/gbdb/methTher1/pwMaf/methKand1_pwMaf
     hgLoadBed methTher1 phastConsElements methTher1-elements.bed 
 
     #trackDb
     cd ~/kent/src/hg/makeDb/trackDb/archae/methTher1
     #trackDb.ra entry
     # track multizMtMmMjMk
     # shortLabel Conservation
     # longLabel Methanogen multiz alignments
     # group compGeno
     # priority 10.0
     # visibility pack
     # type wigMaf 0.0 1.0
     # maxHeightPixels 100:40:11
     # wiggle phastCons
     # yLineOnOff Off
-    # autoScaleDefault Off
+    # autoScale Off
     # pairwise pwMaf
     # speciesOrder metMar1 methJann1 methKand1
     cvs add trackDb.ra
     cvs commit -m "Added multiz track" trackDb.ra
     #html page for multizMtMmMjMk
     cvs add multizMtMmMjMk.html
     cvs commit -m "Details page for multiz track" multizMtMmMjMk.html
  
 # DESCRIPTION PAGE (DONE 10/20/05), kpollard
 
     # Write ~/kent/src/hg/makeDb/trackDb/archae/methTher1/description.html
     cd ~/kent/src/hg/makeDb/trackDb/archae/methTher1/
     chmod a+r description.html
     cvs add description.html
     cvs commit -m "description page" description.html
     mkdir -p /cluster/data/methTher1/html/
     cp ~/kent/src/hg/makeDb/trackDb/archae/methTher1/description.html \
 	/cluster/data/methTher1/html/description.html
     mkdir -p /gbdb/methTher1/html
     ln -s /cluster/data/methTher1/html/description.html /gbdb/methTher1/html/
 
 # GENBANK PROTEIN-CODING GENES (TO DO)
 
     ssh hgwdev
     mkdir /cluster/data/methTher1/genbank
     cd /cluster/data/methTher1/genbank
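     # NOTE(review): the source directory below names Sulfolobus_acidocaldarius_DSM_639
     # and the .gbk renamed afterwards is NC_003552; both look carried over from other
     # genomes' notes. The methTher1 sequence itself was copied from
     # /projects/lowelab/db/Bacteria/Methanobacterium_thermoautotrophicum/ -- confirm
     # the correct directory and accession before running this (TO DO) section.
     cp /projects/lowelab/db/Bacteria/Sulfolobus_acidocaldarius_DSM_639/ .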
     
     mv NC_003552.gbk methTher1.gbk
     # Create 3 files to assist parsing of the genbank
     # 1. for a bed file
     echo 'chr
 start
 end
 gene
 1000
 strand' > methTher1-params-bed.txt
     # 2. for the peptide parts
     echo 'gene
 translation' > methTher1-params-pep.txt
     # 3. for the other gene information
     echo 'gene
 product
 note' > methTher1-params-xra.txt
     # Now extract the genes and information:
     gbArchaeGenome methTher1.gbk methTher1-params-bed.txt methTher1-genbank-cds.bed
     gbArchaeGenome methTher1.gbk methTher1-params-pep.txt methTher1-genbank-cds.pep
     gbArchaeGenome methTher1.gbk methTher1-params-xra.txt methTher1-genbank-cds.xra
     hgLoadBed methTher1 gbProtCode methTher1-genbank-cds.bed
     hgsql methTher1 < ~/kent/src/hg/lib/pepPred.sql
     hgsql methTher1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table pepPred to gbProtCodePep | hgsql methTher1
     echo rename table minGeneInfo to gbProtCodeXra | hgsql methTher1
     echo load data local infile \'methTher1-genbank-cds.pep\' into table gbProtCodePep | hgsql methTher1
     echo load data local infile \'methTher1-genbank-cds.xra\' into table gbProtCodeXra | hgsql methTher1
 
 #genbank to genePred
 
 csh
 tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' methTher1-genbank-cds.bed | bedToGenePred stdin tmp.gp
 tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,3,4),name2,"cmpl","cmpl",0}' tmp.gp  > tmp2.gp
 join -t "     " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.3 1.13 1.14 1.15  tmp2.gp methTher1-genbank-cds.xra > methTher1.gp
 
 # GENBANK rRNA GENES (TO DO)
     ssh hgwdev
     cd /cluster/data/methTher1/genbank
     gbArchaeGenome -kind=rRNA methTher1.gbk methTher1-params-bed.txt methTher1-rrnas.bed
     echo 'gene product NA' > methTher1-params-rrna-xra.txt
     gbArchaeGenome -kind=rRNA methTher1.gbk methTher1-params-rrna-xra.txt methTher1-rrnas-xra.txt
     hgLoadBed methTher1 gbRRNA methTher1-rrnas.bed
     hgsql methTher1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table minGeneInfo to gbRRNAXra | hgsql methTher1
     echo load data local infile \'methTher1-rrnas-xra.txt\' into table gbRRNAXra | hgsql methTher1
 
 # COG STUFF
     # Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
     # and save as cogpage.txt
     awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt | sed -e 's/\[//' -e 's/\]//' > cogs.txt
     rm cogpage.txt
     # Now we have the basic list of cogs and the letter code for each one.
     
 
 # TODD LOWE tRNA GENES (TO DO)
 
     # This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
     # Lowe.  See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
     # **Showing the tRNAScanSE instructions would be nice in the future.  
     ssh hgwdev
     mkdir /cluster/data/methTher1/bed/loweTrnaGene
     cd /cluster/data/methTher1/bed/loweTrnaGene
     hgLoadBed -tab methTher1 loweTrnaGene methTher1-lowe-trnas.bed -sqlTable=~/kent/src/hg/lib/loweTrnaGene.sql
 
 # TODD LOWE snoRNA GENES (TO DO)
     # This is a bed 6 file created by hand.
     ssh hgwdev
     mkdir /cluster/data/methTher1/bed/loweSnoGene
     cd /cluster/data/methTher1/bed/loweSnoGene
     hgLoadBed -tab methTher1 loweSnoGene methTher1-snos.bed
 
 # TIGR GENES (TO DO)
     # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
     # and fill out the web form as follows:
     #   - Pick "Retrieve attributes for the specified DNA feature within a specific 
     #     organism and/or a specific role category".
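     #       * Pick the organism for this assembly (Methanothermobacter
     #         thermoautotrophicus; this step originally read "Pyrobaculum aerophilum IM2",
     #         apparently carried over from another genome's notes -- confirm the exact
     #         name offered by the form), and "Primary and TIGR annotation ORFs" 
     #         from the 1st and 3rd box.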
     #       * Select everything from "Choose TIGR Annotation Gene Attributes"
     #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
     #       * Select everything from "Choose Other Gene Attributes"
     #   - Click submit, and click save as tab-delimited file.
     ssh hgwdev
     mkdir /cluster/data/methTher1/bed/tigrCmrORFs
     cp methTher1-tigr.tab /cluster/data/methTher1/bed/tigrCmrORFs
     cd /cluster/data/methTher1/bed/tigrCmrORFs
     /projects/lowelab/users/aamp/bin/i386/tigrCmrToBed methTher1-tigr.tab methTher1-tigr.bed
     hgLoadBed -tab methTher1 tigrCmrORFs methTher1-tigr.bed -sqlTable=~/kent/src/hg/lib/tigrCmrGene.sql