src/hg/makeDb/doc/methBark1.txt 1.3

1.3 2009/11/25 21:48:40 hiram
change autoScaleDefault to autoScale
Index: src/hg/makeDb/doc/methBark1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/methBark1.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/makeDb/doc/methBark1.txt	26 Jul 2006 16:59:56 -0000	1.2
+++ src/hg/makeDb/doc/methBark1.txt	25 Nov 2009 21:48:40 -0000	1.3
@@ -1,380 +1,380 @@
 # for emacs: -*- mode: sh; -*-
 
  
 # This file describes building the browser database for the archaeal
 # species Methanosarcina barkeri
 
 # DOWNLOAD SEQUENCE FROM GENBANK (DONE)
 
     ssh eieio
     mkdir /cluster/store5/archae/methBark1
     ln -s /cluster/store5/archae/methBark1 /cluster/data/methBark1
     cd /cluster/data/methBark1
      cp /projects/lowelab/db/Bacteria/Methanosarcina_barkeri_fusaro/NC_0073*.fna .
     mv NC_007355.fna chr.fa
     mv NC_007349.fna plasmid1.fa
     # Edit header of chr.fa and plasmid1.fa to '> chr and >plasmid1
       cat chr.fa plasmid1.fa > methBark1.fa
       faToTwoBit methBark1.fa methBark1.2bit 
 
 
 # CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE)
 
     ssh hgwdev
     echo 'create database methBark1' | hgsql ''
     cd /cluster/data/methBark1
     faSize -detailed methBark1.fa > chrom.sizes
     echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
 	    | hgsql methBark1
     echo 'INSERT INTO dbDb \
         (name, description, nibPath, organism, \
                 defaultPos, active, orderKey, genome, scientificName, \
                 htmlPath, hgNearOk) values \
         ("methBark1", "June 2002", "/gbdb/methBark1", "Methanosarcina barkeri", \
                "chr:500000-550000", 1, 251, "Methanosarcina barkeri", \
                 "Methanosarcina barkeri fusaro", "/gbdb/methBark1/html/description.html", \
                 0);' \
       | hgsql hgcentraltest
     echo 'INSERT INTO defaultDb (genome, name) values ("Methanosarcina barkeri", "methBark1");' \
       | hgsql hgcentraltest
     echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Methanosarcina barkeri", "archaea",85);'  \
       | hgsql hgcentraltest
 
 # CREATE CHROMINFO TABLE (DONE)
   ssh hgwdev
   cd /cluster/data/methBark1
 
    edit chromInfo.sql; allow fileName to be null
    cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
    hgsql methBark1 < chromInfo.sql
    echo "load data local infile 'chrom.sizes' into table chromInfo" | hgsql methBark1
    echo "update chromInfo set fileName = '/gbdb/methBark1/rheMac1.2bit'" | hgsql methBark1
 
     cd ~/kent/src/hg/makeDb/trackDb
 
     # add the trackDb directories
     mkdir -p archae/methBark1
     cvs add archae/methBark1
     cvs commit
 
 # GC20BASE (DONE)
     ssh kkstore02
     mkdir -p /cluster/data/methBark1/bed/gc20Base
     cd /cluster/data/methBark1/bed/gc20Base
     hgGcPercent -wigOut -doGaps -file=stdout -win=20 methBark1 \
         /cluster/data/methBark1/ | wigEncode stdin gc20Base.wig gc20Base.wib
     ssh hgwdev
     cd /cluster/data/methBark1/bed/gc20Base
     mkdir /gbdb/methBark1/wib
     ln -s `pwd`/gc20Base.wib /gbdb/methBark1/wib
     hgLoadWiggle -pathPrefix=/gbdb/methBark1/wib methBark1 gc20Base gc20Base.wig
     #	verify index is correct:
     hgsql methBark1 -e "show index from gc20Base;"
     #	should see good numbers in Cardinality column
 
 
 # TANDEM REPEAT MASKER (DONE)
 
     ssh hgwdev
     mkdir -p /custer/data/methBark1/bed/simpleRepeat
     cd /cluster/data/methBark1
     trfBig methBark1.fa /dev/null -bedAt=/cluster/data/methBark1/bed/simpleRepeat/chr.bed
     cd /cluster/data/methBark1/bed/simpleRepeat
     vi chr.bed and replace methBark1 with chr
     hgLoadBed methBark1 simpleRepeat *.bed -sqlTable=/cluster/home/baertsch/kent/src/hg/lib/simpleRepeat.sql
 
 
 # MULTIZ with other methanogens
 # DONE (10/15/05), kpollard
 
     cd /cluster/data/methBark1/bed/
     mkdir conservation
     cd conservation
     cp /cluster/data/metAce1/bed/conservation/HoxD55.q .
     cp /cluster/data/metAce1/bed/conservation/*.chr .
     cp /cluster/data/metAce1/bed/conservation/*.nib .
     cp /cluster/data/metAce1/bed/conservation/*.2bit .
 
     #chrom sizes
     faSize -detailed *.chr > chrom.sizes
 
     #blastz 
     blastz metBak0.chr metAce1.chr Q=HoxD55.q > metBak0-metAce1.lav
     blastz metBak0.chr metMaz1.chr Q=HoxD55.q > metBak0-metMaz1.lav
     blastz metBak0.chr metBur0.2bit Q=HoxD55.q > metBak0-metBur0.lav
 
     /cluster/bin/i386/lavToAxt metBak0-metAce1.lav metBak0.2bit metAce1.2bit metBak0-metAce1.axt
     /cluster/bin/i386/lavToAxt metBak0-metMaz1.lav metBak0.2bit . metBak0-metMaz1.axt
     /cluster/bin/i386/lavToAxt metBak0-metBur0.lav metBak0.2bit metBur0.2bit metBak0-metBur0.axt
 
     axtBest metBak0-metAce1.axt metBak0.2bit -winSize=500 -minScore=5000 metBak0-metAce1-best.axt
     axtBest metBak0-metMaz1.axt metBak0.2bit -winSize=500 -minScore=5000 metBak0-metMaz1-best.axt
     axtBest metBak0-metBur0.axt metBak0.2bit -winSize=500 -minScore=5000 metBak0-metBur0-best.axt
 
     axtToMaf metBak0-metAce1-best.axt chrom.sizes chrom.sizes metBak0-metAce1.maf
     axtToMaf metBak0-metMaz1-best.axt chrom.sizes chrom.sizes metBak0-metMaz1.maf
     axtToMaf metBak0-metBur0-best.axt chrom.sizes chrom.sizes metBak0-metBur0.maf
 
     #STOP: SEEMS TO ONLY USE THE FIRST contig... Need to do separately!
 
     #TRY AGAIN WITH methBark1 (assembled into 1 chr, 1 plasmid)
 
     cd /cluster/data/methBark1/bed
     mv conservation conservation.metBak0
     mkdir conservation
     cd conservation
     cp ../conservation.metBak0/HoxD55.q .
     cp ../../methBark1.fa .
     sed s/chr/methBark1.chr/ methBark1.fa > temp
     sed s/plas/methBark1.plas/ temp > methBark1.fa
     cp /cluster/data/methBurt1/methBurt1.fa .
     sed s/Contig/methBurt1.Contig/ methBurt1.fa > temp
     cat temp | gawk '{if(/Contig/){print $1;}else{print toupper($0);}}' > methBurt1.fa
     cp /cluster/data/metAce1/bed/conservation/metAce1.chr .
     cp /cluster/data/metAce1/bed/conservation/metMaz1.chr methMaze1.chr 
     sed s/tMaz/thMaze/ methMaze1.chr > temp
     mv temp methMaze1.chr
     faToNib metAce1.chr metAce1.chr.nib
     faToNib methMaze1.chr methMaze1.chr.nib
     faToTwoBit methBurt1.fa methBurt1.2bit
     faToTwoBit methBark1.fa methBark1.2bit
 
     #chrom sizes
     faSize -detailed *.chr *.fa > chrom.sizes
     
     #split methBark1.fa
     faSplit byname methBark1.fa ./
     echo "chr" > chroms.txt
     echo "plasmid1" >> chroms.txt
 
     #blastz 
     foreach f (`cat chroms.txt`)
 	echo $f
 	blastz methBark1.$f.fa metAce1.chr Q=HoxD55.q > methBark1.${f}-metAce1.lav
 	blastz methBark1.$f.fa methMaze1.chr Q=HoxD55.q > methBark1.${f}-methMaze1.lav
 	blastz methBark1.$f.fa methBurt1.fa Q=HoxD55.q > methBark1.${f}-methBurt1.lav
     end
 
     foreach f (`cat chroms.txt`)
 	echo $f
 	echo "lavToAxt"
 	lavToAxt methBark1.${f}-metAce1.lav methBark1.2bit . methBark1.${f}-metAce1.axt
 	lavToAxt methBark1.${f}-methMaze1.lav methBark1.2bit . methBark1.${f}-methMaze1.axt
 	lavToAxt methBark1.${f}-methBurt1.lav methBark1.2bit methBurt1.2bit methBark1.${f}-methBurt1.axt
 	echo "axtBest"
 	axtBest methBark1.${f}-metAce1.axt all -winSize=500 -minScore=5000  methBark1.${f}-metAce1-best.axt
 	axtBest methBark1.${f}-methMaze1.axt all -winSize=500 -minScore=5000  methBark1.${f}-methMaze1-best.axt
 	axtBest methBark1.${f}-methBurt1.axt all -winSize=500 -minScore=5000  methBark1.${f}-methBurt1-best.axt
 	echo "axtToMaf"
 	axtToMaf methBark1.${f}-metAce1-best.axt chrom.sizes chrom.sizes methBark1.${f}-metAce1.maf
 	axtToMaf methBark1.${f}-methMaze1-best.axt chrom.sizes chrom.sizes methBark1.${f}-methMaze1.maf
 	axtToMaf methBark1.${f}-methBurt1-best.axt chrom.sizes chrom.sizes methBark1.${f}-methBurt1.maf
     end
 
     #multiz
     foreach f(*.maf)
 	cat $f | gawk 'BEGIN{getline; print $0; getline; getline; getline; getline;}{print $0;}' > temp
 	mv temp $f
     end
 
     foreach f(`cat chroms.txt`)
 	echo $f
 	multiz methBark1.${f}-metAce1.maf methBark1.${f}-methMaze1.maf - > methBark1.${f}-metAce1-methMaze1.maf
 	multiz methBark1.${f}-methBurt1.maf methBark1.${f}-metAce1-methMaze1.maf - >  methBark1.${f}-metAce1-methMaze1-methBurt1.maf
     end
 
     #phyloHMM
     foreach f(`cat chroms.txt`)
 	echo $f
 	msa_view -i MAF -M methBark1.$f.fa -o SS methBark1.${f}-metAce1-methMaze1-methBurt1.maf > methBark1.$f.ss
 	cat methBark1.$f.ss | gawk '{if(/^NAMES/){print "NAMES = methBark1,metAce1,methMaze1,methBurt1";} else{print $0;}}' > temp
 	mv temp methBark1.$f.ss
 	phyloFit -i SS methBark1.$f.ss -t "(methBurt1,(methBark1,(metAce1,methMaze1)))" -o ${f}_MbaMaMmMbu
     end
     
     #chr has the largest NTUPLES (1072) so use it for starting mod
     # it shows GC=0.207678+0.206065=0.413743
     #add GC content to next call
     foreach f(`cat chroms.txt`)
 	echo $f
 	phastCons methBark1.$f.ss chr_MbaMaMmMbu.mod \
 	--gc 0.4137 --target-coverage 0.7 --estimate-trees ${f} \
 	--expected-lengths 25 --no-post-probs --ignore-missing \
 	--nrates 1,1
     end
 
     foreach f(*.cons.mod)
 	set b=$f:r:r
 	echo $b
 	phastCons methBark1.$b.ss chr.cons.mod,chr.noncons.mod \
 	--target-coverage 0.7 --expected-lengths 25 \
 	--viterbi ${b}_methBark1-elements.bed --score \
 	--require-informative 0 --seqname $b > ${b}_cons.dat
 	wigEncode ${b}_cons.dat ${b}_phastCons.wig ${b}_phastCons.wib
     end
 
     #combine phastCons elements into 1 bed file
     cat *elements.bed > phastCons.bed
 
     #move data
     mkdir wib
     mv chr_phastCons.wib wib/.
     mv plasmid1_phastCons.wib wib/.
     mv chr_phastCons.wig wib/.
     mv plasmid1_phastCons.wig wib/.
     ln -s /cluster/data/methBark1/bed/conservation/wib/*.wib /gbdb/methBark1/wib
     mkdir /gbdb/methBark1/pwMaf
     mkdir -p otherSpp/metAce1 otherSpp/methMaze1 otherSpp/methBurt1
     foreach f(`cat chroms.txt`)
 	echo $f
 	mv methBark1.${f}-metAce1.maf otherSpp/metAce1/$f.maf
 	mv methBark1.${f}-methMaze1.maf otherSpp/methMaze1/$f.maf
 	mv methBark1.${f}-methBurt1.maf otherSpp/methBurt1/$f.maf
     end
     ln -s /cluster/data/methBark1/bed/conservation/otherSpp/metAce1 /gbdb/methBark1/pwMaf/metAce1_pwMaf
     ln -s /cluster/data/methBark1/bed/conservation/otherSpp/methBurt1 /gbdb/methBark1/pwMaf/methBurt1_pwMaf
     ln -s /cluster/data/methBark1/bed/conservation/otherSpp/methMaze1 /gbdb/methBark1/pwMaf/methMaze1_pwMaf
     mkdir multiz
     foreach f(`cat chroms.txt`)
 	echo $f
 	mv methBark1.${f}-metAce1-methMaze1-methBurt1.maf multiz/$f.maf
     end
     ln -s /cluster/data/methBark1/bed/conservation/multiz /gbdb/methBark1/multizMbaMaMmMbu
 
     #load
     hgLoadWiggle methBark1 phastCons /cluster/data/methBark1/bed/conservation/wib/*_phastCons.wig
     hgLoadMaf -warn methBark1 multizMbaMaMmMbu
     hgLoadMaf -warn methBark1 metAce1_pwMaf -pathPrefix=/gbdb/methBark1/pwMaf/metAce1_pwMaf
     hgLoadMaf -warn methBark1 methBurt1_pwMaf -pathPrefix=/gbdb/methBark1/pwMaf/methBurt1_pwMaf
     hgLoadMaf -warn methBark1 methMaze1_pwMaf -pathPrefix=/gbdb/methBark1/pwMaf/methMaze1_pwMaf
     hgLoadBed methBark1 phastConsElements phastCons.bed 
 
     #trackDb
     cd ~/kent/src/hg/makeDb/trackDb/archae/
     mkdir methBark1
     cvs add methBark1
     cd methBark1
     #trackDb.ra entry
     # track multizMbaMaMmMbu
     # shortLabel Conservation
     # longLabel Methanogen multiz alignments
     # group compGeno
     # priority 10.0
     # visibility pack
     # type wigMaf 0.0 1.0
     # maxHeightPixels 100:40:11
     # wiggle phastCons
     # yLineOnOff Off
-    # autoScaleDefault Off
+    # autoScale Off
     # pairwise pwMaf
     # speciesOrder metAce1 methMaze1 methBurt1
     cvs add trackDb.ra
     cvs commit -m "New multiz track" trackDb.ra
     #html page
     cvs add multizMbaMaMmMbu.html
     cvs commit -m "Details page for multiz track" multizMbaMaMmMbu.html
 
 # DESCRIPTION PAGE ()
 
     ssh hgwdev
     # Write ~/kent/src/hg/makeDb/trackDb/archae/methBark1/description.html
     chmod a+r ~/kent/src/hg/makeDb/trackDb/archae/methBark1/description.html
     # Check it in.
     mkdir /gbdb/methBark1/html
     ln -s /cluster/data/methBark1/html/description.html /gbdb/methBark1/html/
 
 # GENBANK PROTEIN-CODING GENES ()
 
     ssh hgwdev
     mkdir /cluster/data/methBark1/genbank
     cd /cluster/data/methBark1/genbank
     wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Methanosarcina_acetivorans/NC_003552.gbk
     mv NC_003552.gbk methBark1.gbk
     # Create 3 files to assist parsing of the genbank
     # 1. for a bed file
     echo 'chr
 start
 end
 gene
 1000
 strand' > methBark1-params-bed.txt
     # 2. for the peptide parts
     echo 'gene
 translation' > methBark1-params-pep.txt
     # 3. for the other gene information
     echo 'gene
 product
 note' > methBark1-params-xra.txt
     # Now extract the genes and information:
     gbArchaeGenome methBark1.gbk methBark1-params-bed.txt methBark1-genbank-cds.bed
     gbArchaeGenome methBark1.gbk methBark1-params-pep.txt methBark1-genbank-cds.pep
     gbArchaeGenome methBark1.gbk methBark1-params-xra.txt methBark1-genbank-cds.xra
     hgLoadBed methBark1 gbProtCode methBark1-genbank-cds.bed
     hgsql methBark1 < ~/kent/src/hg/lib/pepPred.sql
     hgsql methBark1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table pepPred to gbProtCodePep | hgsql methBark1
     echo rename table minGeneInfo to gbProtCodeXra | hgsql methBark1
     echo load data local infile \'methBark1-genbank-cds.pep\' into table gbProtCodePep | hgsql methBark1
     echo load data local infile \'methBark1-genbank-cds.xra\' into table gbProtCodeXra | hgsql methBark1
 
 #genbank to genePred
 
 csh
 tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' methBark1-genbank-cds.bed | bedToGenePred stdin tmp.gp
 tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,3,4),name2,"cmpl","cmpl",0}' tmp.gp  > tmp2.gp
 join -t "     " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.3 1.13 1.14 1.15  tmp2.gp methBark1-genbank-cds.xra > methBark1.gp
 
 # GENBANK rRNA GENES ()
     ssh hgdev
     cd /cluster/data/methBark1/genbank
     gbArchaeGenome -kind=rRNA methBark1.gbk methBark1-params-bed.txt methBark1-rrnas.bed
     echo 'gene product NA' > methBark1-params-rrna-xra.txt
     gbArchaeGenome -kind=rRNA methBark1.gbk methBark1-params-rrna-xra.txt methBark1-rrnas-xra.txt
     hgLoadBed methBark1 gbRRNA methBark1-rrnas.bed
     hgsql methBark1 < ~/kent/src/hg/lib/minGeneInfo.sql
     echo rename table minGeneInfo to gbRRNAXra | hgsql methBark1
     echo load data local infile \'methBark1-rrnas-xra.txt\' into table gbRRNAXra | hgsql methBark1
 
 # COG STUFF
     # Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
     # and save as cogpage.txt
     awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt | sed -e 's/\[//' -e 's/\]//' > cogs.txt
     rm cogpage.txt
     # Now we have the basic list of cogs and the letter code for each one.
     
 
 # TODD LOWE tRNA GENES ()
 
     # This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
     # Lowe.  See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
     # **Showing the tRNAScanSE instructions would be nice in the future.  
     ssh hgwdev
     mkdir /cluster/data/methBark1/bed/loweTrnaGene
     cd /cluster/data/methBark1/bed/loweTrnaGene
     hgLoadBed -tab methBark1 loweTrnaGene methBark1-lowe-trnas.bed -sqlTable=~/kent/src/hg/lib/loweTrnaGene.sql
 
 # TODD LOWE snoRNA GENES ()
     # This is a bed 6 file created by hand.
     ssh hgwdev
     mkdir /cluster/data/methBark1/bed/loweSnoGene
     cd /cluster/data/methBark1/bed/loweSnoGene
     hgLoadBed -tab methBark1 loweSnoGene methBark1-snos.bed
 
 # TIGR GENES ()
     # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
     # and fill out the web form as follows:
     #   - Pick "Retrieve attributes for the specified DNA feature within a specific 
     #     organism and/or a specific role category".
     #       * Pick "Pyrobaculum aerophilum IM2", and "Primary and TIGR annotation ORFs" 
     #         from the 1st and 3rd box.
     #       * Select everything from "Choose TIGR Annotation Gene Attributes"
     #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
     #       * Select everything from "Choose Other Gene Attributes"
     #   - Click submit, and click save as tab-delimited file.
     ssh hgwdev
     mkdir /cluster/data/methBark1/bed/tigrCmrORFs
     cp methBark1-tigr.tab /cluster/data/methBark1/bed/tigrCmrORFs
     cd /cluster/data/methBark1/bed/tigrCmrORFs
     /projects/lowelab/users/aamp/bin/i386/tigrCmrToBed methBark1-tigr.tab methBark1-tigr.bed
     hgLoadBed -tab methBark1 tigrCmrORFs methBark1-tigr.bed -sqlTable=~/kent/src/hg/lib/tigrCmrGene.sql