src/hg/makeDb/doc/methKand1.txt 1.3
1.3 2009/11/25 21:48:41 hiram
change autoScaleDefault to autoScale
Index: src/hg/makeDb/doc/methKand1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/methKand1.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/makeDb/doc/methKand1.txt 26 Jul 2006 16:59:56 -0000 1.2
+++ src/hg/makeDb/doc/methKand1.txt 25 Nov 2009 21:48:41 -0000 1.3
@@ -1,306 +1,306 @@
# for emacs: -*- mode: sh; -*-
# This file describes building the browser database for the archaeal
# species Methanopyrus kandleri.
# DOWNLOAD SEQUENCE FROM GENBANK (DONE)
# Creates the methKand1 data directory on store5 (symlinked from
# /cluster/data), copies in the NC_003551 FASTA and converts it to 2bit.
ssh eieio
mkdir /cluster/store5/archae/methKand1
ln -s /cluster/store5/archae/methKand1 /cluster/data/methKand1
cd /cluster/data/methKand1
# The FASTA is copied from a local directory under /projects/lowelab/db, not
# downloaded in this step.
cp /projects/lowelab/db/Bacteria/Methanopyrus_kandleri/NC_003551.fna .
mv NC_003551.fna NC_003551.fa
# Edit header of *.fa to '> methKand1 >smallextrachr >largeextrachr'
# NOTE(review): later steps refer to the sequence as "chr" (dbDb defaultPos,
# chr.bed, --seqname chr); confirm which header name was actually used.
cat NC_003551.fa > methKand1.fa
faToTwoBit methKand1.fa methKand1.2bit
# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE)
# Creates the methKand1 database, records the sequence sizes, copies the grp
# table from hg16, and registers the assembly in hgcentraltest
# (dbDb, defaultDb and genomeClade).
ssh hgwdev
echo 'create database methKand1' | hgsql ''
cd /cluster/data/methKand1
# chrom.sizes is loaded into chromInfo in the CREATE CHROMINFO TABLE step.
faSize -detailed methKand1.fa > chrom.sizes
# Copy hg16's grp table, declaring NAME as the primary key.
echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
| hgsql methKand1
# dbDb row: default position chr:500000-550000, orderKey 242, hgNearOk 0.
echo 'INSERT INTO dbDb \
(name, description, nibPath, organism, \
defaultPos, active, orderKey, genome, scientificName, \
htmlPath, hgNearOk) values \
("methKand1", "April 2002", "/gbdb/methKand1", "Methanopyrus kandleri", \
"chr:500000-550000", 1, 242, "Methanopyrus kandleri", \
"Methanopyrus kandleri AV19", "/gbdb/methKand1/html/description.html", \
0);' \
| hgsql hgcentraltest
# Make methKand1 the default assembly for this genome.
echo 'INSERT INTO defaultDb (genome, name) values ("Methanopyrus kandleri", "methKand1");' \
| hgsql hgcentraltest
# Place the genome in the "archaea" clade with priority 85.
echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Methanopyrus kandleri", "archaea",85);' \
| hgsql hgcentraltest
# CREATE CHROMINFO TABLE (DONE)
# Creates chromInfo from the schema in chromInfo.sql, fills it from
# chrom.sizes, then sets fileName to the 2bit path under /gbdb.
ssh hgwdev
cd /cluster/data/methKand1
cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
hgsql methKand1 < chromInfo.sql
echo "load data local infile 'chrom.sizes' into table chromInfo" | hgsql methKand1
# No WHERE clause: the update applies to every row in chromInfo.
# NOTE(review): no step visible here creates /gbdb/methKand1/methKand1.2bit;
# confirm the link was made by hand.
echo "update chromInfo set fileName = '/gbdb/methKand1/methKand1.2bit'" | hgsql methKand1
cd ~/kent/src/hg/makeDb/trackDb
# add the trackDb directories
mkdir -p archae/methKand1
cvs add archae/methKand1
cvs commit
# NOTE(review): the commands below repeat the directory setup above, with an
# additional "cvs add archae"; presumably only one of the two runs was needed.
cd ~/kent/src/hg/makeDb/trackDb
# edit the trackDb makefile
# add the trackDb directories
mkdir -p archae/methKand1
cvs add archae
cvs add archae/methKand1
cvs commit
# GC20BASE (DONE)
# Builds a GC-percent wiggle track over 20-base windows and loads it as the
# gc20Base table.
ssh kkstore02
mkdir -p /cluster/data/methKand1/bed/gc20Base
cd /cluster/data/methKand1/bed/gc20Base
# hgGcPercent writes to stdout (-file=stdout), which wigEncode reads on stdin
# to produce the .wig/.wib pair.
hgGcPercent -wigOut -doGaps -file=stdout -win=20 methKand1 \
/cluster/data/methKand1/ | wigEncode stdin gc20Base.wig gc20Base.wib
ssh hgwdev
cd /cluster/data/methKand1/bed/gc20Base
mkdir /gbdb/methKand1/wib
# Link the .wib into /gbdb; the same directory is given as -pathPrefix when
# loading the .wig.
ln -s `pwd`/gc20Base.wib /gbdb/methKand1/wib
hgLoadWiggle -pathPrefix=/gbdb/methKand1/wib methKand1 gc20Base gc20Base.wig
# verify index is correct:
hgsql methKand1 -e "show index from gc20Base;"
# should see good numbers in Cardinality column
# TANDEM REPEAT MASKER (DONE)
# Runs trfBig on the genome FASTA and loads the resulting BED as simpleRepeat.
ssh hgwdev
mkdir -p /cluster/data/methKand1/bed/simpleRepeat
cd /cluster/data/methKand1
# The second argument is /dev/null; the BED file named by -bedAt is what gets
# loaded below.
trfBig methKand1.fa /dev/null -bedAt=/cluster/data/methKand1/bed/simpleRepeat/chr.bed
cd /cluster/data/methKand1/bed/simpleRepeat
# The table is created from simpleRepeat.sql via -sqlTable.
hgLoadBed methKand1 simpleRepeat *.bed -sqlTable=/cluster/home/baertsch/kent/src/hg/lib/simpleRepeat.sql
# MULTIZ with methKand1, methJann1, methTher1, metMar1
# DONE (10/11/05), kpollard
# The scoring matrix and the per-species sequence files are copied from the
# metMar1 conservation directory rather than rebuilt here.
cd /cluster/data/methKand1/bed/
mkdir conservation
cd conservation
cp /cluster/data/metMar1/bed/conservation/HoxD55.q .
cp /cluster/data/metMar1/bed/conservation/metMar1.chr .
cp /cluster/data/metMar1/bed/conservation/methJann1.chr .
cp /cluster/data/metMar1/bed/conservation/methKand1.chr .
cp /cluster/data/metMar1/bed/conservation/methTher1.chr .
# nib files are copied for metMar1, methKand1 and methTher1; methJann1 is
# copied as a 2bit instead (see the matching lavToAxt argument later).
cp /cluster/data/metMar1/bed/conservation/metMar1.chr.nib .
cp /cluster/data/metMar1/bed/conservation/methKand1.chr.nib .
cp /cluster/data/metMar1/bed/conservation/methTher1.chr.nib .
cp /cluster/data/metMar1/bed/conservation/methJann1.2bit .
#chrom sizes
# One chrom.sizes covering every *.chr file in this directory; it is passed as
# both size arguments to axtToMaf below.
faSize -detailed *.chr > chrom.sizes
#blastz
# methKand1 is the target in all three pairwise alignments; HoxD55.q is the
# scoring matrix (Q=).
blastz methKand1.chr methJann1.chr Q=HoxD55.q > methKand1-methJann1.lav
blastz methKand1.chr methTher1.chr Q=HoxD55.q > methKand1-methTher1.lav
blastz methKand1.chr metMar1.chr Q=HoxD55.q > methKand1-metMar1.lav
# lav -> axt. The methJann1 run names methJann1.2bit as the query sequence
# source; the other two use "." (the current directory holding the nib files).
/cluster/bin/i386/lavToAxt methKand1-methJann1.lav . methJann1.2bit methKand1-methJann1.axt
/cluster/bin/i386/lavToAxt methKand1-methTher1.lav . . methKand1-methTher1.axt
/cluster/bin/i386/lavToAxt methKand1-metMar1.lav . . methKand1-metMar1.axt
# Filter each pairwise axt with axtBest (window 500, minimum score 5000).
axtBest methKand1-methJann1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-methJann1-best.axt
axtBest methKand1-methTher1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-methTher1-best.axt
axtBest methKand1-metMar1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-metMar1-best.axt
# axt -> maf for each filtered pairwise alignment.
axtToMaf methKand1-methJann1-best.axt chrom.sizes chrom.sizes methKand1-methJann1.maf
axtToMaf methKand1-methTher1-best.axt chrom.sizes chrom.sizes methKand1-methTher1.maf
axtToMaf methKand1-metMar1-best.axt chrom.sizes chrom.sizes methKand1-metMar1.maf
#multiz
#remove extra header lines
# Progressive merge: methJann1 + metMar1 first, then methTher1 is added to
# produce the four-species alignment.
multiz methKand1-methJann1.maf methKand1-metMar1.maf - > methKand1-methJann1-metMar1.maf
multiz methKand1-methTher1.maf methKand1-methJann1-metMar1.maf - > methKand1-methJann1-metMar1-methTher1.maf
#phyloHMM
# Convert the four-species MAF to SS format, fit a tree model (MkMjMmMt.mod),
# then run phastCons twice: once to estimate the conserved/non-conserved
# models, once to produce scores and conserved elements.
/cluster/bin/phast/msa_view -i MAF -M methKand1.chr -o SS methKand1-methJann1-metMar1-methTher1.maf > methKand1.ss
/cluster/bin/phast/phyloFit -i SS methKand1.ss -t "(methKand1,(methTher1,(methJann1,metMar1)))" -o MkMjMmMt
/cluster/bin/phast/msa_view -i SS methKand1.ss --summary-only
#add GC content to next call
# NOTE(review): the --gc value 0.5354 presumably comes from the
# --summary-only output above; confirm before reusing.
/cluster/bin/phast/phastCons methKand1.ss MkMjMmMt.mod --gc 0.5354 \
--target-coverage 0.7 --estimate-trees met-tree \
--expected-lengths 25 --no-post-probs --ignore-missing \
--nrates 1,1
# Second pass uses the met-tree.*.mod files named by --estimate-trees above;
# scores go to cons.dat and elements to methKand1-elements.bed.
/cluster/bin/phast/phastCons methKand1.ss \
met-tree.cons.mod,met-tree.noncons.mod \
--target-coverage 0.7 --expected-lengths 25 \
--viterbi methKand1-elements.bed --score \
--require-informative 0 --seqname chr > cons.dat
wigEncode cons.dat phastCons.wig phastCons.wib
/cluster/bin/phast/draw_tree MkMjMmMt.mod > met-tree.ps
#compare to metMar1 met-tree.ps
#move data
# Arrange outputs under wib/, otherSpp/<species>/ and multiz/, and symlink
# them into /gbdb/methKand1 under the names used by the load commands below.
mkdir wib
mv phastCons.wib wib/phastCons.wib
mv phastCons.wig wib/phastCons.wig
ln -s /cluster/data/methKand1/bed/conservation/wib/phastCons.wib /gbdb/methKand1/wib
mkdir /gbdb/methKand1/pwMaf
mkdir -p otherSpp/methJann1 otherSpp/methTher1 otherSpp/metMar1
# Each pairwise MAF is renamed chr.maf inside its species directory.
mv methKand1-methTher1.maf otherSpp/methTher1/chr.maf
mv methKand1-methJann1.maf otherSpp/methJann1/chr.maf
mv methKand1-metMar1.maf otherSpp/metMar1/chr.maf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/methTher1 /gbdb/methKand1/pwMaf/methTher1_pwMaf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/metMar1 /gbdb/methKand1/pwMaf/metMar1_pwMaf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/methJann1 /gbdb/methKand1/pwMaf/methJann1_pwMaf
mkdir multiz
mv methKand1-methJann1-metMar1-methTher1.maf multiz/chr.maf
ln -s /cluster/data/methKand1/bed/conservation/multiz /gbdb/methKand1/multizMkMjMmMt
#load
# NOTE(review): unlike the gc20Base load, no -pathPrefix is given here;
# presumably the default resolves to /gbdb/methKand1/wib -- confirm.
hgLoadWiggle methKand1 phastCons /cluster/data/methKand1/bed/conservation/wib/phastCons.wig
# The multiz table name matches the /gbdb/methKand1/multizMkMjMmMt symlink
# created above; the pairwise loads pass their directory via -pathPrefix.
hgLoadMaf -warn methKand1 multizMkMjMmMt
hgLoadMaf -warn methKand1 methTher1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/methTher1_pwMaf
hgLoadMaf -warn methKand1 metMar1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/metMar1_pwMaf
hgLoadMaf -warn methKand1 methJann1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/methJann1_pwMaf
# Conserved elements come from the phastCons --viterbi output.
hgLoadBed methKand1 phastConsElements methKand1-elements.bed
#trackDb
# NOTE(review): archae/methKand1 was already created and added to CVS in the
# CREATE CHROMINFO TABLE step, so the mkdir / cvs add here may be redundant.
cd ~/kent/src/hg/makeDb/trackDb/archae/
mkdir methKand1
cvs add methKand1
cd methKand1
#trackDb.ra entry
# NOTE(review): the longLabel below names Thermoplasma/Ferroplasma/Picrophilus,
# but the species aligned in this build are methKand1, methJann1, methTher1
# and metMar1; it looks copied from another genome's entry -- verify.
# track multizMkMjMmMt
# shortLabel Conservation
# longLabel Thermoplasma/Ferroplasma/Picrophilus multiz alignments
# group compGeno
# priority 10.0
# visibility pack
# type wigMaf 0.0 1.0
# maxHeightPixels 100:40:11
# wiggle phastCons
# yLineOnOff Off
- # autoScaleDefault Off
+ # autoScale Off
# pairwise pwMaf
# speciesOrder methJann1 methTher1 metMar1
cvs add trackDb.ra
cvs commit -m "New multiz track" trackDb.ra
#html page
cvs add multizMkMjMmMt.html
cvs commit -m "Details page for multiz track" multizMkMjMmMt.html
# DESCRIPTION PAGE ()
# Publishes the assembly description page under /gbdb/methKand1/html, the
# htmlPath registered in dbDb.
ssh hgwdev
# Write ~/kent/src/hg/makeDb/trackDb/archae/methKand1/description.html
chmod a+r ~/kent/src/hg/makeDb/trackDb/archae/methKand1/description.html
# Check it in.
mkdir /gbdb/methKand1/html
# NOTE(review): the link target is /cluster/data/methKand1/html/description.html,
# but the page is written under the trackDb source tree and no visible step
# creates /cluster/data/methKand1/html; confirm the file is copied there first.
ln -s /cluster/data/methKand1/html/description.html /gbdb/methKand1/html/
# GENBANK PROTEIN-CODING GENES ()
# Downloads the GenBank record for the genome, extracts CDS features with
# gbArchaeGenome into BED / peptide / extra-info files, loads them as
# gbProtCode, gbProtCodePep and gbProtCodeXra, and builds a genePred file.
ssh hgwdev
mkdir /cluster/data/methKand1/genbank
cd /cluster/data/methKand1/genbank
# Fetch the Methanopyrus kandleri record: NC_003551 is the accession of the
# FASTA used in the DOWNLOAD SEQUENCE step.
# NOTE(review): the FTP directory name is assumed to match the local mirror
# path (Bacteria/Methanopyrus_kandleri); confirm against the NCBI FTP layout.
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Methanopyrus_kandleri/NC_003551.gbk
mv NC_003551.gbk methKand1.gbk
# Create 3 files to assist parsing of the genbank
# 1. for a bed file
echo 'chr
start
end
gene
1000
strand' > methKand1-params-bed.txt
# 2. for the peptide parts
echo 'gene
translation' > methKand1-params-pep.txt
# 3. for the other gene information
echo 'gene
product
note' > methKand1-params-xra.txt
# Now extract the genes and information:
gbArchaeGenome methKand1.gbk methKand1-params-bed.txt methKand1-genbank-cds.bed
gbArchaeGenome methKand1.gbk methKand1-params-pep.txt methKand1-genbank-cds.pep
gbArchaeGenome methKand1.gbk methKand1-params-xra.txt methKand1-genbank-cds.xra
hgLoadBed methKand1 gbProtCode methKand1-genbank-cds.bed
# Create pepPred and minGeneInfo from their .sql schemas, rename them to the
# gbProtCode* table names, then load the extracted files into them.
hgsql methKand1 < ~/kent/src/hg/lib/pepPred.sql
hgsql methKand1 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table pepPred to gbProtCodePep | hgsql methKand1
echo rename table minGeneInfo to gbProtCodeXra | hgsql methKand1
echo load data local infile \'methKand1-genbank-cds.pep\' into table gbProtCodePep | hgsql methKand1
echo load data local infile \'methKand1-genbank-cds.xra\' into table gbProtCodeXra | hgsql methKand1
#genbank to genePred
# The commands below are run from a csh session.
csh
# Expand the 6-column BED to 12 columns with a single block covering the whole
# feature, then convert to genePred.
tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' methKand1-genbank-cds.bed | bedToGenePred stdin tmp.gp
# NOTE(review): "name2" is an unset awk variable here, so that column is
# printed empty; confirm this is intended.
tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,3,4),name2,"cmpl","cmpl",0}' tmp.gp > tmp2.gp
# Join with the .xra file to pull its third column into the output.
# NOTE(review): no step visible here loads methKand1.gp into the database.
join -t " " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.3 1.13 1.14 1.15 tmp2.gp methKand1-genbank-cds.xra > methKand1.gp
# GENBANK rRNA GENES ()
# Extracts rRNA features (-kind=rRNA) from the GenBank record and loads them
# as the gbRRNA table plus a gbRRNAXra side table.
ssh hgwdev
cd /cluster/data/methKand1/genbank
# Reuses the BED parameter file written in the protein-coding genes step.
gbArchaeGenome -kind=rRNA methKand1.gbk methKand1-params-bed.txt methKand1-rrnas.bed
echo 'gene product NA' > methKand1-params-rrna-xra.txt
gbArchaeGenome -kind=rRNA methKand1.gbk methKand1-params-rrna-xra.txt methKand1-rrnas-xra.txt
hgLoadBed methKand1 gbRRNA methKand1-rrnas.bed
# Create minGeneInfo from its schema, rename it to gbRRNAXra, then load it.
hgsql methKand1 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table minGeneInfo to gbRRNAXra | hgsql methKand1
echo load data local infile \'methKand1-rrnas-xra.txt\' into table gbRRNAXra | hgsql methKand1
# COG STUFF ()
# Builds cogs.txt, a two-column tab-separated list, from a saved copy of the
# NCBI COG list page.
# Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
# and save as cogpage.txt
# Print whitespace-separated field 6 then field 5, tab-separated, and strip
# the first "[" and "]" on each line.
awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt | sed -e 's/\[//' -e 's/\]//' > cogs.txt
rm cogpage.txt
# Now we have the basic list of cogs and the letter code for each one.
# TODD LOWE tRNA GENES ()
# This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
# Lowe. See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
# **Showing the tRNAScanSE instructions would be nice in the future.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/loweTrnaGene
cd /cluster/data/methKand1/bed/loweTrnaGene
# methKand1-lowe-trnas.bed is the hand-made file; it must be placed in this
# directory before loading.
# $HOME is used instead of "~" because the shell does not expand a tilde in
# the middle of a word such as -sqlTable=~/..., so the literal "~" would be
# passed to hgLoadBed.
hgLoadBed -tab methKand1 loweTrnaGene methKand1-lowe-trnas.bed -sqlTable=$HOME/kent/src/hg/lib/loweTrnaGene.sql
# TODD LOWE snoRNA GENES ()
# This is a bed 6 file created by hand.
# methKand1-snos.bed must be placed in the new directory before loading; no
# -sqlTable is given for this load.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/loweSnoGene
cd /cluster/data/methKand1/bed/loweSnoGene
hgLoadBed -tab methKand1 loweSnoGene methKand1-snos.bed
# TIGR GENES ()
# Exports gene attributes from the TIGR CMR web form as a tab-delimited file,
# converts it to BED with tigrCmrToBed and loads it as tigrCmrORFs.
# First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
# and fill out the web form as follows:
# - Pick "Retrieve attributes for the specified DNA feature within a specific
# organism and/or a specific role category".
# * Pick "Methanopyrus kandleri AV19", and "Primary and TIGR annotation ORFs"
# from the 1st and 3rd box.
# * Select everything from "Choose TIGR Annotation Gene Attributes"
# * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
# * Select everything from "Choose Other Gene Attributes"
# - Click submit, and click save as tab-delimited file.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/tigrCmrORFs
# Run the cp from the directory where the web-form export was saved as
# methKand1-tigr.tab.
cp methKand1-tigr.tab /cluster/data/methKand1/bed/tigrCmrORFs
cd /cluster/data/methKand1/bed/tigrCmrORFs
/projects/lowelab/users/aamp/bin/i386/tigrCmrToBed methKand1-tigr.tab methKand1-tigr.bed
# $HOME is used instead of "~" because the shell does not expand a tilde in
# the middle of a word such as -sqlTable=~/..., so the literal "~" would be
# passed to hgLoadBed.
hgLoadBed -tab methKand1 tigrCmrORFs methKand1-tigr.bed -sqlTable=$HOME/kent/src/hg/lib/tigrCmrGene.sql