src/hg/makeDb/doc/pyrFur2.txt 1.4
1.4 2009/11/25 21:48:42 hiram
change autoScaleDefault to autoScale
Index: src/hg/makeDb/doc/pyrFur2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/pyrFur2.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/makeDb/doc/pyrFur2.txt 14 Sep 2006 20:16:43 -0000 1.3
+++ src/hg/makeDb/doc/pyrFur2.txt 25 Nov 2009 21:48:42 -0000 1.4
@@ -1,389 +1,389 @@
# for emacs: -*- mode: sh; -*-
# This file describes building the browser database for the archaeal
# species Pyrococcus furiosus
# DOWNLOAD SEQUENCE FROM GENBANK (DONE 02/10/04)
ssh eieo
mkdir /cluster/store5/archae/pyrFur2
ln -s /cluster/store5/archae/pyrFur2 /cluster/data/pyrFur2
cd /cluster/data/pyrFur2
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.fna
mv NC_003413.fna chr1.fa
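# Edit header of chr1.fa to '> pyrAer1'
# NOTE(review): 'pyrAer1' looks carried over from the pyrAer1 doc; confirm the intended header for pyrFur2.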
# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 02/10/04)
ssh hgwdev
echo 'create database pyrFur2' | hgsql ''
cd /cluster/data/pyrFur2
hgNibSeq pyrFur2 /cluster/data/pyrFur2/nib chr1.fa
faSize -detailed chr1.fa > chrom.sizes
mkdir -p /gbdb/pyrFur2/nib
echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
| hgsql pyrFur2
echo 'INSERT INTO dbDb \
(name, description, nibPath, organism, \
defaultPos, active, orderKey, genome, scientificName, \
htmlPath, hgNearOk) values \
("pyrFur2", "Pyrococcus furiosus", "/gbdb/pyrFur2/nib", "P. furiosus", \
"chr1:500000-550000", 1, 85, "Archae", \
"Pyrococcus furiosus", "/gbdb/pyrFur2/html/description.html", \
0);' \
| hgsql -h genome-testdb hgcentraltest
cd ~/kent/src/hg/makeDb/trackDb
# edit the trackDb makefile
# add the trackDb directories
mkdir -p archae/pyrFur2
cvs add archae
cvs add archae/pyrFur2
cvs commit
# GC PERCENT TRACK (DONE 02/10/04)
ssh hgwdev
mkdir -p /cluster/data/pyrFur2/bed/gcPercent
cd /cluster/data/pyrFur2/bed/gcPercent
hgsql pyrFur2 < ~/kent/src/hg/lib/gcPercent.sql
hgGcPercent -win=2000 pyrFur2 ../../nib
# edit ~/kent/src/hg/makeDb/trackDb/archae/trackDb.ra and add a GC percent
# entry for 2,000 base windows instead (simply cut and paste the entry from
# ~/kent/src/hg/makeDb/trackDb/trackDb.ra).
# GC 20 BASE WIGGLE TRACK (DONE 8/25)
mkdir /cluster/data/pyrFur2/bed/gc20Base
cd /cluster/data/pyrFur2/bed/gc20Base
mkdir wigData20 dataLimits20
hgGcPercent -chr=chr -file=stdout -win=20 -overlap=19 pyrFur2 ../../nib | grep -w GC | \
awk '
{
bases = $3 - $2
perCent = $5/10.0
printf "%d\t%.1f\n", $2+1, perCent
}' | wigAsciiToBinary -dataSpan=1 -chrom=chr \
-wibFile=wigData20/gc20Base_1 -name=1 stdin > dataLimits20/chr
hgLoadWiggle pyrFur2 gc20Base wigData20/*.wig
mkdir /gbdb/pyrFur2/wib
ln -s `pwd`/wigData20/*.wib /gbdb/pyrFur2/wib
# GENBANK PROTEIN-CODING GENES (DONE 01/14/04)
ssh hgwdev
mkdir /cluster/data/pyrFur2/genbank
cd /cluster/data/pyrFur2/genbank
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.gbk
mv NC_003413.gbk pyrFur2.gbk
# Create 3 parameter files to assist parsing of the GenBank file
# 1. for a bed file
echo 'chr1
start
end
locus_tag
1000
strand' > pyrFur2-params-bed.txt
# 2. for the peptide parts
echo 'locus_tag
translation' > pyrFur2-params-pep.txt
# 3. for the other gene information
echo 'locus_tag
product
note' > pyrFur2-params-xra.txt
# Now extract the genes and information:
gbArchaeGenome pyrFur2.gbk pyrFur2-params-bed.txt pyrFur2-genbank-cds.bed
gbArchaeGenome pyrFur2.gbk pyrFur2-params-pep.txt pyrFur2-genbank-cds.pep
gbArchaeGenome pyrFur2.gbk pyrFur2-params-xra.txt pyrFur2-genbank-cds.xra
hgLoadBed pyrFur2 gbProtCode pyrFur2-genbank-cds.bed
hgsql pyrFur2 < ~/kent/src/hg/lib/pepPred.sql
hgsql pyrFur2 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table pepPred to gbProtCodePep | hgsql pyrFur2
echo rename table minGeneInfo to gbProtCodeXra | hgsql pyrFur2
echo load data local infile \'pyrFur2-genbank-cds.pep\' into table gbProtCodePep | hgsql pyrFur2
echo load data local infile \'pyrFur2-genbank-cds.xra\' into table gbProtCodeXra | hgsql pyrFur2
# TIGR GENES (DONE 02/10/04)
# First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
# and fill out the web form as follows:
# - Pick "Retrieve attributes for the specified DNA feature within a specific
# organism and/or a specific role category".
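# * Pick the Pyrococcus furiosus entry (these notes originally said
# "Pyrobaculum aerophilum IM2", apparently carried over from the pyrAer1
# doc -- confirm), and "Primary and TIGR annotation ORFs"
# from the 1st and 3rd box.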
# * Select everything from "Choose TIGR Annotation Gene Attributes"
# * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
# * Select everything from "Choose Other Gene Attributes"
# - Click submit, and click save as tab-delimited file.
ssh hgwdev
mkdir /cluster/data/pyrFur2/bed/tigrCmrORFs
cp pyrFur2-tigr.tab /cluster/data/pyrFur2/bed/tigrCmrORFs
cd /cluster/data/pyrFur2/bed/tigrCmrORFs
tigrCmrToBed pyrFur2-tigr.tab pyrFur2-tigr.bed
hgLoadBed -tab pyrFur2 tigrCmrGene pyrFur2-tigr.bed -sqlTable=~/kent/src/hg/lib/tigrCmrGene.sql
echo RENAME TABLE tigrCmrGene to tigrCmrORFs | hgsql pyrFur2
# Lowe Lab Microarrays (DONE 02/10/04)
ssh hgwdev
mkdir /cluster/data/pyrFur2/bed/llaPfuPrintA
cp Pfu-arrays.{bed,exps} /cluster/data/pyrFur2/bed/llaPfuPrintA
cd /cluster/data/pyrFur2/bed/llaPfuPrintA
hgLoadBed pyrFur2 llaPfuPrintA Pfu-arrays.bed
hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql pyrFur2
echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql pyrFur2
# for now I'm using both hgFixed and the pyrFur2 database for this... that will change.
hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql hgFixed
echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql hgFixed
# CHANGE "chr1" to "chr"
ssh hgdev
mv /cluster/data/pyrFur2/nib/chr1.nib /cluster/data/pyrFur2/nib/chr.nib
rm /gbdb/pyrFur2/nib/chr1.nib
ln -s /cluster/data/pyrFur2/nib/chr.nib /gbdb/pyrFur2/nib/chr.nib
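# a quick script (presumably saved as changeCh.sh, which is invoked below)
# to replace chr1 with chr in the file given as its first argument, in place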
#!/bin/bash
sed 's/chr1/chr/g' $1 > /tmp/whatever
mv /tmp/whatever $1
cd /cluster/data/pyrFur2/bed
find -name '*.bed' | xargs changeCh.sh
# Now change the DB
cd /tmp
hgsqldump pyrFur2 | sed 's/chr1/chr/g' > pfuNew.sql
echo drop database pyrFur2 | hgsql
echo create database pyrFur2 | hgsql
hgsql pyrFur2 < pfuNew.sql
rm pfuNew.sql
echo 'update dbDb set defaultPos="chr:550000-580000" where name="pyrFur2"' | hgsql -h genome-testdb hgcentraltest
# promoter track
ssh hgwdev
mkdir -p /cluster/data/pyrFur2/wiggle/promoterScanPos
cd /cluster/data/pyrFur2/wiggle/promoterScanPos
cp plus.wig.gz .
wigAsciiToBinary -chrom=chr -wibFile=promoterScanPos plus.wig.gz
hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanPos pyrFur2 promoterScanPos promoterScanPos.wig
mkdir /gbdb/pyrFur2/wib/promoterScanPos
ln -s `pwd`/*.wib /gbdb/pyrFur2/wib/promoterScanPos/
mkdir /cluster/data/pyrFur2/promoterScanNeg
cd /cluster/data/pyrFur2/promoterScanNeg
cp minus.wig.gz .
wigAsciiToBinary -chrom=chr -wibFile=promoterScanNeg minus.wig.gz
hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanNeg pyrFur2 promoterScanNeg promoterScanNeg.wig
mkdir /gbdb/pyrFur2/wib/promoterScanNeg
ln -s `pwd`/*.wib /gbdb/pyrFur2/wib/promoterScanNeg/
# Shine-Dalgarno (shineDG) tracks, plus and minus strand
cd /cluster/data/pyrFur2/wiggle
mkdir shineDGPos
mkdir shineDGNeg
cd shineDGPos
cp shine.pos.gz .
wigAsciiToBinary -chrom=chr -wibFile=shineDGPos shine.pos.gz
hgLoadWiggle pyrFur2 shineDGPos shineDGPos.wig
cd ../shineDGNeg
cp shine.neg.gz .
wigAsciiToBinary -chrom=chr -wibFile=shineDGNeg shine.neg.gz
hgLoadWiggle pyrFur2 shineDGNeg shineDGNeg.wig
cd /gbdb/pyrFur2/wib
ln -s /cluster/data/pyrFur2/wiggle/shineDGPos/shineDGPos.wib shineDGPos.wib
ln -s /cluster/data/pyrFur2/wiggle/shineDGNeg/shineDGNeg.wib shineDGNeg.wib
# Rfam TRACK
cd /cluster/data/pyrFur2/bed
mkdir Rfam
cd Rfam/
# copy the Rfam BED file (pfu.rfam.bed) into this directory
cp pfu.rfam.bed .
hgLoadBed pyrFur2 Rfam pfu.rfam.bed
# trackDb entry:
track Rfam
shortLabel Rfam
longLabel Rfam noncoding RNA
group genes
priority 2.3
visibility pack
color 43,127,60
type bed 6 .
# Heatshock array track
cd /cluster/data/pyrFur2/bed
mkdir llaPfuPrintC
cd llaPfuPrintC/
cp Pfu-hs.bed .
cp Pfu-hs.other .
hgLoadBed pyrFur2 llaPfuPrintC Pfu-hs.bed
hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
echo 'rename table expRecord to llaPfuPrintCExps' | hgsql pyrFur2
hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
echo 'rename table expRecord to llaPfuPrintCExps' | hgsql hgFixed
echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql pyrFur2
echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql hgFixed
# trackDb entry:
track llaPfuPrintC
shortLabel HS Microarray
longLabel Heat Shock full-genome P. furiosus microarray experiments
group regulation
priority 86.0
visibility dense
type expRatio
expScale 4.0
expStep 0.5
expTable llaPfuPrintCExps
chip printA
canPack off
# Matt's Operons
cd /cluster/data/pyrFur2/bed/llOperons
cp matt.bed .
hgLoadBed pyrFur2 llOperons matt.bed
# RNA genes
cd /cluster/data/pyrFur2/bed
mkdir rnaGenes
cp pfuAllRNA.bed .
cp ~/kent/src/hg/lib/rnaGenes.sql .
hgLoadBed -sqlTable=rnaGenes.sql pyrFur2 rnaGenes pfuAllRNA.bed
# EasyGene
cd /cluster/data/pyrFur2/bed
mkdir easyGene
cd easyGene
wget -O easyGene.gff "http://www.binf.ku.dk/cgi-bin/easygene/output?id=129&format=gff&desc=CDS&desc=ORF&desc=ALT&r_cutoff=2&start=&stop=&strand=plus&strand=minus&per_page=-1&250="
pernilleGffToBed easyGene.gff easyGene.bed
cp ~/kent/src/hg/lib/easyGene.sql .
hgLoadBed -sqlTable=easyGene.sql pyrFur2 easyGene easyGene.bed
# MULTIZ with P. horikoshii, P. abyssi, T. kodakarensis
# This is a redo of an earlier run, so some files are already in place
# DONE (10/8/05), kpollard
cd /cluster/data/pyrFur2/bed/conservation
mv Thermokoda.chr therKoda1.chr
#edit headers to be pyrFur2, pyrHor1, pyrAby1, therKoda1
/cluster/bin/i386/faToNib pyrFur2.chr pyrFur2.chr.nib
/cluster/bin/i386/faToNib pyrHor1.chr pyrHor1.chr.nib
/cluster/bin/i386/faToNib pyrAby1.chr pyrAby1.chr.nib
/cluster/bin/i386/faToNib therKoda1.chr therKoda1.chr.nib
faSize -detailed *.chr > chrom.sizes
#blastz
blastz pyrFur2.chr pyrAby1.chr Q=HoxD55.q > pyrFur2-pyrAby1.lav
blastz pyrFur2.chr pyrHor1.chr Q=HoxD55.q > pyrFur2-pyrHor1.lav
blastz pyrFur2.chr therKoda1.chr Q=HoxD55.q > pyrFur2-therKoda1.lav
#lavToAxt
/cluster/bin/i386/lavToAxt pyrFur2-pyrAby1.lav . . pyrFur2-pyrAby1.axt
/cluster/bin/i386/lavToAxt pyrFur2-pyrHor1.lav . . pyrFur2-pyrHor1.axt
/cluster/bin/i386/lavToAxt pyrFur2-therKoda1.lav . . pyrFur2-therKoda1.axt
#axtBest
axtBest pyrFur2-pyrAby1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrAby1-best.axt
axtBest pyrFur2-pyrHor1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrHor1-best.axt
axtBest pyrFur2-therKoda1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-therKoda1-best.axt
#axtToMaf
axtToMaf pyrFur2-pyrAby1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrAby1.maf
axtToMaf pyrFur2-pyrHor1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrHor1.maf
axtToMaf pyrFur2-therKoda1-best.axt chrom.sizes chrom.sizes pyrFur2-therKoda1.maf
#multiz
#delete extra header lines from maf files
multiz pyrFur2-pyrAby1.maf pyrFur2-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1.maf
multiz pyrFur2-therKoda1.maf pyrFur2-pyrAby1-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1-therKoda1.maf
#phyloHMM
/cluster/bin/phast/msa_view -i MAF -M pyrFur2.chr -o SS pyrFur2-pyrAby1-pyrHor1-therKoda1.maf > pyrFur2.ss
/cluster/bin/phast/phyloFit -i SS pyrFur2.ss -t "((pyrFur2,(pyrAby1,pyrHor1)),therKoda1)"
/cluster/bin/phast/msa_view -i SS pyrFur2.ss --summary-only
/cluster/bin/phast/phastCons pyrFur2.ss phyloFit.mod --gc 0.4496 \
--target-coverage 0.7 --estimate-trees pyr-tree \
--expected-lengths 25 --no-post-probs --ignore-missing \
--nrates 1,1
/cluster/bin/phast/phastCons pyrFur2.ss \
pyr-tree.cons.mod,pyr-tree.noncons.mod \
--target-coverage 0.7 --expected-lengths 25 \
--viterbi pyrFur2-elements.bed --score \
--require-informative 0 --seqname chr > cons.dat
wigEncode cons.dat phastCons.wig phastCons.wib
draw_tree phyloFit.mod > pyr-tree.ps
#make ai and jpg files in Illustrator
cp pyr-tree.jpg /usr/local/apache/htdocs/images/
#move data
mkdir wib
mv phastCons.wib wib/phastCons.wib
mv phastCons.wig wib/phastCons.wig
ln -s /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wib /gbdb/pyrFur2/wib
mkdir -p /gbdb/pyrFur2/pwMaf
mkdir -p otherSpp/pyrAby1 otherSpp/pyrHor1 otherSpp/therKoda1
mv pyrFur2-pyrAby1.maf otherSpp/pyrAby1/chr.maf
mv pyrFur2-pyrHor1.maf otherSpp/pyrHor1/chr.maf
mv pyrFur2-therKoda1.maf otherSpp/therKoda1/chr.maf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrAby1 /gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrHor1 /gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/therKoda1 /gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
mkdir multiz
mv pyrFur2-pyrAby1-pyrHor1-therKoda1.maf multiz/chr.maf
ln -s /cluster/data/pyrFur2/bed/conservation/multiz /gbdb/pyrFur2/multizPf2Pa1Ph1Tk1
#load
hgLoadWiggle pyrFur2 phastCons /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wig
hgLoadMaf -warn pyrFur2 multizPf2Pa1Ph1Tk1
hgLoadMaf -warn pyrFur2 pyrAby1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
hgLoadMaf -warn pyrFur2 pyrHor1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
hgLoadMaf -warn pyrFur2 therKoda1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
hgLoadBed pyrFur2 phastConsElements pyrFur2-elements.bed
#trackDb
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
#trackDb.ra entry
# track multizPf2Pa1Ph1Tk1
# shortLabel Conservation
# longLabel Pyrococcus 4-way multiz alignments
# group compGeno
# priority 10.0
# visibility pack
# type wigMaf 0.0 1.0
# maxHeightPixels 100:40:11
# wiggle phastCons
# yLineOnOff Off
- # autoScaleDefault Off
+ # autoScale Off
# pairwise pwMaf
# speciesOrder pyrAby1 pyrHor1 therKoda1
cvs commit -m "New multiz track" trackDb.ra
#html page for multizPf2Pa1Ph1Tk1
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
mv multizPyro.html multizPf2Pa1Ph1Tk1.html
cvs remove multizPyro.html
cvs add multizPf2Pa1Ph1Tk1.html
cvs commit -m "Details page for multiz track" multizPf2Pa1Ph1Tk1.html
#KEGG
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2/bed
mkdir kegg
cd kegg
~/kent/src/hg/protein/KGpath.sh pyrFur2 pyrFur2 051019
### RNA LP FOLD (Daryl Thomas and Matthias Hoechsmann; September 7, 2006)
set workDir = /cluster/data/pyrFur2/bed/rnaLpFold
mkdir -p $workDir
cd $workDir
set file = pyrFur2_dna.bed
sed 's/chr1/chr/' ~mhoechsm/transfer/${file} >! ${file}
# cp ~mhoechsm/transfer/${file} ${file}
awk '{if ($3-$2>max) max=$3-$2} END {print max}' ${file}
set db = pyrFur2
hgsql ${db} -e "drop table rnaLpFold"
hgsql ${db} < ${HOME}/kent/src/hg/lib/rnaLpFold.sql
hgLoadBed -noSort -oldTable -strict ${db} rnaLpFold ${file}
rm -f bed.tab
###