src/hg/makeDb/doc/sulSol1.txt 1.2
1.2 2009/11/25 21:48:42 hiram
change autoScaleDefault to autoScale
Index: src/hg/makeDb/doc/sulSol1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/sulSol1.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/makeDb/doc/sulSol1.txt 25 Jul 2006 20:15:12 -0000 1.1
+++ src/hg/makeDb/doc/sulSol1.txt 25 Nov 2009 21:48:42 -0000 1.2
@@ -1,149 +1,149 @@
#########################################################
#mulitz pHMM conservation track for 3 solfolobus species#
#########################################################
## DONE: 8/12/05 (kpollard)
cd /cluster/data/sulSol1/bed
mkdir conservation
cd conservation
cp /cluster/data/pyrFur2/bed/conservation/HoxD55.q .
#get fastas
cp ../../chr.fa sulSol1.chr
cp /projects/lowelab/db/blastdb/Sulf_acid_DSM_639.fa sulAci1.chr
cp /projects/lowelab/db/blastdb/Sulf_toko.fa sulTok1.chr
#edit > lines to say sulSol1.chr, sulTok1.chr, and sulAci1.chr
#make nibs
cp /cluster/data/sulSol1/nib/chr.nib sulSol1.nib
/cluster/bin/i386/faToNib sulAci.chr sulAci1.nib
/cluster/bin/i386/faToNib sulTok.chr sulTok1.nib
#chrom sizes
faSize -detailed *.chr > chrom.sizes
#blastz
blastz sulSol1.chr sulTok1.chr Q=HoxD55.q > sulSol1-sulTok1.lav
blastz sulSol1.chr sulAci1.chr Q=HoxD55.q > sulSol1-sulAci1.lav
/cluster/bin/i386/lavToAxt sulSol1-sulTok1.lav . . sulSol1-sulTok1.axt
/cluster/bin/i386/lavToAxt sulSol1-sulAci1.lav . . sulSol1-sulAci1.axt
axtBest sulSol1-sulTok1.axt sulSol1.chr -winSize=500 -minScore=5000 sulSol1-sulTok1-best.axt
axtBest sulSol1-sulAci1.axt sulSol1.chr -winSize=500 -minScore=5000 sulSol1-sulAci1-best.axt
axtToMaf sulSol1-sulTok1-best.axt chrom.sizes chrom.sizes sulSol1-sulTok1.maf
axtToMaf sulSol1-sulAci1-best.axt chrom.sizes chrom.sizes sulSol1-sulAci1.maf
#multiz: v10 has new parameter (3rd par), 0=2 seqs not align to ref, else 1.
# stick with v8 for now.
multiz sulSol1-sulTok1.maf sulSol1-sulAci1.maf - > sulSol1-sulTok1-sulAci1.maf
#phyloHMM
/cluster/bin/phast/msa_view -i MAF -M sulSol1.chr -o SS sulSol1-sulTok1-sulAci1.maf > sulSol1.ss
/cluster/bin/phast/phyloFit -i SS sulSol1.ss -t "(sulSol1,(sulTok1,sulAci1))"
/cluster/bin/phast/msa_view -i SS sulSol1.ss --summary-only
/cluster/bin/phast/phastCons sulSol1.ss phyloFit.mod --gc 0.3527 \
--target-coverage 0.7 --estimate-trees sul-tree \
--expected-lengths 25 --no-post-probs --ignore-missing \
--nrates 1,1
/cluster/bin/phast/phastCons sulSol1.ss \
sul-tree.cons.mod,sul-tree.noncons.mod \
--target-coverage 0.7 --expected-lengths 25 \
--viterbi sulSol-elements.bed --score \
--require-informative 0 --seqname chr > cons.dat
wigEncode cons.dat phastCons.wig phastCons.wib
draw_tree phyloFit.mod > sul-tree.ps
#make .ai and .jpg files in Illustrator
cp sul-tree.jpg /usr/local/apache/htdocs/images/
#move data
mkdir wib
mv phastCons.wib wib/phastCons.wib
mv phastCons.wig wib/phastCons.wig
ln -s /cluster/data/sulSol1/bed/conservation/wib/phastCons.wib /gbdb/sulSol1/wib
mkdir /gbdb/sulSol1/pwMaf
mkdir -p otherSul/sulTok1 otherSul/sulAci1
mv sulSol1-sulTok1.maf otherSul/sulTok1/chr.maf
mv sulSol1-sulAci1.maf otherSul/sulAci1/chr.maf
ln -s /cluster/data/sulSol1/bed/conservation/otherSul/sulTok1 /gbdb/sulSol1/pwMaf/sulTok1_pwMaf
ln -s /cluster/data/sulSol1/bed/conservation/otherSul/sulAci1 /gbdb/sulSol1/pwMaf/sulAci1_pwMaf
mkdir multiz
mv sulSol1-sulTok1-sulAci1.maf multiz/chr.maf
ln -s /cluster/data/sulSol1/bed/conservation/multiz /gbdb/sulSol1/multizSs1St1Sa1
#load
hgLoadWiggle sulSol1 phastCons /cluster/data/sulSol1/bed/conservation/wib/phastCons.wig
hgLoadMaf -warn sulSol1 multizSs1St1Sa1
hgLoadMaf -warn sulSol1 sulTok1_pwMaf -pathPrefix=/gbdb/sulSol1/pwMaf/sulTok1_pwMaf
hgLoadMaf -warn sulSol1 sulAci1_pwMaf -pathPrefix=/gbdb/sulSol1/pwMaf/sulTok1_pwMaf
hgLoadBed sulSol1 phastConsElements sulSol-elements.bed
#trackDb.ra entry
# track multizSs1St1Sa1
# shortLabel Conservation
# longLabel Sulfolobus 3-way multiz alignments
# group compGeno
# priority 10.0
# visibility pack
# type wigMaf 0.0 1.0
# maxHeightPixels 100:40:11
# wiggle phastCons
# yLineOnOff Off
- # autoScaleDefault Off
+ # autoScale Off
# pairwise pwMaf
# speciesOrder sulTok1 sulAci1
cd ~/kent/src/hg/makeDb/trackDb/archae/sulSol1
cvs commit -m "Added multiz track" trackDb.ra
#html page for multizSs1St1Sa1
cd ~/kent/src/hg/makeDb/trackDb/archae/sulSol1
cvs add multizSs1St1Sa1.html
cvs commit -m "Details page for multiz track" multizSs1St1Sa1.html
# load genbank gene predictions
# Create 3 files to assist parsing of the genbank
# 1. for a bed file
echo 'chr
start
end
locus_tag
1000
strand' > sulSol1-params-bed.txt
# 2. for the peptide parts
echo 'locus_tag
translation' > sulSol1-params-pep.txt
# 3. for the other gene information
echo 'locus_tag
locus_tag
product
note
protein_id
db_xref
EC_number
db_xref2' > sulSol1-params-xra.txt
# Now extract the genes and information:
gbArchaeGenome methJann1.fix.gbk methJann1-params-bed.txt methJann1-genbank-cds.bed
sed -e 's/db_xref="GeneID:/db_xref2="/' sulSol1.gbk > sulSol1.fix.gbk
gbArchaeGenome sulSol1.fix.gbk sulSol1-params-bed.txt sulSol1-genbank-cds.bed
gbArchaeGenome sulSol1.fix.gbk sulSol1-params-pep.txt sulSol1-genbank-cds.pep
gbArchaeGenome sulSol1.fix.gbk sulSol1-params-xra.txt sulSol1-genbank-cds.xra
hgsql sulSol1 < /cluster/home/baertsch/kent/src/hg/lib/minGeneInfo.sql
echo rename table minGeneInfo to gbProtCodeXra | hgsql sulSol1
echo load data local infile \'sulSol1-genbank-cds.xra\' into table gbProtCodeXra | hgsql sulSol1
tawk '{print $1,$2,$3,$4,1000,$6,$2,$3,0,1,$3-$2,0}' sulSol1-genbank-cds.bed | bedToGenePred stdin tmp.gp
tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,NR,name2,"cmpl","cmpl",0}' tmp.gp > tmp2.gp
# hard tab must be input for the join command in double quotes - use ctrl-v followed by tab
join -t " " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.2 1.13 1.14 1.15 tmp2.gp sulSol1-genbank-cds.xra > sulSol1.gp
ldHgGene sulSol1 refSeq sulSol1.gp -predTab -genePredExt
#load COG codes
grep -h COG /projects/lowelab/db/Bacteria/Sulfolobus_solfataricus/NC_00*.ptt | awk 'NR>3{OFS="\t";print $6,$8,$7}' > COG
hgsql sulSol1 < /cluster/home/baertsch/kent/src/hg/lib/cogs.sql
echo "load data local infile 'COG' into table COG" | hgsql sulSol1
hgsql sulSol1 < /cluster/data/metAce1/genbank/COGXra.sql
#KEGG
cd ~/kent/src/hg/makeDb/trackDb/archae/sulSol1/bed
mkdir kegg
cd kegg
~/kent/src/hg/protein/KGpath.sh sulSol1 sulSol1 051020&