src/hg/makeDb/doc/h1n1.txt 1.4
1.4 2009/05/15 22:44:54 fanhsu
Removed old h1n1 track build processes.
Index: src/hg/makeDb/doc/h1n1.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/h1n1.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/makeDb/doc/h1n1.txt 6 May 2009 19:27:46 -0000 1.3
+++ src/hg/makeDb/doc/h1n1.txt 15 May 2009 22:44:54 -0000 1.4
@@ -1,252 +1,191 @@
##########################################################################
# CREATE H1N1 DATABASE (STARTED 4/26/09, Fan)
# for emacs: -*- mode: sh; -*-
# DOWNLOAD SEQUENCE (DONE, Fan, 4/26/09)
ssh hgwdev
mkdir /hive/data/genomes/h1n1
cd /hive/data/genomes/h1n1
# Get H1N1 sequences from GISAID/EpiFluDB
mkdir download
cd download
# downloaded the following sequences:
epiflu_0421_dna_sequence.fasta
epiflu_0425_dna_sequence.fasta
epiflu_0421_protein_sequence.fasta
epiflu_0425_protein_sequence.fasta
# translate to nib
cd ..
ln -s /hive/data/genomes/h1n1 ~/h1n1
cd ~/h1n1
mkdir nib
# use sequence segments in epiflu_0421_dna_sequence.fasta as the base genome
fgrep -v ">" download/epiflu_0421_dna_sequence.fasta >j.1
echo ">chr1" >j.0
cat j.0 j.1 >chr1.fa
rm j.0 j.1
faToNib chr1.fa nib/chr1.nib
# CREATING DATABASE (DONE 4/26/09)
# Create the h1n1 database.
ssh hgwdev
echo 'create database h1n1' | hgsql ''
# make a semi-permanent read-only alias:
alias h1n1 "mysql -u hguser -phguserstuff -A h1n1"
# CREATING GRP TABLE FOR TRACK GROUPING (DONE 4/26/09)
ssh hgwdev
echo "create table grp (PRIMARY KEY(NAME)) select * from hg18.grp" \
| hgsql h1n1
# remove ENCODE groups
echo "delete from grp where name like 'encode%'" | hgsql h1n1
# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION (DONE 4/26/09)
# Make symbolic links from /gbdb/h1n1/nib to the real nibs.
ssh hgwdev
mkdir -p /gbdb/h1n1/nib
ln -s /hive/data/genomes/h1n1/nib/chr1.nib /gbdb/h1n1/nib/chr1.nib
# Load /gbdb/h1n1/nib paths into database and save size info.
hgsql h1n1 < ~/src/hg/lib/chromInfo.sql
hgNibSeq -preMadeNib h1n1 /gbdb/h1n1/nib chr1.fa
echo "select chrom,size from chromInfo" | hgsql -N h1n1 > chrom.sizes
# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR H1N1 (DONE 08/02/06)
echo 'insert into defaultDb values("A/California/04/2009(EPI_ISL_29573)", "h1n1");' \
| hgsql -h genome-testdb hgcentraltest
echo 'insert into dbDb values("h1n1", "Apr. 2009", \
"/gbdb/h1n1/nib", "Human case of H1N1 swine influenza", "chr1", 1, 99.5, "A H1N1","Human case of H1N1 swine influenza", "/gbdb/h1n1/html/description.html", 0, 0, "GISAID sequence as of Apr. 21st, 2009", 99999);' \
| hgsql -h genome-testdb hgcentraltest
echo 'insert into genomeClade values("A H1N1", "other", 100);'\
| hgsql -h genome-testdb hgcentraltest
# Make trackDb table so browser knows what tracks to expect:
ssh hgwdev
cd ~/src/hg/makeDb/trackDb
cvs up -d -P
# Edit that makefile to add h1n1 in all the right places
vi makefile
make update
make alpha
cvs commit makefile
# START BLAT SERVERS (on hgwdev for now, DONE 4/26/09, Fan)
cd /hive/data/genomes/h1n1/nib
gfServer start hgwdev 18891 -trans -mask -log=gfServer.trans.log chr1.nib &
gfServer start hgwdev 18892 -stepSize=5 -log=gfServer.log chr1.nib &
# MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR H1N1
echo 'insert into blatServers values("h1n1", "hgwdev", "18891", "1", "0"); \
insert into blatServers values("h1n1", "hgwdev", "18892", "0", "0");' \
| hgsql -h genome-testdb hgcentraltest
# CREATE h1n1Gene TRACK (DONE. 4/26/09, Fan)
mkdir /hive/data/genomes/h1n1/bed/h1n1Gene
cd /hive/data/genomes/h1n1/bed/h1n1Gene
cp ../../download/epiflu_0421_dna_sequence.fasta h1n1Gene.fa
# manually edit h1n1Gene.fa
# change line like:
# >EPI176470 | HA | A/California/04/2009 | EPI_ISL_29573 | 2009712049_seg4 | H1N1
# into:
# >HA EPI176470
gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
-out=psl -t=dna -q=dna h1n1Gene.fa h1n1GenePsl.psl
hgLoadPsl h1n1 h1n1GenePsl.psl
hgsql h1n1 -N -e 'select tName, tStart, tEnd, qName from h1n1GenePsl' >h1n1Gene.bed
hgLoadBed h1n1 h1n1Gene h1n1Gene.bed
# CREATE h1n1Seq TRACK (DONE. 4/26/09, Fan)
mkdir –p /hive/data/genomes/h1n1/bed/h1n1Seq
cd /hive/data/genomes/h1n1/bed/h1n1Seq
# get h1n1Seq sequences
cat ../../download/*dna*.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >h1n1Seq.fa
# create .fa file
mkdir -p /gbdb/h1n1/h1n1Seq
cp -p h1n1Seq.fa /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
hgLoadSeq –replace h1n1 /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
# BLAT
gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
-out=psl -t=dna -q=dna h1n1Seq.fa h1n1Seq.psl
# load the psl result into h1n1Seq table
hgLoadPsl h1n1 h1n1Seq.psl
# CREATE humanHA TRACK (DONE. 4/26/09, Fan)
mkdir -p /hive/data/genomes/h1n1/bed/humanHA
cd /hive/data/genomes/h1n1/bed/humanHA
# get humanHA sequences
cat ../../download/humanHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >humanHA.fa
# create .fa file
mkdir -p /gbdb/h1n1/humanHA
cp -p humanHA.fa /gbdb/h1n1/humanHA/humanHA.fa
hgLoadSeq -replace h1n1 /gbdb/h1n1/humanHA/humanHA.fa
# BLAT
gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
-out=psl -t=dna -q=dna humanHA.fa humanHA.psl
# load the psl result into humanHA table
hgLoadPsl h1n1 humanHA.psl
# CREATE swineHA TRACK (DONE. 4/26/09, Fan)
mkdir -p /hive/data/genomes/h1n1/bed/swineHA
cd /hive/data/genomes/h1n1/bed/swineHA
# get swineHA sequences
cat ../../download/swineHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >swineHA.fa
# create .fa file
mkdir -p /gbdb/h1n1/swineHA
cp -p swineHA.fa /gbdb/h1n1/swineHA/swineHA.fa
hgLoadSeq -replace h1n1 /gbdb/h1n1/swineHA/swineHA.fa
# BLAT
gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
-out=psl -t=dna -q=dna swineHA.fa swineHA.psl
# load the psl result into humanHA table
hgLoadPsl h1n1 swineHA.psl
-#################################
-# CREATE h1n1_0429Seq TRACK (DONE. 4/29/09, Fan)
-
- mkdir –p /hive/data/genomes/h1n1/bed/h1n1_0429Seq
- cd /hive/data/genomes/h1n1/bed/h1n1_0429Seq
-
-
-# create .fa file
-
- mkdir -p /gbdb/h1n1/h1n1_0429Seq
- cp –p ../../download/epiflu_0429_dna_sequence.fasta /gbdb/h1n1/h1n1_0429Seq
-
- hgLoadSeq –replace h1n1 /gbdb/h1n1/h1n1_0429Seq/epiflu_0429_dna_sequence.fasta
-
-
-# BLAT
- gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
- -out=psl -t=dna -q=dna /gbdb/h1n1/h1n1_0429Seq/epiflu_0429_dna_sequence.fasta h1n1_0429Seq.psl
-
-# load the psl result into h1n1_0429Seq table
- hgLoadPsl h1n1 h1n1_0429Seq.psl
-
-#################################
-# CREATE h1n1_0506Seq TRACK (DONE. 050609, Fan)
-
- mkdir -p /hive/data/genomes/h1n1/bed/h1n1_0506Seq
- cd /hive/data/genomes/h1n1/bed/h1n1_0506Seq
-
-
-# create .fa file
-
- mkdir -p /gbdb/h1n1/h1n1_0506Seq
- cp -p ../../download/0506/epiflu_0506_dna_sequence.fasta /gbdb/h1n1/h1n1_0506Seq
-
- hgLoadSeq -replace h1n1 /gbdb/h1n1/h1n1_0506Seq/epiflu_0506_dna_sequence.fasta
-
-# BLAT
- gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
- -out=psl -t=dna -q=dna /gbdb/h1n1/h1n1_0506Seq/epiflu_0506_dna_sequence.fasta h1n1_0506Seq.psl
-
-# load the psl result into h1n1_0506Seq table
- hgLoadPsl h1n1 h1n1_0506Seq.psl
-
-# create CDS table
-
- hgsql h1n1 -N -e \
- 'select qName, qStart+1,"xxx", qEnd from h1n1_0506Seq'|\
- sed -e 's/\txxx\t/../' >h1n1_0506SeqCds.tab
-
- hgLoadSqlTab h1n1 h1n1_0506SeqCds ~/hg/lib/cdsSpec.sql h1n1_0506SeqCds.tab
-
-# update h1n1SeqXref table
-
- fgrep ">" /gbdb/h1n1/h1n1_0506Seq/epiflu_0506_dna_sequence.fasta|\
- sed -e 's/ | /\t/g' |sed -e "s/>//" >j.tmp
- rmLf j.tmp >h1n1SeqXref.tab
-
- hgsql h1n1 -e 'delete from h1n1SeqXref'
- hgsql h1n1 -e 'load data local infile "h1n1SeqXref.tab" into table h1n1SeqXref'
-
-#########################################################################################