src/hg/makeDb/doc/h1n1.txt 1.1
1.1 2009/04/27 03:06:09 fanhsu
Created h1n1.txt.
Index: src/hg/makeDb/doc/h1n1.txt
===================================================================
RCS file: src/hg/makeDb/doc/h1n1.txt
diff -N src/hg/makeDb/doc/h1n1.txt
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/makeDb/doc/h1n1.txt 27 Apr 2009 03:06:09 -0000 1.1
@@ -0,0 +1,191 @@
+##########################################################################
+# CREATE H1N1 DATABASE (STARTED 4/26/09, Fan)
+
+# for emacs: -*- mode: sh; -*-
+
+# DOWNLOAD SEQUENCE (DONE, Fan, 4/26/09)
+
+ ssh hgwdev
+ mkdir /hive/data/genomes/h1n1
+ cd /hive/data/genomes/h1n1
+
+# Get H1N1 sequences from GISAID/EpiFluDB
+
+ mkdir download
+ cd download
+
+# downloaded the following sequences:
+
+ epiflu_0421_dna_sequence.fasta
+ epiflu_0425_dna_sequence.fasta
+ epiflu_0421_protein_sequence.fasta
+ epiflu_0425_protein_sequence.fasta
+
+# translate to nib
+ cd ..
+
+ ln -s /hive/data/genomes/h1n1 ~/h1n1
+ cd ~/h1n1
+ mkdir nib
+
+# use sequence segments in epiflu_0421_dna_sequence.fasta as the base genome
+
+ fgrep -v ">" download/epiflu_0421_dna_sequence.fasta >j.1
+ echo ">chr1" >j.0
+ cat j.0 j.1 >chr1.fa
+ rm j.0 j.1
+
+ faToNib chr1.fa nib/chr1.nib
+
+# CREATING DATABASE (DONE 4/26/09)
+
+ # Create the h1n1 database.
+
+ ssh hgwdev
+ echo 'create database h1n1' | hgsql ''
+
+ # make a semi-permanent read-only alias:
+ alias h1n1 "mysql -u hguser -phguserstuff -A h1n1"
+
+# CREATING GRP TABLE FOR TRACK GROUPING (DONE 4/26/09)
+ ssh hgwdev
+ echo "create table grp (PRIMARY KEY(NAME)) select * from hg18.grp" \
+ | hgsql h1n1
+
+ # remove ENCODE groups
+ echo "delete from grp where name like 'encode%'" | hgsql h1n1
+
+# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION (DONE 4/26/09)
+
+ # Make symbolic links from /gbdb/h1n1/nib to the real nibs.
+ ssh hgwdev
+ mkdir -p /gbdb/h1n1/nib
+ ln -s /hive/data/genomes/h1n1/nib/chr1.nib /gbdb/h1n1/nib/chr1.nib
+
+ # Load /gbdb/h1n1/nib paths into database and save size info.
+ hgsql h1n1 < ~/src/hg/lib/chromInfo.sql
+
+ hgNibSeq -preMadeNib h1n1 /gbdb/h1n1/nib chr1.fa
+ echo "select chrom,size from chromInfo" | hgsql -N h1n1 > chrom.sizes
+
+# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR H1N1 (DONE 4/26/09)
+ echo 'insert into defaultDb values("A/California/04/2009(EPI_ISL_29573)", "h1n1");' \
+ | hgsql -h genome-testdb hgcentraltest
+
+ echo 'insert into dbDb values("h1n1", "Apr. 2009", \
+ "/gbdb/h1n1/nib", "Human case of H1N1 swine influenza", "chr1", 1, 99.5, "A H1N1","Human case of H1N1 swine influenza", "/gbdb/h1n1/html/description.html", 0, 0, "GISAID sequence as of Apr. 21st, 2009", 99999);' \
+ | hgsql -h genome-testdb hgcentraltest
+
+ echo 'insert into genomeClade values("A H1N1", "other", 100);'\
+ | hgsql -h genome-testdb hgcentraltest
+
+ # Make trackDb table so browser knows what tracks to expect:
+ ssh hgwdev
+ cd ~/src/hg/makeDb/trackDb
+ cvs up -d -P
+
+ # Edit that makefile to add h1n1 in all the right places
+
+ vi makefile
+
+ make update
+ make alpha
+ cvs commit makefile
+
+# START BLAT SERVERS (on hgwdev for now, DONE 4/26/09, Fan)
+
+ cd /hive/data/genomes/h1n1/nib
+ gfServer start hgwdev 18891 -trans -mask -log=gfServer.trans.log chr1.nib &
+ gfServer start hgwdev 18892 -stepSize=5 -log=gfServer.log chr1.nib &
+
+ # MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR H1N1
+
+ echo 'insert into blatServers values("h1n1", "hgwdev", "18891", "1", "0"); \
+ insert into blatServers values("h1n1", "hgwdev", "18892", "0", "0");' \
+ | hgsql -h genome-testdb hgcentraltest
+
+# CREATE h1n1Gene TRACK (DONE. 4/26/09, Fan)
+
+ mkdir -p /hive/data/genomes/h1n1/bed/h1n1Gene
+ cd /hive/data/genomes/h1n1/bed/h1n1Gene
+
+ cp ../../download/epiflu_0421_dna_sequence.fasta h1n1Gene.fa
+ # manually edit h1n1Gene.fa
+ # change line like:
+ # >EPI176470 | HA | A/California/04/2009 | EPI_ISL_29573 | 2009712049_seg4 | H1N1
+ # into:
+ # >HA EPI176470
+
+ gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
+ -out=psl -t=dna -q=dna h1n1Gene.fa h1n1GenePsl.psl
+
+ hgLoadPsl h1n1 h1n1GenePsl.psl
+ hgsql h1n1 -N -e 'select tName, tStart, tEnd, qName from h1n1GenePsl' >h1n1Gene.bed
+ hgLoadBed h1n1 h1n1Gene h1n1Gene.bed
+
+# CREATE h1n1Seq TRACK (DONE. 4/26/09, Fan)
+
+ mkdir -p /hive/data/genomes/h1n1/bed/h1n1Seq
+ cd /hive/data/genomes/h1n1/bed/h1n1Seq
+
+# get h1n1Seq sequences
+ cat ../../download/*dna*.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >h1n1Seq.fa
+
+# copy the .fa file to /gbdb and load it with hgLoadSeq
+
+ mkdir -p /gbdb/h1n1/h1n1Seq
+ cp -p h1n1Seq.fa /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
+
+ hgLoadSeq -replace h1n1 /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
+
+# BLAT
+ gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
+ -out=psl -t=dna -q=dna h1n1Seq.fa h1n1Seq.psl
+
+# load the psl result into h1n1Seq table
+ hgLoadPsl h1n1 h1n1Seq.psl
+
+# CREATE humanHA TRACK (DONE. 4/26/09, Fan)
+
+ mkdir -p /hive/data/genomes/h1n1/bed/humanHA
+ cd /hive/data/genomes/h1n1/bed/humanHA
+
+# get humanHA sequences
+ cat ../../download/humanHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >humanHA.fa
+
+# copy the .fa file to /gbdb and load it with hgLoadSeq
+
+ mkdir -p /gbdb/h1n1/humanHA
+ cp -p humanHA.fa /gbdb/h1n1/humanHA/humanHA.fa
+
+ hgLoadSeq -replace h1n1 /gbdb/h1n1/humanHA/humanHA.fa
+
+# BLAT
+ gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
+ -out=psl -t=dna -q=dna humanHA.fa humanHA.psl
+
+# load the psl result into humanHA table
+ hgLoadPsl h1n1 humanHA.psl
+
+# CREATE swineHA TRACK (DONE. 4/26/09, Fan)
+
+ mkdir -p /hive/data/genomes/h1n1/bed/swineHA
+ cd /hive/data/genomes/h1n1/bed/swineHA
+
+# get swineHA sequences
+ cat ../../download/swineHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >swineHA.fa
+
+# copy the .fa file to /gbdb and load it with hgLoadSeq
+
+ mkdir -p /gbdb/h1n1/swineHA
+ cp -p swineHA.fa /gbdb/h1n1/swineHA/swineHA.fa
+
+ hgLoadSeq -replace h1n1 /gbdb/h1n1/swineHA/swineHA.fa
+
+# BLAT
+ gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892 /gbdb/h1n1/nib \
+ -out=psl -t=dna -q=dna swineHA.fa swineHA.psl
+
+# load the psl result into swineHA table
+ hgLoadPsl h1n1 swineHA.psl
+#################################