src/hg/makeDb/doc/h1n1.txt 1.1

1.1 2009/04/27 03:06:09 fanhsu
Created h1n1.txt.
Index: src/hg/makeDb/doc/h1n1.txt
===================================================================
RCS file: src/hg/makeDb/doc/h1n1.txt
diff -N src/hg/makeDb/doc/h1n1.txt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/makeDb/doc/h1n1.txt	27 Apr 2009 03:06:09 -0000	1.1
@@ -0,0 +1,191 @@
+##########################################################################
+# CREATE H1N1 DATABASE (STARTED 4/26/09, Fan) 
+
+# for emacs: -*- mode: sh; -*-
+
+# DOWNLOAD SEQUENCE (DONE, Fan, 4/26/09)
+
+    ssh hgwdev
+    mkdir /hive/data/genomes/h1n1
+    cd /hive/data/genomes/h1n1
+
+# Get H1N1 sequences from GISAID/EpiFluDB
+    
+    mkdir download
+    cd download
+
+# downloaded the following sequences:
+
+    epiflu_0421_dna_sequence.fasta
+    epiflu_0425_dna_sequence.fasta
+    epiflu_0421_protein_sequence.fasta
+    epiflu_0425_protein_sequence.fasta
+
+# convert to nib format
+    cd ..
+
+    ln -s /hive/data/genomes/h1n1 ~/h1n1
+    cd ~/h1n1
+    mkdir nib
+
+# use sequence segments in epiflu_0421_dna_sequence.fasta as the base genome
+
+    fgrep -v ">" download/epiflu_0421_dna_sequence.fasta >j.1
+    echo ">chr1" >j.0
+    cat j.0 j.1 >chr1.fa
+    rm j.0 j.1
+
+    faToNib chr1.fa nib/chr1.nib
+
+# CREATING DATABASE (DONE 4/26/09)
+
+    # Create the h1n1 database.
+
+    ssh hgwdev
+    echo 'create database h1n1' | hgsql ''
+
+    # make a semi-permanent read-only alias:
+    alias h1n1 "mysql -u hguser -phguserstuff -A h1n1"
+
+# CREATING GRP TABLE FOR TRACK GROUPING (DONE 4/26/09)
+    ssh hgwdev
+    echo "create table grp (PRIMARY KEY(NAME)) select * from hg18.grp" \
+      | hgsql h1n1
+
+    # remove ENCODE groups
+    echo "delete from grp where name like 'encode%'" | hgsql h1n1
+
+# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION  (DONE 4/26/09)
+
+    # Make symbolic links from /gbdb/h1n1/nib to the real nibs.
+    ssh hgwdev
+    mkdir -p /gbdb/h1n1/nib
+    ln -s /hive/data/genomes/h1n1/nib/chr1.nib /gbdb/h1n1/nib/chr1.nib 
+
+    # Load /gbdb/h1n1/nib paths into database and save size info.
+    hgsql h1n1  < ~/src/hg/lib/chromInfo.sql
+
+    hgNibSeq -preMadeNib h1n1 /gbdb/h1n1/nib chr1.fa
+    echo "select chrom,size from chromInfo" | hgsql -N h1n1 > chrom.sizes
+    
+# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR H1N1 (DONE 4/26/09)
+    echo 'insert into defaultDb values("A/California/04/2009(EPI_ISL_29573)", "h1n1");' \
+      | hgsql -h genome-testdb hgcentraltest
+
+    echo 'insert into dbDb values("h1n1", "Apr. 2009", \
+          "/gbdb/h1n1/nib", "Human case of H1N1 swine influenza", "chr1", 1, 99.5, "A H1N1","Human case of H1N1 swine influenza", "/gbdb/h1n1/html/description.html", 0, 0, "GISAID sequence as of Apr. 21st, 2009", 99999);' \
+      | hgsql -h genome-testdb hgcentraltest
+
+    echo 'insert into genomeClade values("A H1N1", "other", 100);'\
+      | hgsql -h genome-testdb hgcentraltest
+
+    # Make trackDb table so browser knows what tracks to expect:
+    ssh hgwdev
+    cd ~/src/hg/makeDb/trackDb
+    cvs up -d -P
+
+    # Edit that makefile to add h1n1 in all the right places
+
+    vi makefile
+
+    make update
+    make alpha
+    cvs commit makefile
+
+# START BLAT SERVERS (on hgwdev for now, DONE 4/26/09, Fan)
+    
+    cd /hive/data/genomes/h1n1/nib
+    gfServer start hgwdev 18891 -trans -mask -log=gfServer.trans.log chr1.nib &
+    gfServer start hgwdev 18892 -stepSize=5  -log=gfServer.log       chr1.nib &
+
+    # MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR H1N1 
+
+    echo 'insert into blatServers values("h1n1", "hgwdev", "18891", "1", "0"); \
+          insert into blatServers values("h1n1", "hgwdev", "18892", "0", "0");' \
+      | hgsql -h genome-testdb hgcentraltest
+
+# CREATE h1n1Gene TRACK (DONE. 4/26/09, Fan)
+
+    mkdir /hive/data/genomes/h1n1/bed/h1n1Gene
+    cd    /hive/data/genomes/h1n1/bed/h1n1Gene 
+
+    cp ../../download/epiflu_0421_dna_sequence.fasta h1n1Gene.fa
+    # manually edit h1n1Gene.fa
+    # change line like:
+    # >EPI176470 | HA | A/California/04/2009 | EPI_ISL_29573 | 2009712049_seg4 | H1N1
+    # into:
+    # >HA EPI176470
+
+    gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892  /gbdb/h1n1/nib \
+    -out=psl -t=dna -q=dna h1n1Gene.fa h1n1GenePsl.psl
+
+    hgLoadPsl h1n1 h1n1GenePsl.psl
+    hgsql h1n1 -N -e 'select tName, tStart, tEnd, qName from h1n1GenePsl' >h1n1Gene.bed
+    hgLoadBed h1n1  h1n1Gene h1n1Gene.bed
+
+# CREATE h1n1Seq TRACK (DONE.  4/26/09, Fan)
+
+    mkdir -p /hive/data/genomes/h1n1/bed/h1n1Seq
+    cd /hive/data/genomes/h1n1/bed/h1n1Seq
+
+# get h1n1Seq sequences
+    cat ../../download/*dna*.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >h1n1Seq.fa
+
+# copy .fa file to /gbdb and load the sequences
+
+    mkdir -p /gbdb/h1n1/h1n1Seq
+    cp -p h1n1Seq.fa /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
+
+    hgLoadSeq -replace h1n1 /gbdb/h1n1/h1n1Seq/h1n1Seq.fa
+
+# BLAT
+    gfClient -minScore=200 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892  /gbdb/h1n1/nib \
+    -out=psl -t=dna -q=dna h1n1Seq.fa h1n1Seq.psl
+
+# load the psl result into h1n1Seq table
+    hgLoadPsl h1n1 h1n1Seq.psl
+
+# CREATE humanHA TRACK (DONE.  4/26/09, Fan)
+
+    mkdir -p /hive/data/genomes/h1n1/bed/humanHA
+    cd /hive/data/genomes/h1n1/bed/humanHA
+
+# get humanHA sequences
+    cat ../../download/humanHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >humanHA.fa
+
+# copy .fa file to /gbdb and load the sequences
+
+    mkdir -p /gbdb/h1n1/humanHA
+    cp -p humanHA.fa /gbdb/h1n1/humanHA/humanHA.fa
+
+    hgLoadSeq -replace h1n1 /gbdb/h1n1/humanHA/humanHA.fa
+
+# BLAT
+    gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892  /gbdb/h1n1/nib \
+    -out=psl -t=dna -q=dna humanHA.fa humanHA.psl
+
+# load the psl result into humanHA table
+    hgLoadPsl h1n1 humanHA.psl
+
+# CREATE swineHA TRACK (DONE.  4/26/09, Fan)
+
+    mkdir -p /hive/data/genomes/h1n1/bed/swineHA
+    cd /hive/data/genomes/h1n1/bed/swineHA
+
+# get swineHA sequences
+    cat ../../download/swineHA_dna.fasta |sed -e 's/ | /_/g' |sed -e 's/_EPI_ISL/ | /' >swineHA.fa
+
+# copy .fa file to /gbdb and load the sequences
+
+    mkdir -p /gbdb/h1n1/swineHA
+    cp -p swineHA.fa /gbdb/h1n1/swineHA/swineHA.fa
+
+    hgLoadSeq -replace h1n1 /gbdb/h1n1/swineHA/swineHA.fa
+
+# BLAT
+    gfClient -minScore=500 -minIdentity=80 -nohead hgwdev.cse.ucsc.edu 18892  /gbdb/h1n1/nib \
+    -out=psl -t=dna -q=dna swineHA.fa swineHA.psl
+
+# load the psl result into swineHA table
+    hgLoadPsl h1n1 swineHA.psl
+#################################