src/hg/makeDb/doc/hivVax003Vax004.txt 1.9

1.9 2009/09/29 15:48:30 fanhsu
Added the section of immuno build.
Index: src/hg/makeDb/doc/hivVax003Vax004.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hivVax003Vax004.txt,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 1000000 -r1.8 -r1.9
--- src/hg/makeDb/doc/hivVax003Vax004.txt	2 Mar 2009 21:41:47 -0000	1.8
+++ src/hg/makeDb/doc/hivVax003Vax004.txt	29 Sep 2009 15:48:30 -0000	1.9
@@ -1,927 +1,965 @@
 # for emacs: -*- mode: sh; -*-
 #########################################################################
 # hivVax003Vax004 DATABASE BUILD (DONE 5/20/08, Fan)
 
     ssh hiv1
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004
     cd /cluster/store12/medical/hiv/hivVax003Vax004
 
 #########################################################################
 # Create hivVax003Vax004 DB
 
     hgsql –e 'create database hivVax003Vax004'
 
 # Ask admin to copy over all tables from hiv1 to hivVax003Vax004
 
 #########################################################################
 # CREATE MAF TRACKS FOR VAX004
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa
 
 # create a script file, doall
 
     hgsql hivVax003Vax004 -N -e \
     'select id from dnaSeq where id like "%U%"'\
     |sed -e 's/ss/do1 ss/g' >doall
 
 # create one line script file, do1, with the following line in it:
 
     hgsql hivVax003Vax004 -N -e  "select id, seq from vax004Msa where id='${1}'"
 
     chmod +x do*
 
 # run the script to get the .tab file with all MSA sequences of VAX004
     doall >Vax003Vax004.tab
 # convert .tab into .fa file
     tabToFa Vax003Vax004
 
 # grab the base alignment sequence
     echo ">hivVax003Vax004" >Vax003Vax004.aln
     hgsql hivVax003Vax004 -N -e 'select seq from vax004Msa where id="HXB2"'  >> Vax003Vax004.aln
 
 # prepare an interium file, jjAll.mfa
     cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
     echo = >>jjAll.mfa
 
 # Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
 
     xmfaToMafVax003Vax004Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
     cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
 
     rm jjAll.mfa j.out
 
     cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf
 
 # copy .maf to /gbdb.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax004Maf 
     cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p
 
     hgLoadMaf hivVax003Vax004 vax004Maf
 
 # create another copy for protein MAF.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax004AaMaf 
     cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
     hgLoadMaf hivVax003Vax004 vax004AaMaf
     
 #########################################################################
 # CREATE CONSERVATION TRACKS FOR VAX003 AE STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
     cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
 
 # create the .wig file and .fa file of the consensus sequence.
     gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AECons.wig vax003AEConsensus.fa
 # encode and load the wig file
     wigEncode vax003AECons.wig stdout vax003AECons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003AECons stdin
 
 # copy .wib file to /gbdb
     mkdir -p /gbdb/hivVax003Vax004/wib
     cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
 
 # do the same for protein conservation track
 
     mkdir aa
     cd aa
 
 # create .wig file
     gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AEAaCons.wig vax003AEAaConsensus.fa
 
 # encode and load the .wib file   
     wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
 
     cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
 
 #########################################################################
 # CREATE MAF TRACKS FOR VAX003 AE STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
 
 # create a script file, doall
 
     hgsql hivVax003Vax004 -N -e \
     'select id from dnaSeq where id like "%T%"'\
     |sed -e 's/ss/do1 ss/g' >doall
 
 # create one line script file, do1, with the following line in it:
 
     hgsql hivVax003Vax004 -N -e  "select id, seq from vax003AEMsa where id='${1}'"
 
     chmod +x do*
 
 # run the script to get the .tab file with all MSA sequences of VAX003 AE
     doall >Vax003Vax004.tab
 # convert .tab into .fa file
     tabToFa Vax003Vax004
 
 # grab the base alignment sequence
     echo ">hivVax003Vax004" >Vax003Vax004.aln
     hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"'  >> Vax003Vax004.aln
 
 # prepare an interium file, jjAll.mfa
     cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
     echo = >>jjAll.mfa
 
 # Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
 
     xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
     cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
 
 #    rm jjAll.mfa j.out
 
     cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
 
 # copy .maf to /gbdb.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax004Maf
     cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p
 
     hgLoadMaf hivVax003Vax004 vax004Maf
 
 # create another copy for protein MAF.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax004AaMaf 
     cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
     hgLoadMaf hivVax003Vax004 vax004AaMaf
 
 #########################################################################
 # COPY OVER MSA TABLES FOR VAX003 B STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
 
 # get table definition
     mysqldump -d hivVax003Vax004 vax003BMsa -u medcat -p$HGPSWD|hgsql hivVax003Vax004
 
 # load the table   
     hgsql hivVax003Vax004 -e "insert into vax003BMsa select * from hivVax003Vax004.vax003BMsa"
 
 #########################################################################
 # CREATE CONSERVATION TRACKS FOR VAX003 B STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
     cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
 
 # create the .wig file and .fa file of the consensus sequence.
     gsidMsa hivVax003Vax004 vax003BMsa HXB2 6228 vax003BCons.wig vax003BConsensus.fa
 
 # encode and load the wig file
     wigEncode vax003BCons.wig stdout vax003BCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003BCons stdin
 
 # copy .wib file to /gbdb
     mkdir -p /gbdb/hivVax003Vax004/wib
     cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
 
 # do the same for protein conservation track
 
     mkdir aa
     cd aa
 
 # create .wig file
     gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6228 vax003BAaCons.wig vax003BAaConsensus.fa
 
 # encode and load the .wib file   
     wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
 
     cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
 
 #########################################################################
 # CREATE MAF TRACKS FOR VAX003 B STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
 
 # create a script file, doall
 
     hgsql hivVax003Vax004 -N -e \
     'select id from dnaSeq where id like "%T%"'\
     |sed -e 's/ss/do1 ss/g' >doall
 
 # create one line script file, do1, with the following line in it:
 
     hgsql hivVax003Vax004 -N -e  "select id, seq from vax003BMsa where id='${1}'"
 
     chmod +x do*
 
 # run the script to get the .tab file with all MSA sequences of VAX004
     doall >Vax003Vax004.tab
 # convert .tab into .fa file
     tabToFa Vax003Vax004
 
 # grab the base alignment sequence
     echo ">hivVax003Vax004" >Vax003Vax004.aln
     hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"'  >> Vax003Vax004.aln
 
 # prepare an interium file, jjAll.mfa
     cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
     echo = >>jjAll.mfa
 
 # Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
 
     xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
     cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
 
     rm jjAll.mfa j.out
 
     cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
 
 # copy .maf to /gbdb.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003BMaf
     cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
     
     hgLoadMaf hivVax003Vax004 vax003BMaf
 
 # create another copy for protein MAF.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003BAaMaf 
     cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
     hgLoadMaf hivVax003Vax004 vax003BAaMaf
 
 #########################################################################
 # Process, check, correct and load VAX003 clinical tables
 
     mkdir -p /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
     cd /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
 
 # copy over original raw data files
     
     cp -p /cluster/store12/medical/vaxGen/fromEvie/VAX003/*.txt .
     ls -l *.txt
 
 # shorten the file name and run processRaw to generate .sql def
 
     cp "VAX003 RNACD4 match with sequence ID_20080501_EMZ18Jun.txt" VAX003_RNACD4080501.txt
     processRaw VAX003_RNACD4080501.txt
 
     hgsql hiv1 -e 'drop database  hivVax003Vax004Build'
     hgsql hiv1 -e 'create database  hivVax003Vax004Build'
 
 # create hivVax003Vax004Build DB to be used in this build process
     hgsql hiv1 -e 'create database hivVax003Vax004Build'
 
 # load raw demographic and RNACD3 data
     hgsql hivVax003Vax004Build < GSID_DEMOG_SEQNO_003Raw.sql
     hgsql hivVax003Vax004Build < VAX003_RNACD4080501Raw.sql
 
     hgsql hivVax003Vax004Build -e \
     'load data local infile "GSID_DEMOG_SEQNO_003.txt" into table GSID_DEMOG_SEQNO_003Raw ignore 1 lines'
 
     hgsql hivVax003Vax004Build -e \
     'load data local infile "VAX003_RNACD4080501.txt" into table VAX003_RNACD4080501Raw ignore 1 lines'
 
 # build initial gsidClinicRecTemp table ...
 
     hgsql hivVax003Vax004Build -N -e \
     'select "specId",GSID, MBLabcd, DRNACD4, "rna","cd4" from VAX003_RNACD4080501Raw' \
     >gsidClinicRecTemp.tab
 
     hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecTemp'
     getDbTableDef hiv1 gsidClinicRecTemp >gsidClinicRecTemp.sql
     hgsql hivVax003Vax004Build < gsidClinicRecTemp.sql
 
     hgsql hivVax003Vax004Build -e \
     'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'
 
 # build subjLabcode table ...
 
     hgsql hivVax003Vax004Build -N -e \
     'select GSID, MBLabcd from VAX003_RNACD4080501Raw where MBLabcd!=""' \
     | sort -u > subjLabcode.tab
 
     hgsql hivVax003Vax004Build -e "drop table subjLabcode"
 
     getDbTableDef hiv1 subjLabcode > subjLabcode.sql
     hgsql hivVax003Vax004Build < subjLabcode.sql
     hgsql hivVax003Vax004Build -e \
     'load data local infile "subjLabcode.tab" into table subjLabcode'
 
 # fill in labCode in gsidClinicRecTemp
 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'
 
 # fill in specimenId
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO_003Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'
 
 # fill in RNA 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.hivQuan=r.RNA where t.subjId=r.GSID and t.daysCollection=r.DRNACD4' 
 
 # fill in CD4
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.cd4Count=r.CD4ABS where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'
 
 # change RNA "399" to "200" (which will be displayed as "<400")
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp set hivQuan="200" where hivQuan = "399"'
 
 # update cd4 NULL ...
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="."'
 
 # Echo update daysCollection NULL ...
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'
 
 # build gsidClinicRecNew table
 
     hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecTemp ' \
     |uniq |sed -e 's/NULL/-1/g'  > gsidClinicRecNew.tab
 
     hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew'
     hgsql hivVax003Vax004Build < gsidClinicRecNew.sql
     hgsql hivVax003Vax004Build -e \
     'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'
 
 # set NULLs
 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'
 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'
 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'
 
 # build gsidClinicRecWithSeqNew table
 
     hgsql hivVax003Vax004Build -N -e \
     'select c.* from GSID_DEMOG_SEQNO_003Raw r,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and r.subjId=c.subjId and r.labCode=c.labCode' \
     |sort -u |sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp
 
     hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecWithSeqNew'
     hgsql hivVax003Vax004Build < gsidClinicRecWithSeqNew.sql
     hgsql hivVax003Vax004Build -e \
     'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
     rm gsidClinicRecWithSeqNew.tmp
 
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
     hgsql hivVax003Vax004Build -e \
     'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'
 
 # compare with previous old data
     hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecWithSeqNew' |sort -u >j.n
     hgsql hivVax003Vax004 -N -e 'select * from gsidClinicRecWithSeq'|sort -u  >j.o
     diff j.o j.n |grep -v "GSID4" >j.diff
 
 # load the newly build data into hivVax003Vax004 tables
 
     hgsql hivVax003Vax004 -e 'delete from gsidClinicRec where subjId like "GSID3%"'
     hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
 
    hgsql hivVax003Vax004 -e \
     "insert into gsidClinicRec select * from hivVax003Vax004Build.gsidClinicRecNew"
 
     hgsql hivVax003Vax004 -e \
     "insert into gsidClinicRecWithSeq select * from hivVax003Vax004Build.gsidClinicRecWithSeqNew"
 
 
 #########################################################################
 # Build the gsidSubjSeq table (used by Table View).
 
    gsidSubjSeq hivVax003Vax004 dnaSeqId > j.dna
    gsidSubjSeq hivVax003Vax004 aaSeqId > j.aa
 
    cut -f 1 j.dna >j.1
    cut -f 1 j.aa  >j.2
 
    cut -f 2 j.dna  >j.3
    cut -f 2 j.aa   >j.4
 
    paste j.1 j.3 j.4> gsidSubjSeq.tab
 
    hgsql hivVax003Vax004 -e 'delete from gsidSubjSeq'
    hgsql hivVax003Vax004 -e \
    'load data local infile "gsidSubjSeq.tab" into table gsidSubjSeq'
 
    rm j.1 j.2 j.3 j.4 j.dna j.aa
 
 #################################################################################
 # RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 AE STRAIN (DONE, 7/10/08, Fan)
 
 # First cut the vax003AEMsa sequences so that they start with VPV and end with REKR
 
 # rename existing vax003AEMsa table as vax003AEMsaOld
     hgsql hivVax003Vax004 –e 'rename table vax003AEMsa to vax003AEMsaOld'
 
 # use BLAT to visually decide what are the appropriate starting and ending positions to cut.
 
     hgsql hivVax003Vax004 -N -e \
     'select id,substring(seq, 124, 1743) from vax003AEMsaOld' >vax003AEMsa.tab
 
     tabToFa vax003AEMsa
 # use resulting vax003AEMsa.fa to check that the cut is correct, and then load the new MSA sequences.
 
     hgsql hivVax003Vax004 < ~/src/hg/lib/vax003AEMsa.sql
     hgsql hivVax003Vax004 -N -e 'load data local infile "vax003AEMsa.tab" into table vax003AEMsa'
 
 # RE-BUILD CONSERVATION TRACKS FOR VAX003 AE STRAIN  
 
     mkdir -p \
     /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
     cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
 
 # create the .wig file and .fa file of the consensus sequence.
     gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AECons.wig vax003AEConsensus.fa
 # encode and load the wig file
     wigEncode vax003AECons.wig stdout vax003AECons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003AECons stdin
 
 # copy .wib file to /gbdb
     mkdir -p /gbdb/hivVax003Vax004/wib
     cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
 
 # do the same for protein conservation track
 
     mkdir aa
     cd aa
 
 # create .wig file
     gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AEAaCons.wig vax003AEAaConsensus.fa
 
 # encode and load the .wib file   
     wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
 
     cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
 
 # CREATE MAF TRACKS FOR VAX003 AE STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
 
 # create a script file, doall
 
     hgsql hivVax003Vax004 -N -e \
     'select id from dnaSeq where id like "%T%"'\
     |sed -e 's/ss/do1 ss/g' >doall
 
 # create one line script file, do1, with the following line in it:
 
     hgsql hivVax003Vax004 -N -e  "select id, seq from vax003AEMsa where id='${1}'"
 
     chmod +x do*
 
 # run the script to get the .tab file with all MSA sequences of VAX003 AE
     doall >Vax003Vax004.tab
 # convert .tab into .fa file
     tabToFa Vax003Vax004
 
 # grab the base alignment sequence
     echo ">hivVax003Vax004" >Vax003Vax004.aln
     hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"'  >> Vax003Vax004.aln
 
 # prepare an interium file, jjAll.mfa
     cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
     echo = >>jjAll.mfa
 
 # Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
 
     xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
     cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
 
 #    rm jjAll.mfa j.out
 
     cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
 
 # copy .maf to /gbdb.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003AEMaf
     cp chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf -p
 
     hgLoadMaf hivVax003Vax004 vax003AEMaf
 
 # create another copy for protein MAF.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003AEMaf 
     cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
     hgLoadMaf hivVax003Vax004 vax003AEAaMaf
 
 #################################################################################
 # RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 B STRAIN (DONE, 7/10/08, Fan)
 # First cut the vax003BMsa sequences so that they start with VPV and end with REKR
 
 # rename existing vax003BMsa table as vax003BMsaOld
     hgsql hivVax003Vax004 –e 'rename table vax003BMsa to vax003BMsaOld'
 
 # use BLAT to visually decide what are the appropriate starting and ending positions to cut.
 
     hgsql hivVax003Vax004 -N -e \
     'select id,substring(seq, 121, 1620) from vax003BMsaOld' >vax003BMsa.tab
 
     tabToFa vax003BMsa
 # use resulting vax003BMsa.fa to check that the cut is correct, then load the new MSA sequences.
 
     hgsql hivVax003Vax004 -e 'drop table vax003BMsa' 
     hgsql hivVax003Vax004 < ~/src/hg/lib/vax003BMsa.sql
     hgsql hivVax003Vax004 -N -e 'load data local infile "vax003BMsa.tab" into table vax003BMsa'
 
 # RE-BUILD CONSERVATION TRACKS FOR VAX003 B STRAIN  
 
     mkdir -p \
     /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
     cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
 
 # create the .wig file and .fa file of the consensus sequence.
     gsidMsa hivVax003Vax004 vax003BMsa HXB2 6348 vax003BCons.wig vax003BConsensus.fa
 # encode and load the wig file
     wigEncode vax003BCons.wig stdout vax003BCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003BCons stdin
 
 # copy .wib file to /gbdb
     mkdir -p /gbdb/hivVax003Vax004/wib
     cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
 
 # do the same for protein conservation track
 
     mkdir aa
     cd aa
 
 # create .wig file
     gsidAaMsa2 hivVax003Vax004 vax0 '03BMsa HXB2 6348 vax003BAaCons.wig vax003BAaConsensus.fa
 
 # encode and load the .wib file   
     wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
     | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
 
     cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
 
 # CREATE MAF TRACKS FOR VAX003 B STRAIN
 
     mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
     cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
 
 # create a script file, doall
 
     hgsql hivVax003Vax004 -N -e \
     'select id from dnaSeq where id like "%T%"'\
     |sed -e 's/ss/do1 ss/g' >doall
 
 # create one line script file, do1, with the following line in it:
 
     hgsql hivVax003Vax004 -N -e  "select id, seq from vax003BMsa where id='${1}'"
 
     chmod +x do*
 
 # run the script to get the .tab file with all MSA sequences of VAX003 B
     doall >Vax003Vax004.tab
 # convert .tab into .fa file
     tabToFa Vax003Vax004
 
 # grab the base alignment sequence
     echo ">hivVax003Vax004" >Vax003Vax004.aln
     hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"'  >> Vax003Vax004.aln
 
 # prepare an interium file, jjAll.mfa
     cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
     echo = >>jjAll.mfa
 
 # Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
 
     xmfaToMafVax003Vax004 jjAll.mfa j.out  org1=hivVax003Vax004
     cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
 
 #    rm jjAll.mfa j.out
 
     cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
 
 # copy .maf to /gbdb.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003BMaf
     cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
 
     hgLoadMaf hivVax003Vax004 vax003BMaf
 
 # create another copy for protein MAF.
 
     mkdir -p  /gbdb/hivVax003Vax004/vax003BMaf 
     cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
     hgLoadMaf hivVax003Vax004 vax003BAaMaf
 
 ########################################################################################
 # REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)
 
 mkdir -p /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
 
 cd /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
 
 #copy table gsidClinicRecNew into gsidClinicRecNew2
 
 cp ~/hg/lib/gsidClinicRec.sql gsidClinicRecNew2.sql
 vi gsidClinicRecNew2.sql
 hgsql  hivVax003Vax004Build -e 'drop table gsidClinicRecNew2'
 hgsql  hivVax003Vax004Build < gsidClinicRecNew2.sql
 
 hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew2.tab
 
 hgsql  hivVax003Vax004Build -e \
 'load data local infile "gsidClinicRecNew2.tab" into table  gsidClinicRecNew2'
 
 hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >j.tab
 diff j.tab gsidClinicRecNew2.tab
 
 # change 200 to 399 so that they are consistent between two table
 
 hgsql hivVax003Vax004Build -e 'update gsidClinicRecNew2 set hivQuan=399 where hivQuan=200'
 hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >jj.tab
 diff j.tab jj.tab
 
 # rebuild the gsidClinicRecWithSeq table for VAX003 subjects
 
 hgsql hivVax003Vax004Build -N -e \
 'select c.specimenId, c.subjId, c.labCode, c.daysCollection, r.RNA, r.CD4ABS from gsidClinicRecNew2 c, VAX003_RNACD4080501Raw r where c.specimenId=r.SpecimenNo and c.daysCollection=r.DRNACD4 and c.subjId=r.GSID and r.RNA=c.hivQuan and r.CD4ABS=c.cd4Count and r.SpecimenNo != ""' >j.out
 
 cut -f 1-4 j.out >j.1
 
 # revert back from 399 to 200
 cut -f 5-6 j.out  | sed -e 's/399\t/200\t/' >j.2
 
 paste j.1 j.2 >gsidClinicRecWithSeq.vax003.tab
 
 hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
 hgsql hivVax003Vax004 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
 
 # update the same table for the other 3 genomes
 
 hgsql hivgne8v2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
 hgsql hivgne8v2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
 
 hgsql hivmn2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
 hgsql hivmn2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
 
 hgsql hiva244 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
 hgsql hiva244 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
 
 ######################################################################################
 
 # Create VAX003 subtype B Positive Selection tracks for hivVax003Vax004
 
 cd /hive/groups/gsid/medical/hiv/hivVax003Vax004
 mkdir posSelection
 cd posSelection
 
 # BLAT /hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsaAaConsensus.fa
 # against hivVax003Vax004 base genome, select psl without header option
 # cut and paste the result into the file BMsa.psl
 
 hgLoadPsl -keep -table=BMsaPsl -nobin hivVax003Vax004 BMsa.psl 
 
 # will get the following error:
 #Processing BMsa.psl
 #Can't start query:
 #LOAD DATA CONCURRENT  INFILE
 '/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
 INTO TABLE BMsaPsl
 
 #mySQL error 13: Can't get stat of
 '/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
 (Errcode: 13)
 
 # load manually then
 
 hgsql hivVax003Vax004
 load data local infile "BMsa.psl" into table BMsaPsl;
 quit
 
 # build the positive selection tracks for model 2 and model 8.
 
 gsidPosSelect hivVax003Vax004  BMsaPsl posSelBuild pSelectBModel2
 posSelModel2.bed
 hgLoadBed hivVax003Vax004 posSelModel2 posSelModel2.bed
 
 gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel8
 posSelModel8.bed
 hgLoadBed hivVax003Vax004 posSelModel8 posSelModel8.bed
 
 ##########################################################################
 # BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE
 
     ssh hiv1
     mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
     cd /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
 
 # BLAT
 # /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
 # against hivVax003Vax004 base genome, select psl without header option
 # cut and paste the result into the file AEMsa.psl
 
 hgLoadPsl -keep -table=AEMsaPsl -nobin hivVax003Vax004 AEMsa.psl 
 
 # will get the following error:
 #Processing AEMsa.psl
 #Can't start query:
 #LOAD DATA CONCURRENT  INFILE
 #'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.ps#l'
 INTO TABLE AEMsaPsl
 
 #mySQL error 13: Can't get stat of
 #'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.ps#l'
 (Errcode: 13)
 
 # load manually then
 
 hgsql hivVax003Vax004
 load data local infile "AEMsa.psl" into table AEMsaPsl;
 quit
 
 # build positive selection tracks for model 2 and model 8.
 
 gsidPosSelect hivVax003Vax004  AEMsaPsl posSelBuild pSelectAEModel2
 posSelAEModel2.bed
 hgLoadBed hivVax003Vax004 posSelAEModel2 posSelAEModel2.bed
 
 gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel8
 posSelAEModel8.bed
 hgLoadBed hivVax003Vax004 posSelAEModel8 posSelAEModel8.bed
 
 ##########################################################################
 # BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)
 
 cd /cluster/hive/groups/gsid/medical/hiv/posSelection
 
 mkdir vax004
 cd vax004
 
 # Since there are large number (12) of subclasses and 4 HIV genomes,
 # this has to be automated.  So create the do1, do2, do3 script first. 
 # Please note that the do3 script works on all 4 HIV genomes.
 
 cat << '_EOF_' >do1
 #do1.1
 
 mkdir -p $1
 # start with clean slate
 rm $1/*
 
 cp -p /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
 
 cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/data/$1.nex $1
 
 cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
 
 #do1.2
 
 cd $1
 cat VAX004-$1-sites.paml|grep "+-" >j.tmp
 get1stHalf j.tmp >$1Model2.paml
 cat $1Model2.paml |\
 sed -e 's/+-//g'|\
 sed -e 's/ \* / xxx /g'|\
 sed -e 's/\*//g'|\
 sed -e 's/xxx/\*/g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/ //'|\
 sed -e 's/ /\t/g' > vax004$1Model2.tab
 
 hgLoadSqlTab -notOnServer hgFixed vax004$1Model2 ~/src/hg/lib/posSelectModel.sql vax004$1Model2.tab
 
 get2ndHalf j.tmp >$1Model8.paml
 cat $1Model8.paml |\
 sed -e 's/+-//g'|\
 sed -e 's/ \* / xxx /g'|\
 sed -e 's/\*//g'|\
 sed -e 's/xxx/\*/g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/  / /g'|\
 sed -e 's/ //'|\
 sed -e 's/ /\t/g' > vax004$1Model8.tab
 
 hgLoadSqlTab -notOnServer hgFixed vax004$1Model8 ~/src/hg/lib/posSelectModel.sql vax004$1Model8.tab
 
 rm j.tmp
 
 #do1.3
 
 cat $1.nex|grep 'U\.'|\
     sed -e 's/  / /g'|\
     sed -e 's/  / /g'|\
     sed -e 's/  / /g'|\
     sed -e 's/  / /g'|\
     sed -e 's/  / /g'|\
     sed -e 's/ /\t/g' >vax004$1Msa.tab
 chmod +rx *.tab
 
 hgLoadSqlTab -notOnServer hgFixed vax004$1Msa /hive/groups/gsid/medical/hiv/posSelection/vax004/dnaSeq.sql vax004$1Msa.tab
 cd ..
 
 #do1.4
 
 hgsql -N -e "select concat('do2 ${1} ', id) from hgFixed.vax004${1}Msa limit 1" >doit
 chmod +x doit
 doit
 '_EOF_'
 
 chmod +x do1
 
 cat << '_EOF_' >do2
 gsidAaMsa2 hgFixed vax004$1Msa $2 1 $1/$1Msa.wig $1/$1MsaAaConsensus.fa
 '_EOF_'
 chmod +x do2
 
 cat << '_EOF_' >do3
 # process hivVax003Vax004
 hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivVax003Vax004 ', port) from blatServers where db='hivVax003Vax004' and isTrans=1" >doBlat
 chmod +x doBlat
 ./doBlat
 
 cd $1
 gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
 hgLoadBed     hivVax003Vax004 posSelVax004$1Model2                  posSelVax004$1Model2.bed
 
 gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
 hgLoadBed     hivVax003Vax004 posSelVax004$1Model8                  posSelVax004$1Model8.bed
 cd ..
 
 # process hivmn2
 hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivmn2 ', port) from blatServers where db='hivmn2' and isTrans=1" >doBlat
 chmod +x doBlat
 ./doBlat
 
 cd $1
 gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
 hgLoadBed     hivmn2 posSelVax004$1Model2                  posSelVax004$1Model2.bed
 
 gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
 hgLoadBed     hivmn2 posSelVax004$1Model8                  posSelVax004$1Model8.bed
 cd ..
 
 # process hivgne8v2
 hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivgne8v2 ', port) from blatServers where db='hivgne8v2' and isTrans=1" >doBlat
 chmod +x doBlat
 ./doBlat
 
 cd $1
 gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
 hgLoadBed     hivgne8v2 posSelVax004$1Model2                  posSelVax004$1Model2.bed
 
 gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
 hgLoadBed     hivgne8v2 posSelVax004$1Model8                  posSelVax004$1Model8.bed
 cd ..
 
 # process hiva244
 hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hiva244 ', port) from blatServers where db='hiva244' and isTrans=1" >doBlat
 chmod +x doBlat
 ./doBlat
 
 cd $1
 gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
 hgLoadBed     hiva244 posSelVax004$1Model2                  posSelVax004$1Model2.bed
 
 gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
 hgLoadBed     hiva244 posSelVax004$1Model8                  posSelVax004$1Model8.bed
 cd ..
 '_EOF_'
 
 chmod +x do3
 
 # Now run the scripts for all subclasses.
 
 do1 Hispanic
 do1 Midwest
 do1 Northeast
 do1 Other
 do1 South
 do1 Southwest
 do1 Westcoast
 do1 White
 do1 Asian
 do1 Black
 do1 pla
 do1 vac
 
 # BTW, do1 calls do2
 
 do3 Hispanic
 do3 Midwest
 do3 Northeast
 do3 Other
 do3 South
 do3 Southwest
 do3 Westcoast
 do3 White
 do3 Asian
 do3 Black
 do3 pla
 do3 vac
 
 ##########################################################################
+# BUILD IMMUNO TABLE FOR IMMUOGENICITY DATA.  (DONE 9/29/09, Fan)
+
+cd ~/kent/src/hg/lib
+
+hgsql hgFixed -e 'drop table immunoRaw'
+hgsql hgFixed < immunoRaw.sql
+
+cd /cluster/hive/groups/gsid/medical/hiv/hivmnV3
+
+# copy over raw data file.
+cp -p /cluster/hive/groups/gsid/medical/hiv/immunogenicityData/orig/Antibody_Inf_LPLT_31Jul_forFH.txt jRaw.txt
+
+# replace empty file with "-3", "N/A" with "-1", and "N/D" with "-2"
+cat jRaw.txt|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/N\/A/-1/g'|\
+sed -e 's/N\/D/-2/g' >jRaw2.txt
+
+# load data into immunoRaw table
+hgsql hgFixed -e 'load data local infile "jRaw2.txt" into table immunoRaw ignore 1 lines'
+
+hgsql hgFixed -e 'update immunoRaw set LastTrAntiGP120="-3" where LastTrAntiGP120=""'
+
+hgsql hgFixed -e 'select * from immunoRaw' >immuno.tab
+
+# load data into immuno table
+hgsql hgFixed -e 'drop table immuno'
+hgsql hgFixed < immuno.sql
+hgsql hgFixed -e 'load data local infile "immuno.tab" into table immuno'
+
+# replace NULL with -1 for SDayLastPTest.
+
+hgsql hgFixed -e 'update immuno set SDayLastPTest = NULL where SDayLastPTest = -1'
+#######################################################################################