src/hg/makeDb/doc/hivVax003Vax004.txt 1.9
1.9 2009/09/29 15:48:30 fanhsu
Added the section of immuno build.
Index: src/hg/makeDb/doc/hivVax003Vax004.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hivVax003Vax004.txt,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 1000000 -r1.8 -r1.9
--- src/hg/makeDb/doc/hivVax003Vax004.txt 2 Mar 2009 21:41:47 -0000 1.8
+++ src/hg/makeDb/doc/hivVax003Vax004.txt 29 Sep 2009 15:48:30 -0000 1.9
@@ -1,927 +1,965 @@
# for emacs: -*- mode: sh; -*-
#########################################################################
# hivVax003Vax004 DATABASE BUILD (DONE 5/20/08, Fan)
ssh hiv1
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004
cd /cluster/store12/medical/hiv/hivVax003Vax004
#########################################################################
# Create hivVax003Vax004 DB
# (option flag must be an ASCII hyphen; an en dash is not recognized as -e)
hgsql -e 'create database hivVax003Vax004'
# Ask admin to copy over all tables from hiv1 to hivVax003Vax004
#########################################################################
# CREATE MAF TRACKS FOR VAX004
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%U%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax004Msa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX004
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax004Msa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
# program name matches the comment above and the other MAF sections of this doc
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax004Maf
cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p
hgLoadMaf hivVax003Vax004 vax004Maf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax004AaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
hgLoadMaf hivVax003Vax004 vax004AaMaf
#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AECons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AEAaCons.wig vax003AEAaConsensus.fa
# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
#########################################################################
# CREATE MAF TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 AE
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
# The AE-strain alignment goes into its own vax003AEMaf directory/table so that
# it does not overwrite the VAX004 alignment already loaded as vax004Maf.
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf
hgLoadMaf hivVax003Vax004 vax003AEMaf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003AEAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
hgLoadMaf hivVax003Vax004 vax003AEAaMaf
#########################################################################
# COPY OVER MSA TABLES FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
# get table definition
mysqldump -d hivVax003Vax004 vax003BMsa -u medcat -p$HGPSWD|hgsql hivVax003Vax004
# load the table
hgsql hivVax003Vax004 -e "insert into vax003BMsa select * from hivVax003Vax004.vax003BMsa"
#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6228 vax003BCons.wig vax003BConsensus.fa
# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BCons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6228 vax003BAaCons.wig vax003BAaConsensus.fa
# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
#########################################################################
# CREATE MAF TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 B
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
hgLoadMaf hivVax003Vax004 vax003BMaf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003BAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf
#########################################################################
# Process, check, correct and load VAX003 clinical tables
mkdir -p /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
cd /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
# copy over original raw data files
cp -p /cluster/store12/medical/vaxGen/fromEvie/VAX003/*.txt .
ls -l *.txt
# shorten the file name and run processRaw to generate .sql def
cp "VAX003 RNACD4 match with sequence ID_20080501_EMZ18Jun.txt" VAX003_RNACD4080501.txt
processRaw VAX003_RNACD4080501.txt
# (re)create hivVax003Vax004Build DB to be used in this build process.
# "if exists" keeps the drop from failing on a first-time build; the database
# is created exactly once (a second create would fail with "database exists").
hgsql hiv1 -e 'drop database if exists hivVax003Vax004Build'
hgsql hiv1 -e 'create database hivVax003Vax004Build'
# load raw demographic and RNACD4 data
hgsql hivVax003Vax004Build < GSID_DEMOG_SEQNO_003Raw.sql
hgsql hivVax003Vax004Build < VAX003_RNACD4080501Raw.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "GSID_DEMOG_SEQNO_003.txt" into table GSID_DEMOG_SEQNO_003Raw ignore 1 lines'
hgsql hivVax003Vax004Build -e \
'load data local infile "VAX003_RNACD4080501.txt" into table VAX003_RNACD4080501Raw ignore 1 lines'
# build initial gsidClinicRecTemp table ...
hgsql hivVax003Vax004Build -N -e \
'select "specId",GSID, MBLabcd, DRNACD4, "rna","cd4" from VAX003_RNACD4080501Raw' \
>gsidClinicRecTemp.tab
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecTemp'
getDbTableDef hiv1 gsidClinicRecTemp >gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build < gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'
# build subjLabcode table ...
hgsql hivVax003Vax004Build -N -e \
'select GSID, MBLabcd from VAX003_RNACD4080501Raw where MBLabcd!=""' \
| sort -u > subjLabcode.tab
hgsql hivVax003Vax004Build -e "drop table subjLabcode"
getDbTableDef hiv1 subjLabcode > subjLabcode.sql
hgsql hivVax003Vax004Build < subjLabcode.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "subjLabcode.tab" into table subjLabcode'
# fill in labCode in gsidClinicRecTemp
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'
# fill in specimenId
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO_003Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'
# fill in RNA
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.hivQuan=r.RNA where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'
# fill in CD4
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.cd4Count=r.CD4ABS where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'
# change RNA "399" to "200" (which will be displayed as "<400")
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set hivQuan="200" where hivQuan = "399"'
# update cd4 NULL ...
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="."'
# Echo update daysCollection NULL ...
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'
# build gsidClinicRecNew table
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecTemp ' \
|uniq |sed -e 's/NULL/-1/g' > gsidClinicRecNew.tab
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew'
hgsql hivVax003Vax004Build < gsidClinicRecNew.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'
# set NULLs
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'
# build gsidClinicRecWithSeqNew table
hgsql hivVax003Vax004Build -N -e \
'select c.* from GSID_DEMOG_SEQNO_003Raw r,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and r.subjId=c.subjId and r.labCode=c.labCode' \
|sort -u |sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecWithSeqNew'
hgsql hivVax003Vax004Build < gsidClinicRecWithSeqNew.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
rm gsidClinicRecWithSeqNew.tmp
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'
# compare with previous old data
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecWithSeqNew' |sort -u >j.n
hgsql hivVax003Vax004 -N -e 'select * from gsidClinicRecWithSeq'|sort -u >j.o
diff j.o j.n |grep -v "GSID4" >j.diff
# load the newly build data into hivVax003Vax004 tables
hgsql hivVax003Vax004 -e 'delete from gsidClinicRec where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e \
"insert into gsidClinicRec select * from hivVax003Vax004Build.gsidClinicRecNew"
hgsql hivVax003Vax004 -e \
"insert into gsidClinicRecWithSeq select * from hivVax003Vax004Build.gsidClinicRecWithSeqNew"
#########################################################################
# Build the gsidSubjSeq table (used by Table View).
gsidSubjSeq hivVax003Vax004 dnaSeqId > j.dna
gsidSubjSeq hivVax003Vax004 aaSeqId > j.aa
cut -f 1 j.dna >j.1
cut -f 1 j.aa >j.2
cut -f 2 j.dna >j.3
cut -f 2 j.aa >j.4
paste j.1 j.3 j.4> gsidSubjSeq.tab
hgsql hivVax003Vax004 -e 'delete from gsidSubjSeq'
hgsql hivVax003Vax004 -e \
'load data local infile "gsidSubjSeq.tab" into table gsidSubjSeq'
rm j.1 j.2 j.3 j.4 j.dna j.aa
#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 AE STRAIN (DONE, 7/10/08, Fan)
# First cut the vax003AEMsa sequences so that they start with VPV and end with REKR
# rename existing vax003AEMsa table as vax003AEMsaOld
# (option flag must be an ASCII hyphen; an en dash is not recognized as -e)
hgsql hivVax003Vax004 -e 'rename table vax003AEMsa to vax003AEMsaOld'
# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
hgsql hivVax003Vax004 -N -e \
'select id,substring(seq, 124, 1743) from vax003AEMsaOld' >vax003AEMsa.tab
tabToFa vax003AEMsa
# use resulting vax003AEMsa.fa to check that the cut is correct, and then load the new MSA sequences.
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003AEMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003AEMsa.tab" into table vax003AEMsa'
# RE-BUILD CONSERVATION TRACKS FOR VAX003 AE STRAIN
mkdir -p \
/cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AECons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AEAaCons.wig vax003AEAaConsensus.fa
# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
# CREATE MAF TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 AE
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf -p
hgLoadMaf hivVax003Vax004 vax003AEMaf
# create another copy for protein MAF.
# The directory created must be the one copied into (vax003AEAaMaf); otherwise
# cp writes a plain file named vax003AEAaMaf instead of placing chr1.maf in it.
mkdir -p /gbdb/hivVax003Vax004/vax003AEAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
hgLoadMaf hivVax003Vax004 vax003AEAaMaf
#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 B STRAIN (DONE, 7/10/08, Fan)
# First cut the vax003BMsa sequences so that they start with VPV and end with REKR
# rename existing vax003BMsa table as vax003BMsaOld
# (option flag must be an ASCII hyphen; an en dash is not recognized as -e)
hgsql hivVax003Vax004 -e 'rename table vax003BMsa to vax003BMsaOld'
# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
hgsql hivVax003Vax004 -N -e \
'select id,substring(seq, 121, 1620) from vax003BMsaOld' >vax003BMsa.tab
tabToFa vax003BMsa
# use resulting vax003BMsa.fa to check that the cut is correct, then load the new MSA sequences.
hgsql hivVax003Vax004 -e 'drop table vax003BMsa'
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003BMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003BMsa.tab" into table vax003BMsa'
# RE-BUILD CONSERVATION TRACKS FOR VAX003 B STRAIN
mkdir -p \
/cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6348 vax003BCons.wig vax003BConsensus.fa
# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BCons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
# table name is vax003BMsa, the same MSA table used for the DNA conservation track above
gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6348 vax003BAaCons.wig vax003BAaConsensus.fa
# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
# CREATE MAF TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 B
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
hgLoadMaf hivVax003Vax004 vax003BMaf
# create another copy for protein MAF.
# The directory created must be the one copied into (vax003BAaMaf).
mkdir -p /gbdb/hivVax003Vax004/vax003BAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf
########################################################################################
# REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)
mkdir -p /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
cd /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
#copy table gsidClinicRecNew into gsidClinicRecNew2
cp ~/hg/lib/gsidClinicRec.sql gsidClinicRecNew2.sql
vi gsidClinicRecNew2.sql
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew2'
hgsql hivVax003Vax004Build < gsidClinicRecNew2.sql
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew2.tab
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecNew2.tab" into table gsidClinicRecNew2'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >j.tab
diff j.tab gsidClinicRecNew2.tab
# change 200 to 399 so that they are consistent between two table
hgsql hivVax003Vax004Build -e 'update gsidClinicRecNew2 set hivQuan=399 where hivQuan=200'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >jj.tab
diff j.tab jj.tab
# rebuild the gsidClinicRecWithSeq table for VAX003 subjects
hgsql hivVax003Vax004Build -N -e \
'select c.specimenId, c.subjId, c.labCode, c.daysCollection, r.RNA, r.CD4ABS from gsidClinicRecNew2 c, VAX003_RNACD4080501Raw r where c.specimenId=r.SpecimenNo and c.daysCollection=r.DRNACD4 and c.subjId=r.GSID and r.RNA=c.hivQuan and r.CD4ABS=c.cd4Count and r.SpecimenNo != ""' >j.out
cut -f 1-4 j.out >j.1
# revert back from 399 to 200
cut -f 5-6 j.out | sed -e 's/399\t/200\t/' >j.2
paste j.1 j.2 >gsidClinicRecWithSeq.vax003.tab
hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
# update the same table for the other 3 genomes
hgsql hivgne8v2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivgne8v2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
hgsql hivmn2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivmn2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
hgsql hiva244 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hiva244 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
######################################################################################
# Create VAX003 subtype B Positive Selection tracks for hivVax003Vax004
cd /hive/groups/gsid/medical/hiv/hivVax003Vax004
mkdir posSelection
cd posSelection
# BLAT /hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file BMsa.psl
hgLoadPsl -keep -table=BMsaPsl -nobin hivVax003Vax004 BMsa.psl
# will get the following error:
#Processing BMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE
#'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
#INTO TABLE BMsaPsl
#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
#(Errcode: 13)
# load manually then
hgsql hivVax003Vax004
load data local infile "BMsa.psl" into table BMsaPsl;
quit
# build the positive selection tracks for model 2 and model 8.
# The output .bed file is the last argument of gsidPosSelect, so the wrapped
# line needs a continuation backslash to stay part of the same command.
gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel2 \
  posSelModel2.bed
hgLoadBed hivVax003Vax004 posSelModel2 posSelModel2.bed
gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel8 \
  posSelModel8.bed
hgLoadBed hivVax003Vax004 posSelModel8 posSelModel8.bed
##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE
ssh hiv1
mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
cd /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
# BLAT
# /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file AEMsa.psl
hgLoadPsl -keep -table=AEMsaPsl -nobin hivVax003Vax004 AEMsa.psl
# will get the following error:
#Processing AEMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl'
#INTO TABLE AEMsaPsl
#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl'
#(Errcode: 13)
# load manually then
hgsql hivVax003Vax004
load data local infile "AEMsa.psl" into table AEMsaPsl;
quit
# build positive selection tracks for model 2 and model 8.
# The output .bed file is the last argument of gsidPosSelect, so the wrapped
# line needs a continuation backslash to stay part of the same command.
gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel2 \
  posSelAEModel2.bed
hgLoadBed hivVax003Vax004 posSelAEModel2 posSelAEModel2.bed
gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel8 \
  posSelAEModel8.bed
hgLoadBed hivVax003Vax004 posSelAEModel8 posSelAEModel8.bed
##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)
cd /cluster/hive/groups/gsid/medical/hiv/posSelection
mkdir vax004
cd vax004
# Since there are large number (12) of subclasses and 4 HIV genomes,
# this has to be automated. So create the do1, do2, do3 script first.
# Please note that the do3 script works on all 4 HIV genomes.
cat << '_EOF_' >do1
#do1.1
mkdir -p $1
# start with clean slate
rm $1/*
cp -p /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/data/$1.nex $1
cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
#do1.2
cd $1
cat VAX004-$1-sites.paml|grep "+-" >j.tmp
get1stHalf j.tmp >$1Model2.paml
cat $1Model2.paml |\
sed -e 's/+-//g'|\
sed -e 's/ \* / xxx /g'|\
sed -e 's/\*//g'|\
sed -e 's/xxx/\*/g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ //'|\
sed -e 's/ /\t/g' > vax004$1Model2.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Model2 ~/src/hg/lib/posSelectModel.sql vax004$1Model2.tab
get2ndHalf j.tmp >$1Model8.paml
cat $1Model8.paml |\
sed -e 's/+-//g'|\
sed -e 's/ \* / xxx /g'|\
sed -e 's/\*//g'|\
sed -e 's/xxx/\*/g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ //'|\
sed -e 's/ /\t/g' > vax004$1Model8.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Model8 ~/src/hg/lib/posSelectModel.sql vax004$1Model8.tab
rm j.tmp
#do1.3
cat $1.nex|grep 'U\.'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ / /g'|\
sed -e 's/ /\t/g' >vax004$1Msa.tab
chmod +rx *.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Msa /hive/groups/gsid/medical/hiv/posSelection/vax004/dnaSeq.sql vax004$1Msa.tab
cd ..
#do1.4
hgsql -N -e "select concat('do2 ${1} ', id) from hgFixed.vax004${1}Msa limit 1" >doit
chmod +x doit
doit
'_EOF_'
chmod +x do1
cat << '_EOF_' >do2
gsidAaMsa2 hgFixed vax004$1Msa $2 1 $1/$1Msa.wig $1/$1MsaAaConsensus.fa
'_EOF_'
chmod +x do2
cat << '_EOF_' >do3
# process hivVax003Vax004
hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivVax003Vax004 ', port) from blatServers where db='hivVax003Vax004' and isTrans=1" >doBlat
chmod +x doBlat
./doBlat
cd $1
gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
hgLoadBed hivVax003Vax004 posSelVax004$1Model2 posSelVax004$1Model2.bed
gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
hgLoadBed hivVax003Vax004 posSelVax004$1Model8 posSelVax004$1Model8.bed
cd ..
# process hivmn2
hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivmn2 ', port) from blatServers where db='hivmn2' and isTrans=1" >doBlat
chmod +x doBlat
./doBlat
cd $1
gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
hgLoadBed hivmn2 posSelVax004$1Model2 posSelVax004$1Model2.bed
gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
hgLoadBed hivmn2 posSelVax004$1Model8 posSelVax004$1Model8.bed
cd ..
# process hivgne8v2
hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivgne8v2 ', port) from blatServers where db='hivgne8v2' and isTrans=1" >doBlat
chmod +x doBlat
./doBlat
cd $1
gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
hgLoadBed hivgne8v2 posSelVax004$1Model2 posSelVax004$1Model2.bed
gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
hgLoadBed hivgne8v2 posSelVax004$1Model8 posSelVax004$1Model8.bed
cd ..
# process hiva244
hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hiva244 ', port) from blatServers where db='hiva244' and isTrans=1" >doBlat
chmod +x doBlat
./doBlat
cd $1
gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
hgLoadBed hiva244 posSelVax004$1Model2 posSelVax004$1Model2.bed
gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
hgLoadBed hiva244 posSelVax004$1Model8 posSelVax004$1Model8.bed
cd ..
'_EOF_'
chmod +x do3
# Now run the scripts for all subclasses.
do1 Hispanic
do1 Midwest
do1 Northeast
do1 Other
do1 South
do1 Southwest
do1 Westcoast
do1 White
do1 Asian
do1 Black
do1 pla
do1 vac
# BTW, do1 calls do2
do3 Hispanic
do3 Midwest
do3 Northeast
do3 Other
do3 South
do3 Southwest
do3 Westcoast
do3 White
do3 Asian
do3 Black
do3 pla
do3 vac
##########################################################################
+# BUILD IMMUNO TABLE FOR IMMUNOGENICITY DATA. (DONE 9/29/09, Fan)
+
+cd ~/kent/src/hg/lib
+
+hgsql hgFixed -e 'drop table immunoRaw'
+hgsql hgFixed < immunoRaw.sql
+
+cd /cluster/hive/groups/gsid/medical/hiv/hivmnV3
+
+# copy over raw data file.
+cp -p /cluster/hive/groups/gsid/medical/hiv/immunogenicityData/orig/Antibody_Inf_LPLT_31Jul_forFH.txt jRaw.txt
+
+# replace empty field with "-3", "N/A" with "-1", and "N/D" with "-2"
+cat jRaw.txt|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/\t\t/\t-3\t/g'|\
+sed -e 's/N\/A/-1/g'|\
+sed -e 's/N\/D/-2/g' >jRaw2.txt
+
+# load data into immunoRaw table
+hgsql hgFixed -e 'load data local infile "jRaw2.txt" into table immunoRaw ignore 1 lines'
+
+hgsql hgFixed -e 'update immunoRaw set LastTrAntiGP120="-3" where LastTrAntiGP120=""'
+
+# -N suppresses the column-name header line so it is not loaded into immuno as a data row
+hgsql hgFixed -N -e 'select * from immunoRaw' >immuno.tab
+
+# load data into immuno table
+hgsql hgFixed -e 'drop table immuno'
+hgsql hgFixed < immuno.sql
+hgsql hgFixed -e 'load data local infile "immuno.tab" into table immuno'
+
+# replace -1 with NULL for SDayLastPTest.
+
+hgsql hgFixed -e 'update immuno set SDayLastPTest = NULL where SDayLastPTest = -1'
+#######################################################################################