src/hg/makeDb/doc/hivVax003Vax004.txt 1.8
1.8 2009/03/02 21:41:47 fanhsu
Checked in the section of the posSelection tracks build for vax004.
Index: src/hg/makeDb/doc/hivVax003Vax004.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hivVax003Vax004.txt,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 1000000 -r1.7 -r1.8
--- src/hg/makeDb/doc/hivVax003Vax004.txt 14 Jan 2009 18:10:52 -0000 1.7
+++ src/hg/makeDb/doc/hivVax003Vax004.txt 2 Mar 2009 21:41:47 -0000 1.8
@@ -1,751 +1,927 @@
# for emacs: -*- mode: sh; -*-
#########################################################################
# hivVax003Vax004 DATABASE BUILD (DONE 5/20/08, Fan)
ssh hiv1
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004
cd /cluster/store12/medical/hiv/hivVax003Vax004
#########################################################################
# Create hivVax003Vax004 DB
hgsql –e 'create database hivVax003Vax004'
# Ask admin to copy over all tables from hiv1 to hivVax003Vax004
#########################################################################
# CREATE MAF TRACKS FOR VAX004
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%U%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax004Msa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX004
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax004Msa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interium file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax004Maf
cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p
hgLoadMaf hivVax003Vax004 vax004Maf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax004AaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
hgLoadMaf hivVax003Vax004 vax004AaMaf
#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AECons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AEAaCons.wig vax003AEAaConsensus.fa
# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
#########################################################################
# CREATE MAF TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 AE
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interium file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax004Maf
cp chr1.maf /gbdb/hivVax003Vax004/vax004Maf -p
hgLoadMaf hivVax003Vax004 vax004Maf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax004AaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
hgLoadMaf hivVax003Vax004 vax004AaMaf
#########################################################################
# COPY OVER MSA TABLES FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
# get table definition
mysqldump -d hivVax003Vax004 vax003BMsa -u medcat -p$HGPSWD|hgsql hivVax003Vax004
# load the table
hgsql hivVax003Vax004 -e "insert into vax003BMsa select * from hivVax003Vax004.vax003BMsa"
#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6228 vax003BCons.wig vax003BConsensus.fa
# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BCons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6228 vax003BAaCons.wig vax003BAaConsensus.fa
# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
#########################################################################
# CREATE MAF TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX004
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interium file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
hgLoadMaf hivVax003Vax004 vax003BMaf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003BAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf
#########################################################################
# Process, check, correct and load VAX003 clinical tables
mkdir -p /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
cd /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
# copy over original raw data files
cp -p /cluster/store12/medical/vaxGen/fromEvie/VAX003/*.txt .
ls -l *.txt
# shorten the file name and run processRaw to generate .sql def
cp "VAX003 RNACD4 match with sequence ID_20080501_EMZ18Jun.txt" VAX003_RNACD4080501.txt
processRaw VAX003_RNACD4080501.txt
hgsql hiv1 -e 'drop database hivVax003Vax004Build'
hgsql hiv1 -e 'create database hivVax003Vax004Build'
# create hivVax003Vax004Build DB to be used in this build process
hgsql hiv1 -e 'create database hivVax003Vax004Build'
# load raw demographic and RNACD3 data
hgsql hivVax003Vax004Build < GSID_DEMOG_SEQNO_003Raw.sql
hgsql hivVax003Vax004Build < VAX003_RNACD4080501Raw.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "GSID_DEMOG_SEQNO_003.txt" into table GSID_DEMOG_SEQNO_003Raw ignore 1 lines'
hgsql hivVax003Vax004Build -e \
'load data local infile "VAX003_RNACD4080501.txt" into table VAX003_RNACD4080501Raw ignore 1 lines'
# build initial gsidClinicRecTemp table ...
hgsql hivVax003Vax004Build -N -e \
'select "specId",GSID, MBLabcd, DRNACD4, "rna","cd4" from VAX003_RNACD4080501Raw' \
>gsidClinicRecTemp.tab
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecTemp'
getDbTableDef hiv1 gsidClinicRecTemp >gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build < gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'
# build subjLabcode table ...
hgsql hivVax003Vax004Build -N -e \
'select GSID, MBLabcd from VAX003_RNACD4080501Raw where MBLabcd!=""' \
| sort -u > subjLabcode.tab
hgsql hivVax003Vax004Build -e "drop table subjLabcode"
getDbTableDef hiv1 subjLabcode > subjLabcode.sql
hgsql hivVax003Vax004Build < subjLabcode.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "subjLabcode.tab" into table subjLabcode'
# fill in labCode in gsidClinicRecTemp
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'
# fill in specimenId
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO_003Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'
# fill in RNA
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.hivQuan=r.RNA where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'
# fill in CD4
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.cd4Count=r.CD4ABS where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'
# change RNA "399" to "200" (which will be displayed as "<400")
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set hivQuan="200" where hivQuan = "399"'
# update cd4 NULL ...
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="."'
# Echo update daysCollection NULL ...
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'
# build gsidClinicRecNew table
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecTemp ' \
|uniq |sed -e 's/NULL/-1/g' > gsidClinicRecNew.tab
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew'
hgsql hivVax003Vax004Build < gsidClinicRecNew.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'
# set NULLs
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'
# build gsidClinicRecWithSeqNew table
hgsql hivVax003Vax004Build -N -e \
'select c.* from GSID_DEMOG_SEQNO_003Raw r,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and r.subjId=c.subjId and r.labCode=c.labCode' \
|sort -u |sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecWithSeqNew'
hgsql hivVax003Vax004Build < gsidClinicRecWithSeqNew.sql
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
rm gsidClinicRecWithSeqNew.tmp
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'
# compare with previous old data
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecWithSeqNew' |sort -u >j.n
hgsql hivVax003Vax004 -N -e 'select * from gsidClinicRecWithSeq'|sort -u >j.o
diff j.o j.n |grep -v "GSID4" >j.diff
# load the newly build data into hivVax003Vax004 tables
hgsql hivVax003Vax004 -e 'delete from gsidClinicRec where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e \
"insert into gsidClinicRec select * from hivVax003Vax004Build.gsidClinicRecNew"
hgsql hivVax003Vax004 -e \
"insert into gsidClinicRecWithSeq select * from hivVax003Vax004Build.gsidClinicRecWithSeqNew"
#########################################################################
# Build the gsidSubjSeq table (used by Table View).
gsidSubjSeq hivVax003Vax004 dnaSeqId > j.dna
gsidSubjSeq hivVax003Vax004 aaSeqId > j.aa
cut -f 1 j.dna >j.1
cut -f 1 j.aa >j.2
cut -f 2 j.dna >j.3
cut -f 2 j.aa >j.4
paste j.1 j.3 j.4> gsidSubjSeq.tab
hgsql hivVax003Vax004 -e 'delete from gsidSubjSeq'
hgsql hivVax003Vax004 -e \
'load data local infile "gsidSubjSeq.tab" into table gsidSubjSeq'
rm j.1 j.2 j.3 j.4 j.dna j.aa
#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 AE STRAIN (DONE, 7/10/08, Fan)
# First cut the vax003AEMsa sequences so that they start with VPV and end with REKR
# rename existing vax003AEMsa table as vax003AEMsaOld
hgsql hivVax003Vax004 –e 'rename table vax003AEMsa to vax003AEMsaOld'
# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
hgsql hivVax003Vax004 -N -e \
'select id,substring(seq, 124, 1743) from vax003AEMsaOld' >vax003AEMsa.tab
tabToFa vax003AEMsa
# use resulting vax003AEMsa.fa to check that the cut is correct, and then load the new MSA sequences.
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003AEMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003AEMsa.tab" into table vax003AEMsa'
# RE-BUILD CONSERVATION TRACKS FOR VAX003 AE STRAIN
mkdir -p \
/cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AECons.wig vax003AEConsensus.fa
# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AECons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AEAaCons.wig vax003AEAaConsensus.fa
# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib
# CREATE MAF TRACKS FOR VAX003 AE STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 AE
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interium file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf -p
hgLoadMaf hivVax003Vax004 vax003AEMaf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
hgLoadMaf hivVax003Vax004 vax003AEAaMaf
#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 B STRAIN (DONE, 7/10/08, Fan)
# First cut the vax003BMsa sequences so that they start with VPV and end with REKR
# rename existing vax003BMsa table as vax003BMsaOld
hgsql hivVax003Vax004 –e 'rename table vax003BMsa to vax003BMsaOld'
# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
hgsql hivVax003Vax004 -N -e \
'select id,substring(seq, 121, 1620) from vax003BMsaOld' >vax003BMsa.tab
tabToFa vax003BMsa
# use resulting vax003BMsa.fa to check that the cut is correct, then load the new MSA sequences.
hgsql hivVax003Vax004 -e 'drop table vax003BMsa'
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003BMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003BMsa.tab" into table vax003BMsa'
# RE-BUILD CONSERVATION TRACKS FOR VAX003 B STRAIN
mkdir -p \
/cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6348 vax003BCons.wig vax003BConsensus.fa
# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BCons stdin
# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib
# do the same for protein conservation track
mkdir aa
cd aa
# create .wig file
gsidAaMsa2 hivVax003Vax004 vax0 '03BMsa HXB2 6348 vax003BAaCons.wig vax003BAaConsensus.fa
# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
| hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib
# CREATE MAF TRACKS FOR VAX003 B STRAIN
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
# create a script file, doall
hgsql hivVax003Vax004 -N -e \
'select id from dnaSeq where id like "%T%"'\
|sed -e 's/ss/do1 ss/g' >doall
# create one line script file, do1, with the following line in it:
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
chmod +x do*
# run the script to get the .tab file with all MSA sequences of VAX003 B
doall >Vax003Vax004.tab
# convert .tab into .fa file
tabToFa Vax003Vax004
# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln
# prepare an interium file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa
# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp
# rm jjAll.mfa j.out
cat chr1.tmp |sed -e 's/ss_T/T/g' >chr1.maf
# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp chr1.maf /gbdb/hivVax003Vax004/vax003BMaf -p
hgLoadMaf hivVax003Vax004 vax003BMaf
# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf
########################################################################################
# REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)
mkdir -p /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
cd /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
#copy table gsidClinicRecNew into gsidClinicRecNew2
cp ~/hg/lib/gsidClinicRec.sql gsidClinicRecNew2.sql
vi gsidClinicRecNew2.sql
hgsql hivVax003Vax004Build -e 'drop table gsidClinicRecNew2'
hgsql hivVax003Vax004Build < gsidClinicRecNew2.sql
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew2.tab
hgsql hivVax003Vax004Build -e \
'load data local infile "gsidClinicRecNew2.tab" into table gsidClinicRecNew2'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >j.tab
diff j.tab gsidClinicRecNew2.tab
# change 200 to 399 so that they are consistent between two table
hgsql hivVax003Vax004Build -e 'update gsidClinicRecNew2 set hivQuan=399 where hivQuan=200'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >jj.tab
diff j.tab jj.tab
# rebuild the gsidClinicRecWithSeq table for VAX003 subjects
hgsql hivVax003Vax004Build -N -e \
'select c.specimenId, c.subjId, c.labCode, c.daysCollection, r.RNA, r.CD4ABS from gsidClinicRecNew2 c, VAX003_RNACD4080501Raw r where c.specimenId=r.SpecimenNo and c.daysCollection=r.DRNACD4 and c.subjId=r.GSID and r.RNA=c.hivQuan and r.CD4ABS=c.cd4Count and r.SpecimenNo != ""' >j.out
cut -f 1-4 j.out >j.1
# revert back from 399 to 200
cut -f 5-6 j.out | sed -e 's/399\t/200\t/' >j.2
paste j.1 j.2 >gsidClinicRecWithSeq.vax003.tab
hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
# update the same table for the other 3 genomes
hgsql hivgne8v2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivgne8v2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
hgsql hivmn2 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hivmn2 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
hgsql hiva244 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
hgsql hiva244 -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
######################################################################################
# Create VAX003 subtype B Positive Selection tracks for hivVax003Vax004
cd /hive/groups/gsid/medical/hiv/hivVax003Vax004
mkdir posSelection
cd posSelection
# BLAT /hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file BMsa.psl
hgLoadPsl -keep -table=BMsaPsl -nobin hivVax003Vax004 BMsa.psl
# will get the following error:
#Processing BMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE
'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
INTO TABLE BMsaPsl
#mySQL error 13: Can't get stat of
'/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl'
(Errcode: 13)
# load manually then
hgsql hivVax003Vax004
load data local infile "BMsa.psl" into table BMsaPsl;
quit
# build the positive selection tracks for model 2 and model 8.
gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel2
posSelModel2.bed
hgLoadBed hivVax003Vax004 posSelModel2 posSelModel2.bed
gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel8
posSelModel8.bed
hgLoadBed hivVax003Vax004 posSelModel8 posSelModel8.bed
##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE
ssh hiv1
mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
cd /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
# BLAT
# /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file AEMsa.psl
hgLoadPsl -keep -table=AEMsaPsl -nobin hivVax003Vax004 AEMsa.psl
# will get the following error:
#Processing AEMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.ps#l'
INTO TABLE AEMsaPsl
#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.ps#l'
(Errcode: 13)
# load manually then
hgsql hivVax003Vax004
load data local infile "AEMsa.psl" into table AEMsaPsl;
quit
# build positive selection tracks for model 2 and model 8.
gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel2
posSelAEModel2.bed
hgLoadBed hivVax003Vax004 posSelAEModel2 posSelAEModel2.bed
gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel8
posSelAEModel8.bed
hgLoadBed hivVax003Vax004 posSelAEModel8 posSelAEModel8.bed
##########################################################################
+# BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)
+cd /cluster/hive/groups/gsid/medical/hiv/posSelection
+
+mkdir vax004
+cd vax004
+
+# Since there are large number (12) of subclasses and 4 HIV genomes,
+# this has to be automated. So create the do1, do2, do3 script first.
+# Please note that the do3 script works on all 4 HIV genomes.
+
+cat << '_EOF_' >do1
+#do1.1
+
+mkdir -p $1
+# start with clean slate
+rm $1/*
+
+cp -p /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
+
+cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/data/$1.nex $1
+
+cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
+
+#do1.2
+
+cd $1
+cat VAX004-$1-sites.paml|grep "+-" >j.tmp
+get1stHalf j.tmp >$1Model2.paml
+cat $1Model2.paml |\
+sed -e 's/+-//g'|\
+sed -e 's/ \* / xxx /g'|\
+sed -e 's/\*//g'|\
+sed -e 's/xxx/\*/g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ //'|\
+sed -e 's/ /\t/g' > vax004$1Model2.tab
+
+hgLoadSqlTab -notOnServer hgFixed vax004$1Model2 ~/src/hg/lib/posSelectModel.sql vax004$1Model2.tab
+
+get2ndHalf j.tmp >$1Model8.paml
+cat $1Model8.paml |\
+sed -e 's/+-//g'|\
+sed -e 's/ \* / xxx /g'|\
+sed -e 's/\*//g'|\
+sed -e 's/xxx/\*/g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ / /g'|\
+sed -e 's/ //'|\
+sed -e 's/ /\t/g' > vax004$1Model8.tab
+
+hgLoadSqlTab -notOnServer hgFixed vax004$1Model8 ~/src/hg/lib/posSelectModel.sql vax004$1Model8.tab
+
+rm j.tmp
+
+#do1.3
+
+cat $1.nex|grep 'U\.'|\
+ sed -e 's/ / /g'|\
+ sed -e 's/ / /g'|\
+ sed -e 's/ / /g'|\
+ sed -e 's/ / /g'|\
+ sed -e 's/ / /g'|\
+ sed -e 's/ /\t/g' >vax004$1Msa.tab
+chmod +rx *.tab
+
+hgLoadSqlTab -notOnServer hgFixed vax004$1Msa /hive/groups/gsid/medical/hiv/posSelection/vax004/dnaSeq.sql vax004$1Msa.tab
+cd ..
+
+#do1.4
+
+hgsql -N -e "select concat('do2 ${1} ', id) from hgFixed.vax004${1}Msa limit 1" >doit
+chmod +x doit
+doit
+'_EOF_'
+
+chmod +x do1
+
+cat << '_EOF_' >do2
+gsidAaMsa2 hgFixed vax004$1Msa $2 1 $1/$1Msa.wig $1/$1MsaAaConsensus.fa
+'_EOF_'
+chmod +x do2
+
+cat << '_EOF_' >do3
+# process hivVax003Vax004
+hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivVax003Vax004 ', port) from blatServers where db='hivVax003Vax004' and isTrans=1" >doBlat
+chmod +x doBlat
+./doBlat
+
+cd $1
+gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
+hgLoadBed hivVax003Vax004 posSelVax004$1Model2 posSelVax004$1Model2.bed
+
+gsidPosSelect hivVax003Vax004 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
+hgLoadBed hivVax003Vax004 posSelVax004$1Model8 posSelVax004$1Model8.bed
+cd ..
+
+# process hivmn2
+hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivmn2 ', port) from blatServers where db='hivmn2' and isTrans=1" >doBlat
+chmod +x doBlat
+./doBlat
+
+cd $1
+gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
+hgLoadBed hivmn2 posSelVax004$1Model2 posSelVax004$1Model2.bed
+
+gsidPosSelect hivmn2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
+hgLoadBed hivmn2 posSelVax004$1Model8 posSelVax004$1Model8.bed
+cd ..
+
+# process hivgne8v2
+hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hivgne8v2 ', port) from blatServers where db='hivgne8v2' and isTrans=1" >doBlat
+chmod +x doBlat
+./doBlat
+
+cd $1
+gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
+hgLoadBed hivgne8v2 posSelVax004$1Model2 posSelVax004$1Model2.bed
+
+gsidPosSelect hivgne8v2 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
+hgLoadBed hivgne8v2 posSelVax004$1Model8 posSelVax004$1Model8.bed
+cd ..
+
+# process hiva244
+hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} hiva244 ', port) from blatServers where db='hiva244' and isTrans=1" >doBlat
+chmod +x doBlat
+./doBlat
+
+cd $1
+gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model2 posSelVax004$1Model2.bed
+hgLoadBed hiva244 posSelVax004$1Model2 posSelVax004$1Model2.bed
+
+gsidPosSelect hiva244 vax004$1MsaPsl hgFixed vax004$1Model8 posSelVax004$1Model8.bed
+hgLoadBed hiva244 posSelVax004$1Model8 posSelVax004$1Model8.bed
+cd ..
+'_EOF_'
+
+chmod +x do3
+
+# Now run the scripts for all subclasses.
+
+do1 Hispanic
+do1 Midwest
+do1 Northeast
+do1 Other
+do1 South
+do1 Southwest
+do1 Westcoast
+do1 White
+do1 Asian
+do1 Black
+do1 pla
+do1 vac
+
+# BTW, do1 calls do2
+
+do3 Hispanic
+do3 Midwest
+do3 Northeast
+do3 Other
+do3 South
+do3 Southwest
+do3 Westcoast
+do3 White
+do3 Asian
+do3 Black
+do3 pla
+do3 vac
+
+##########################################################################