src/hg/tcga/pBamBam/asBamBam.c 1.8
1.8 2010/03/14 23:58:25 jsanborn
updated
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 4 -r1.7 -r1.8
--- src/hg/tcga/pBamBam/asBamBam.c 8 Mar 2010 19:56:11 -0000 1.7
+++ src/hg/tcga/pBamBam/asBamBam.c 14 Mar 2010 23:58:25 -0000 1.8
@@ -474,13 +474,8 @@
continue;
if (NUCS[i] == majorL.nuc && majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
{
- // if (countsL1[i] >= 2 && countsL2[i] >= 1)
- // retVal1 = 1;
- // else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
- // retVal1 = 1;
-
if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
retVal1 = 1;
else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
retVal1 = 1;
@@ -499,13 +494,8 @@
}
if (NUCS[i] == minorL.nuc && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
{
- // if (countsL1[i] >= 2 && countsL2[i] >= 1)
- // retVal2 = 1;
- // else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
- // retVal2 = 1;
-
if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
retVal2 = 1;
else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
retVal2 = 1;
@@ -793,147 +783,8 @@
interR.qstrand = -1;
interR.mstrand = -1;
}
-static int inter_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
-{
-int i;
-const bam_pileup1_t *p;
-int maxStartL = -1, maxStartR = -1;
-int nL = 0, nR = 0;
-for (i = 0; i < d->nL; ++i)
- {
- p = d->puL + i;
- if (p->b->core.qual < ap->minMapQ)
- continue;
- bam1_core_t *c = &p->b->core;
- if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
- continue; // read is not paired or unmapped.
- if ((c)->flag & BAM_FMUNMAP)
- continue; // mate is unmapped.
- if ((c)->mtid == (c)->tid)
- continue; // mate is on same chrom. MERGE INTER CODE HERE?
-
- int qstrand = bam1_strand(p->b);
- int mstrand = bam1_mstrand(p->b);
-
- int32_t qstart = (c)->pos;
- int32_t qstop = qstart + (c)->l_qseq;
- int32_t mstart = (c)->mpos;
- int32_t mstop = mstart + (c)->l_qseq;
-
- if (qstart > maxStartL)
- maxStartL = qstart;
-
- if (qstart <= ap->svStartL)
- continue;
- ap->svStartL = qstart;
- nL += 1;
-
- if (interL.tid == -1)
- {
- interL.tid = (c)->tid;
- interL.mtid = (c)->mtid;
- interL.qstart = qstart;
- interL.qstop = qstop;
- interL.mstart = mstart;
- interL.mstop = mstop;
- interL.qstrand = qstrand;
- interL.mstrand = mstrand;
- interL.qual = (c)->qual;
- interL.supp = 1;
- }
- else if (qstart < (interL.qstop + BUF) && qstop > (interL.qstart - BUF) &&
- mstart < (interL.mstop + BUF) && mstop > (interL.mstart - BUF))
- {
- if (interL.qstart > qstart)
- interL.qstart = qstart;
- if (interL.qstop < qstop)
- interL.qstop = qstop;
- if (interL.mstart > mstart)
- interL.mstart = mstart;
- if (interL.mstop < mstop)
- interL.mstop = mstop;
-
- interL.qual += (c)->qual;
- interL.supp += 1;
- }
- }
-
-for (i = 0; i < d->nR; ++i)
- {
- p = d->puR + i;
- if (p->b->core.qual < ap->minMapQ)
- continue;
- bam1_core_t *c = &p->b->core;
- if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
- continue; // read is not paired or unmapped.
- if ((c)->flag & BAM_FMUNMAP)
- continue; // mate is unmapped.
- if ((c)->mtid == (c)->tid)
- continue; // mate is on same chrom.
-
- int qstrand = bam1_strand(p->b);
- int mstrand = bam1_mstrand(p->b);
-
- int32_t qstart = (c)->pos;
- int32_t qstop = qstart + (c)->l_qseq;
- int32_t mstart = (c)->mpos;
- int32_t mstop = mstart + (c)->l_qseq;
-
- if (qstart > maxStartR)
- maxStartR = qstart;
-
- if (qstart <= ap->svStartR)
- continue;
- ap->svStartR = qstart;
- nR += 1;
-
- if (interR.tid == -1)
- {
- interR.tid = (c)->tid;
- interR.mtid = (c)->mtid;
- interR.qstart = qstart;
- interR.qstop = qstop;
- interR.mstart = mstart;
- interR.mstop = mstop;
- interR.mstrand = mstrand;
- interR.qstrand = qstrand;
- interR.qual = (c)->qual;
- interR.supp = 1;
- }
- else if (qstart < (interR.qstop + BUF) && qstop > (interR.qstart - BUF) &&
- mstart < (interR.mstop + BUF) && mstop > (interR.mstart - BUF))
- {
- if (interR.qstart > qstart)
- interR.qstart = qstart;
- if (interR.qstop < qstop)
- interR.qstop = qstop;
- if (interR.mstart > mstart)
- interR.mstart = mstart;
- if (interR.mstop < mstop)
- interR.mstop = mstop;
-
- interR.qual += (c)->qual;
- interR.supp += 1;
- }
- }
-
-if (interL.supp > 0)
- {
- if (maxStartL <= interL.qstop + BUF)
- return 0; // there still might be reads to overlap
-
- /* check average mapping quality of discordants */
- int qual = ceil((double) interL.qual / (double) interL.supp);
- if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
- return 1;
- reset_inter();
- }
-
-return 0;
-}
-
static inline void reset_intra()
{
intraL.tid = -1;
intraL.qstart = -1;
@@ -955,150 +806,203 @@
intraR.qstrand = -1;
intraR.mstrand = -1;
}
-static int intra_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
+static inline void update_sv(const bam_pileup1_t *p, sv_t *sv)
+{
+if (!p)
+ return;
+
+bam1_core_t *c = &p->b->core;
+int qstrand = bam1_strand(p->b);
+int mstrand = bam1_mstrand(p->b);
+int32_t qstart = (c)->pos;
+int32_t qstop = qstart + (c)->l_qseq;
+int32_t mstart = (c)->mpos;
+int32_t mstop = mstart + (c)->l_qseq;
+
+if (sv->tid == -1) // not set yet
+ {
+ sv->tid = (c)->tid;
+ sv->mtid = (c)->mtid;
+ sv->qstart = qstart;
+ sv->qstop = qstop;
+ sv->mstart = mstart;
+ sv->mstop = mstop;
+ sv->mstrand = mstrand;
+ sv->qstrand = qstrand;
+ sv->qual = (c)->qual;
+ sv->supp = 1;
+ }
+else if ((c)->tid == sv->tid && (c)->mtid == sv->mtid &&
+ qstrand == sv->qstrand && mstrand == sv->mstrand &&
+ qstart < (sv->qstop + BUF) && qstop > (sv->qstart - BUF) &&
+ mstart < (sv->mstop + BUF) && mstop > (sv->mstart - BUF))
+ { // overlapping on both sides within +/- BUF
+ if (sv->qstart > qstart)
+ sv->qstart = qstart;
+ if (sv->qstop < qstop)
+ sv->qstop = qstop;
+ if (sv->mstart > mstart)
+ sv->mstart = mstart;
+ if (sv->mstop < mstop)
+ sv->mstop = mstop;
+
+ sv->qual += (c)->qual;
+ sv->supp += 1;
+ }
+}
+
+static inline void update_hq_discordant(const bam_pileup1_t *p, const bam_pileup1_t **inter, const bam_pileup1_t **intra)
+{
+if (!p)
+ return;
+
+bam1_core_t *c = &p->b->core;
+if ((c)->tid != (c)->mtid)
+ {
+ if (!(*inter)) // best inter-chrom not set yet
+ *inter = p;
+ else if ((*inter)->b->core.qual > (c)->qual)
+ *inter = p;
+ }
+else
+ {
+ int qstrand = bam1_strand(p->b);
+ int mstrand = bam1_mstrand(p->b);
+
+ if ((qstrand != mstrand) && abs((c)->isize) <= maxISize)
+ return;
+
+ if (!(*intra))
+ *intra = p;
+ else if ((*intra)->b->core.qual > (c)->qual)
+ *intra = p;
+ }
+}
+
+static inline void struct_var_scan(dual_pileup_t *d, analysis_params_t *ap,
+ int *minStartL, int *minStartR)
{
int i;
-const bam_pileup1_t *p;
-int maxStartL = -1, maxStartR = -1;
-int nL = 0, nR = 0;
+const bam_pileup1_t *p, *pInterL = NULL, *pIntraL = NULL;
+int prevStart = ap->svStartL;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
if (p->b->core.qual < ap->minMapQ)
continue;
bam1_core_t *c = &p->b->core;
- if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
- continue; // read is not paired or unmapped.
- if ((c)->flag & BAM_FMUNMAP)
- continue; // mate is unmapped.
- if ((c)->mtid != (c)->tid)
- continue; // mate is on different chrom. MERGE INTER CODE HERE?
+ if (!((c)->flag & BAM_FPAIRED))
+ continue; // read is not paired
+ if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
+ continue; // read or its mate is unmapped.
+ int32_t qstart = (c)->pos;
+ if (*minStartL == -1 || qstart < *minStartL)
+ *minStartL = qstart;
int qstrand = bam1_strand(p->b);
int mstrand = bam1_mstrand(p->b);
- int orientation = (qstrand == mstrand);
-
- int32_t qstart = (c)->pos;
- int32_t qstop = qstart + (c)->l_qseq;
- int32_t mstart = (c)->mpos;
- int32_t mstop = mstart + (c)->l_qseq;
- if (qstart > maxStartL)
- maxStartL = qstart;
+ if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize)
+ continue; // mate is properly paired
- if (orientation || abs((c)->isize) > maxISize)
+ if (ap->svStartL == -1)
{
- if (qstart <= ap->svStartL)
- continue;
ap->svStartL = qstart;
- nL += 1;
+ prevStart = ap->svStartL;
+ }
- if (intraL.tid == -1)
+ if (qstart < ap->svStartL)
+ continue;
+ else if (qstart == ap->svStartL)
+ update_hq_discordant(p, &pInterL, &pIntraL);
+ else
{
- intraL.tid = (c)->tid;
- intraL.qstart = qstart;
- intraL.qstop = qstop;
- intraL.mstart = mstart;
- intraL.mstop = mstop;
- intraL.qstrand = qstrand;
- intraL.mstrand = mstrand;
- intraL.qual = (c)->qual;
- intraL.supp = 1;
- }
- else if (qstart < (intraL.qstop + BUF) && qstop > (intraL.qstart - BUF) &&
- mstart < (intraL.mstop + BUF) && mstop > (intraL.mstart - BUF))
- {
- if (intraL.qstart > qstart)
- intraL.qstart = qstart;
- if (intraL.qstop < qstop)
- intraL.qstop = qstop;
- if (intraL.mstart > mstart)
- intraL.mstart = mstart;
- if (intraL.mstop < mstop)
- intraL.mstop = mstop;
-
- intraL.qual += (c)->qual;
- intraL.supp += 1;
- }
+ ap->svStartL = qstart;
+ break;
}
}
+if (ap->svStartL == prevStart)
+ ap->svStartL += 1;
+prevStart = ap->svStartR;
+const bam_pileup1_t *pInterR = NULL, *pIntraR = NULL;
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
if (p->b->core.qual < ap->minMapQ)
continue;
bam1_core_t *c = &p->b->core;
- if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
- continue; // read is not paired or unmapped.
- if ((c)->flag & BAM_FMUNMAP)
- continue; // mate is unmapped.
- if ((c)->mtid != (c)->tid)
- continue; // mate is on different chrom. MERGE INTER CODE HERE?
+ if (!((c)->flag & BAM_FPAIRED))
+ continue; // read is not paired
+ if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
+ continue; // read or its mate is unmapped.
+
+ int32_t qstart = (c)->pos;
+ if (*minStartR == -1 || qstart < *minStartR)
+ *minStartR = qstart;
int qstrand = bam1_strand(p->b);
int mstrand = bam1_mstrand(p->b);
- int orientation = (qstrand == mstrand);
-
- int32_t qstart = (c)->pos;
- int32_t qstop = qstart + (c)->l_qseq;
- int32_t mstart = (c)->mpos;
- int32_t mstop = mstart + (c)->l_qseq;
- if (qstart > maxStartR)
- maxStartR = qstart;
+ if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize)
+ continue; // mate is properly paired
- if (orientation || abs((c)->isize) > maxISize)
+ if (ap->svStartR == -1)
{
- if (qstart <= ap->svStartR)
- continue;
ap->svStartR = qstart;
- nR += 1;
-
- if (intraR.tid == -1)
- {
- intraR.tid = (c)->tid;
- intraR.qstart = qstart;
- intraR.qstop = qstop;
- intraR.mstart = mstart;
- intraR.mstop = mstop;
- intraR.mstrand = mstrand;
- intraR.qstrand = qstrand;
- intraR.qual = (c)->qual;
- intraR.supp = 1;
+ prevStart = ap->svStartR;
}
- else if (qstart < (intraR.qstop + BUF) && qstop > (intraR.qstart - BUF) &&
- mstart < (intraR.mstop + BUF) && mstop > (intraR.mstart - BUF))
- {
- if (intraR.qstart > qstart)
- intraR.qstart = qstart;
- if (intraR.qstop < qstop)
- intraR.qstop = qstop;
- if (intraR.mstart > mstart)
- intraR.mstart = mstart;
- if (intraR.mstop < mstop)
- intraR.mstop = mstop;
- intraR.qual += (c)->qual;
- intraR.supp += 1;
- }
+ if (qstart < ap->svStartR)
+ continue;
+ else if (qstart == ap->svStartR)
+ update_hq_discordant(p, &pInterR, &pIntraR);
+ else
+ {
+ ap->svStartR = qstart;
+ break;
}
}
+if (ap->svStartR == prevStart)
+ ap->svStartR += 1;
+
+update_sv(pIntraL, &intraL);
+update_sv(pInterL, &interL);
+update_sv(pIntraR, &intraR);
+update_sv(pInterR, &interR);
+}
-if (intraL.supp > 0) // || intraR.supp > 0)
+static inline int is_intra_sv(analysis_params_t *ap, int minStart)
+{
+if (intraL.supp > 0)
{
- if (maxStartL <= intraL.qstop + BUF)
+ if (minStart <= intraL.qstop)
return 0; // there still might be reads to overlap
/* check average mapping quality of discordants */
int qual = ceil((double) intraL.qual / (double) intraL.supp);
if (intraL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
return 1;
reset_intra();
}
+return 0;
+}
+
+static inline int is_inter_sv(analysis_params_t *ap, int minStart)
+{
+if (interL.supp > 0)
+ {
+ if (minStart <= interL.qstop)
+ return 0; // there still might be reads to overlap
+ /* check average mapping quality of discordants */
+ int qual = ceil((double) interL.qual / (double) interL.supp);
+ if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
+ return 1;
+ reset_inter();
+ }
return 0;
}
static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
@@ -1126,12 +1030,13 @@
return 0;
}
-
static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
{
-if (inter_struct_variant(d, ap))
+int minStartL = -1, minStartR = -1;
+struct_var_scan(d, ap, &minStartL, &minStartR);
+if (is_inter_sv(ap, minStartL))
{
printf("SVS\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
@@ -1168,9 +1073,9 @@
reset_inter();
}
-if (intra_struct_variant(d, ap))
+if (is_intra_sv(ap, minStartL))
{
printf("SVS\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);