src/hg/tcga/pBamBam/asBamBam.c 1.8

1.8 2010/03/14 23:58:25 jsanborn
updated
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 4 -r1.7 -r1.8
--- src/hg/tcga/pBamBam/asBamBam.c	8 Mar 2010 19:56:11 -0000	1.7
+++ src/hg/tcga/pBamBam/asBamBam.c	14 Mar 2010 23:58:25 -0000	1.8
@@ -474,13 +474,8 @@
 	continue;
 
     if (NUCS[i] == majorL.nuc && majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
 	{
-	//      if (countsL1[i] >= 2 && countsL2[i] >= 1)
-	//          retVal1 = 1;
-	//      else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
-	//          retVal1 = 1;
-
 	if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
 	    retVal1 = 1;
 	else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
 	    retVal1 = 1;
@@ -499,13 +494,8 @@
 	}
 
     if (NUCS[i] == minorL.nuc && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
 	{
-	//      if (countsL1[i] >= 2 && countsL2[i] >= 1)
-	//          retVal2 = 1;
-	//      else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
-	//          retVal2 = 1;
-
 	if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
 	    retVal2 = 1;
 	else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
 	    retVal2 = 1;
@@ -793,147 +783,8 @@
 interR.qstrand = -1;
 interR.mstrand = -1;
 }
 
-static int inter_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
-{
-int i;
-const bam_pileup1_t *p;
-int maxStartL = -1, maxStartR = -1;
-int nL = 0, nR = 0;
-for (i = 0; i < d->nL; ++i)
-    {
-    p = d->puL + i;
-    if (p->b->core.qual < ap->minMapQ)
-	continue;
-    bam1_core_t *c = &p->b->core;
-    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
-	continue; // read is not paired or unmapped.
-    if ((c)->flag & BAM_FMUNMAP)
-	continue; // mate is unmapped.
-    if ((c)->mtid == (c)->tid)
-	continue;  // mate is on same chrom. MERGE INTER CODE HERE?
-
-    int qstrand = bam1_strand(p->b);
-    int mstrand = bam1_mstrand(p->b);
-    
-    int32_t qstart = (c)->pos;
-    int32_t qstop  = qstart + (c)->l_qseq;
-    int32_t mstart = (c)->mpos;
-    int32_t mstop = mstart + (c)->l_qseq;
-
-    if (qstart > maxStartL)
-	maxStartL = qstart;
-
-    if (qstart <= ap->svStartL)
-	continue;
-    ap->svStartL = qstart;
-    nL += 1;
-    
-    if (interL.tid == -1)
-	{
-	interL.tid     = (c)->tid;
-	interL.mtid    = (c)->mtid;
-	interL.qstart  = qstart;
-	interL.qstop   = qstop;
-	interL.mstart  = mstart;
-	interL.mstop   = mstop;
-	interL.qstrand = qstrand;
-	interL.mstrand = mstrand;
-	interL.qual    = (c)->qual;
-	interL.supp    = 1;
-	}
-    else if (qstart < (interL.qstop + BUF) && qstop > (interL.qstart - BUF) &&
-	     mstart < (interL.mstop + BUF) && mstop > (interL.mstart - BUF))  
-	{
-	if (interL.qstart > qstart)
-	    interL.qstart = qstart;
-	if (interL.qstop < qstop)
-	    interL.qstop = qstop;
-	if (interL.mstart > mstart)
-	    interL.mstart = mstart;
-	if (interL.mstop < mstop)
-	    interL.mstop = mstop;
-	
-	interL.qual += (c)->qual;
-	interL.supp += 1;
-	}    
-    }
-
-for (i = 0; i < d->nR; ++i)
-    {
-    p = d->puR + i;
-    if (p->b->core.qual < ap->minMapQ)
-	continue;
-    bam1_core_t *c = &p->b->core;
-    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
-	continue; // read is not paired or unmapped.
-    if ((c)->flag & BAM_FMUNMAP)
-	continue; // mate is unmapped.
-    if ((c)->mtid == (c)->tid)
-	continue;  // mate is on same chrom.
-
-    int qstrand = bam1_strand(p->b);
-    int mstrand = bam1_mstrand(p->b);
-    
-    int32_t qstart = (c)->pos;
-    int32_t qstop  = qstart + (c)->l_qseq;
-    int32_t mstart = (c)->mpos;
-    int32_t mstop = mstart + (c)->l_qseq;
-
-    if (qstart > maxStartR)
-	maxStartR = qstart;
-
-    if (qstart <= ap->svStartR)
-	continue;
-    ap->svStartR = qstart;
-    nR += 1;
-
-    if (interR.tid == -1)
-	{
-	interR.tid     = (c)->tid;
-	interR.mtid    = (c)->mtid;
-	interR.qstart  = qstart;
-	interR.qstop   = qstop;
-	interR.mstart  = mstart;
-	interR.mstop   = mstop;
-	interR.mstrand = mstrand;
-	interR.qstrand = qstrand;
-	interR.qual    = (c)->qual;
-	interR.supp    = 1;
-	}
-    else if (qstart < (interR.qstop + BUF) && qstop > (interR.qstart - BUF) &&
-	     mstart < (interR.mstop + BUF) && mstop > (interR.mstart - BUF))  
-	{
-	if (interR.qstart > qstart)
-	    interR.qstart = qstart;
-	if (interR.qstop < qstop)
-	    interR.qstop = qstop;
-	if (interR.mstart > mstart)
-	    interR.mstart = mstart;
-	if (interR.mstop < mstop)
-	    interR.mstop = mstop;
-	
-	interR.qual += (c)->qual;
-	interR.supp += 1;
-	}
-    }
-
-if (interL.supp > 0)
-    {
-    if (maxStartL <= interL.qstop + BUF)
-	return 0;  // there still might be reads to overlap
-
-    /* check average mapping quality of discordants */
-    int qual = ceil((double) interL.qual / (double) interL.supp); 
-    if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
-	return 1;
-    reset_inter();
-    }
-
-return 0;
-}
-
 static inline void reset_intra()
 { 
 intraL.tid     = -1;
 intraL.qstart  = -1;
@@ -955,150 +806,203 @@
 intraR.qstrand = -1;
 intraR.mstrand = -1;
 }
 
-static int intra_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
+static inline void update_sv(const bam_pileup1_t *p, sv_t *sv)
+{
+if (!p)
+    return;
+
+bam1_core_t *c = &p->b->core;
+int qstrand    = bam1_strand(p->b);
+int mstrand    = bam1_mstrand(p->b);
+int32_t qstart = (c)->pos;
+int32_t qstop  = qstart + (c)->l_qseq;
+int32_t mstart = (c)->mpos;
+int32_t mstop  = mstart + (c)->l_qseq;
+
+if (sv->tid == -1)  // not set yet
+    {
+    sv->tid     = (c)->tid;
+    sv->mtid    = (c)->mtid;
+    sv->qstart  = qstart;
+    sv->qstop   = qstop;
+    sv->mstart  = mstart;
+    sv->mstop   = mstop;
+    sv->mstrand = mstrand;
+    sv->qstrand = qstrand;
+    sv->qual    = (c)->qual;
+    sv->supp    = 1;
+    }
+else if ((c)->tid == sv->tid && (c)->mtid == sv->mtid &&
+	 qstrand == sv->qstrand && mstrand == sv->mstrand &&
+	 qstart < (sv->qstop + BUF) && qstop > (sv->qstart - BUF) &&
+	 mstart < (sv->mstop + BUF) && mstop > (sv->mstart - BUF))
+    {  // overlapping on both sides within +/- BUF
+    if (sv->qstart > qstart)
+	sv->qstart = qstart;
+    if (sv->qstop < qstop)
+	sv->qstop = qstop;
+    if (sv->mstart > mstart)
+	sv->mstart = mstart;
+    if (sv->mstop < mstop)
+	sv->mstop = mstop;
+    
+    sv->qual += (c)->qual;
+    sv->supp += 1;
+    }
+}
+
+static inline void update_hq_discordant(const bam_pileup1_t *p, const bam_pileup1_t **inter, const bam_pileup1_t **intra)
+{
+if (!p)
+    return;
+
+bam1_core_t *c = &p->b->core;
+if ((c)->tid != (c)->mtid)
+    {
+    if (!(*inter))  // best inter-chrom not set yet
+	*inter = p;
+    else if ((*inter)->b->core.qual > (c)->qual)
+	*inter = p;	
+    }
+else
+    {
+    int qstrand = bam1_strand(p->b);
+    int mstrand = bam1_mstrand(p->b);
+ 
+    if ((qstrand != mstrand) && abs((c)->isize) <= maxISize)
+	return;
+	
+    if (!(*intra))
+	*intra = p;
+    else if ((*intra)->b->core.qual > (c)->qual)
+	*intra = p;
+    }
+}
+
+static inline void struct_var_scan(dual_pileup_t *d, analysis_params_t *ap, 
+				   int *minStartL, int *minStartR)
 {
 int i;
-const bam_pileup1_t *p;
-int maxStartL = -1, maxStartR = -1;
-int nL = 0, nR = 0;
+const bam_pileup1_t *p, *pInterL = NULL, *pIntraL = NULL;
+int prevStart = ap->svStartL;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     if (p->b->core.qual < ap->minMapQ)
 	continue;
     bam1_core_t *c = &p->b->core;
-    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
-	continue; // read is not paired or unmapped.
-    if ((c)->flag & BAM_FMUNMAP)
-	continue; // mate is unmapped.
-    if ((c)->mtid != (c)->tid)
-	continue;  // mate is on different chrom. MERGE INTER CODE HERE?
+    if (!((c)->flag & BAM_FPAIRED)) 
+	continue; // read is not paired
+    if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
+	continue; // read or its mate is unmapped.
 
+    int32_t qstart = (c)->pos;
+    if (*minStartL == -1 || qstart < *minStartL)
+	*minStartL = qstart;
     int qstrand = bam1_strand(p->b);
     int mstrand = bam1_mstrand(p->b);
-    int orientation = (qstrand == mstrand);
-    
-    int32_t qstart = (c)->pos;
-    int32_t qstop  = qstart + (c)->l_qseq;
-    int32_t mstart = (c)->mpos;
-    int32_t mstop = mstart + (c)->l_qseq;
 
-    if (qstart > maxStartL)
-	maxStartL = qstart;
+    if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize) 
+	continue;  // mate is properly paired
 
-    if (orientation || abs((c)->isize) > maxISize)
+    if (ap->svStartL == -1)
 	{
-	if (qstart <= ap->svStartL)
-	    continue;
 	ap->svStartL = qstart;
-	nL += 1;
+	prevStart = ap->svStartL;
+	}
 
-	if (intraL.tid == -1)
+    if (qstart < ap->svStartL)
+	continue;
+    else if (qstart == ap->svStartL)
+	update_hq_discordant(p, &pInterL, &pIntraL);
+    else
 	    {
-	    intraL.tid     = (c)->tid;
-	    intraL.qstart  = qstart;
-	    intraL.qstop   = qstop;
-	    intraL.mstart  = mstart;
-	    intraL.mstop   = mstop;
-	    intraL.qstrand = qstrand;
-	    intraL.mstrand = mstrand;
-	    intraL.qual    = (c)->qual;
-	    intraL.supp    = 1;
-	    }
-	else if (qstart < (intraL.qstop + BUF) && qstop > (intraL.qstart - BUF) &&
-		 mstart < (intraL.mstop + BUF) && mstop > (intraL.mstart - BUF))  
-	    {
-	    if (intraL.qstart > qstart)
-		intraL.qstart = qstart;
-	    if (intraL.qstop < qstop)
-		intraL.qstop = qstop;
-	    if (intraL.mstart > mstart)
-		intraL.mstart = mstart;
-	    if (intraL.mstop < mstop)
-		intraL.mstop = mstop;
-
-	    intraL.qual += (c)->qual;
-	    intraL.supp += 1;
-	    }
+	ap->svStartL = qstart;
+	break;
 	}
     }
+if (ap->svStartL == prevStart)
+    ap->svStartL += 1;
 
+prevStart = ap->svStartR;
+const bam_pileup1_t *pInterR = NULL, *pIntraR = NULL;
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     if (p->b->core.qual < ap->minMapQ)
 	continue;
     bam1_core_t *c = &p->b->core;
-    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
-	continue; // read is not paired or unmapped.
-    if ((c)->flag & BAM_FMUNMAP)
-	continue; // mate is unmapped.
-    if ((c)->mtid != (c)->tid)
-	continue;  // mate is on different chrom. MERGE INTER CODE HERE?
+    if (!((c)->flag & BAM_FPAIRED)) 
+	continue; // read is not paired
+    if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
+	continue; // read or its mate is unmapped.
+
+    int32_t qstart = (c)->pos;
+    if (*minStartR == -1 || qstart < *minStartR)
+	*minStartR = qstart;
 
     int qstrand = bam1_strand(p->b);
     int mstrand = bam1_mstrand(p->b);
-    int orientation = (qstrand == mstrand);
-    
-    int32_t qstart = (c)->pos;
-    int32_t qstop  = qstart + (c)->l_qseq;
-    int32_t mstart = (c)->mpos;
-    int32_t mstop = mstart + (c)->l_qseq;
 
-    if (qstart > maxStartR)
-	maxStartR = qstart;
+    if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize) 
+	continue;  // mate is properly paired
 
-    if (orientation || abs((c)->isize) > maxISize)
+    if (ap->svStartR == -1)
 	{
-	if (qstart <= ap->svStartR)
-	    continue;
 	ap->svStartR = qstart;
-	nR += 1;
-
-	if (intraR.tid == -1)
-	    {
-	    intraR.tid     = (c)->tid;
-	    intraR.qstart  = qstart;
-	    intraR.qstop   = qstop;
-	    intraR.mstart  = mstart;
-	    intraR.mstop   = mstop;
-	    intraR.mstrand = mstrand;
-	    intraR.qstrand = qstrand;
-	    intraR.qual    = (c)->qual;
-	    intraR.supp    = 1;
+	prevStart = ap->svStartR;
 	    }
-	else if (qstart < (intraR.qstop + BUF) && qstop > (intraR.qstart - BUF) &&
-		 mstart < (intraR.mstop + BUF) && mstop > (intraR.mstart - BUF))  
-	    {
-	    if (intraR.qstart > qstart)
-		intraR.qstart = qstart;
-	    if (intraR.qstop < qstop)
-		intraR.qstop = qstop;
-	    if (intraR.mstart > mstart)
-		intraR.mstart = mstart;
-	    if (intraR.mstop < mstop)
-		intraR.mstop = mstop;
 
-	    intraR.qual += (c)->qual;
-	    intraR.supp += 1;
-	    }
+    if (qstart < ap->svStartR)
+	continue;
+    else if (qstart == ap->svStartR)
+	update_hq_discordant(p, &pInterR, &pIntraR);
+    else
+	{
+	ap->svStartR = qstart;
+	break;
 	}
     }
+if (ap->svStartR == prevStart)
+    ap->svStartR += 1;
+
+update_sv(pIntraL, &intraL);
+update_sv(pInterL, &interL);
+update_sv(pIntraR, &intraR);
+update_sv(pInterR, &interR);
+}
 
-if (intraL.supp > 0) //  || intraR.supp > 0)
+static inline int is_intra_sv(analysis_params_t *ap, int minStart)
+{
+if (intraL.supp > 0)
     {
-    if (maxStartL <= intraL.qstop + BUF)
+    if (minStart <= intraL.qstop)
 	return 0;  // there still might be reads to overlap
 
     /* check average mapping quality of discordants */
     int qual = ceil((double) intraL.qual / (double) intraL.supp); 
     if (intraL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
 	return 1;
     reset_intra();
     }
+return 0;
+}
+
+static inline int is_inter_sv(analysis_params_t *ap, int minStart)
+{
+if (interL.supp > 0)
+    {
+    if (minStart <= interL.qstop)
+	return 0;  // there still might be reads to overlap
 
+    /* check average mapping quality of discordants */
+    int qual = ceil((double) interL.qual / (double) interL.supp); 
+    if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
+	return 1;
+    reset_inter();
+    }
 return 0;
 }
 
 static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
@@ -1126,12 +1030,13 @@
 
 return 0;
 }
 
-
 static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
 {
-if (inter_struct_variant(d, ap))
+int minStartL = -1, minStartR = -1;
+struct_var_scan(d, ap, &minStartL, &minStartR);
+if (is_inter_sv(ap, minStartL))
     {
     printf("SVS\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
@@ -1168,9 +1073,9 @@
 
     reset_inter();
     }
 
-if (intra_struct_variant(d, ap))
+if (is_intra_sv(ap, minStartL))
     {
     printf("SVS\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);