src/hg/tcga/pBamBam/asBamBam.c 1.4

1.4 2010/03/07 03:26:33 jsanborn
updated
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/tcga/pBamBam/asBamBam.c	7 Mar 2010 02:36:46 -0000	1.3
+++ src/hg/tcga/pBamBam/asBamBam.c	7 Mar 2010 03:26:33 -0000	1.4
@@ -134,8 +134,9 @@
 static int32_t insL, delL, insR, delR;
 
 typedef struct {
     int tid;
+    int mtid;
 
     int32_t qstart;
     int32_t qstop;
     int32_t mstart;
@@ -148,9 +149,9 @@
 } sv_t;
 
 #define BUF 100  // 500 base buffer around start/stop
 static sv_t intraL, intraR;
-sv_t interL, interR;
+static sv_t interL, interR;
 
 static int nuc_index(char c)
 {
 switch (c)
@@ -763,59 +764,95 @@
     ap->rstart = ap->rstop;
     }
 }
 
-#define NUM_CHROMS 100
-static int mateChromsL[NUM_CHROMS];
-static int mateChromsR[NUM_CHROMS];
-static int somaticSV[NUM_CHROMS];
-static int germlineSV[NUM_CHROMS];
+static inline void reset_inter()
+{ 
+interL.tid     = -1;
+interL.mtid    = -1;
+interL.qstart  = -1;
+interL.qstop   = -1;
+interL.mstart  = -1;
+interL.mstop   = -1;
+interL.supp    =  0;
+interL.qual    =  0;
+interL.qstrand = -1;
+interL.mstrand = -1;
+
+interR.tid     = -1;
+interR.mtid    = -1;
+interR.qstart  = -1;
+interR.qstop   = -1;
+interR.mstart  = -1;
+interR.mstop   = -1;
+interR.supp    =  0;
+interR.qual    =  0;
+interR.qstrand = -1;
+interR.mstrand = -1;
+}
 
 static int inter_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
 {
 int i;
-for (i = 0; i < NUM_CHROMS; ++i)
-    {
-    mateChromsL[i] = 0;
-    mateChromsR[i] = 0;
-    somaticSV[i]   = 0;
-    germlineSV[i]  = 0;
-    }
-
 const bam_pileup1_t *p;
-int avgMapQ = 0;
+int maxStartL = -1, maxStartR = -1;
+int nL = 0, nR = 0;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
-    avgMapQ += p->b->core.qual;
-    }
-if (((double)avgMapQ / (double) d->nL) < (double)ap->avgMapQ)
-    return 0;
+    if (p->b->core.qual < ap->minMapQ)
+	continue;
+    bam1_core_t *c = &p->b->core;
+    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
+	continue; // read is not paired or unmapped.
+    if ((c)->flag & BAM_FMUNMAP)
+	continue; // mate is unmapped.
+    if ((c)->mtid == (c)->tid)
+	continue;  // mate is on same chrom. MERGE INTER CODE HERE?
 
-avgMapQ = 0;
-for (i = 0; i < d->nR; ++i)
-    {
-    p = d->puR + i;
-    avgMapQ += p->b->core.qual;
-    }
-if (((double)avgMapQ / (double) d->nR) < (double) ap->avgMapQ)
-    return 0;
+    int qstrand = bam1_strand(p->b);
+    int mstrand = bam1_mstrand(p->b);
 
-for (i = 0; i < d->nL; ++i)
-    {
-    p = d->puL + i;
+    int32_t qstart = (c)->pos;
+    int32_t qstop  = qstart + (c)->l_qseq;
+    int32_t mstart = (c)->mpos;
+    int32_t mstop = mstart + (c)->l_qseq;
 
-    if (p->b->core.qual < ap->minMapQ)
+    if (qstart > maxStartL)
+	maxStartL = qstart;
+
+    if (qstart <= ap->svStartL)
 	continue;
+    ap->svStartL = qstart;
+    nL += 1;
 
-    bam1_core_t *c = &p->b->core;
-    if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
-	{ // read is paired and mapped.
-	if (!((c)->flag & BAM_FMUNMAP))
-	    {  // mate is mapped
-	    if ((c)->mtid != (c)->tid) // different chroms.
-		mateChromsL[(c)->mtid]++;
-	    }
+    if (interL.tid == -1)
+	{
+	interL.tid     = (c)->tid;
+	interL.mtid    = (c)->mtid;
+	interL.qstart  = qstart;
+	interL.qstop   = qstop;
+	interL.mstart  = mstart;
+	interL.mstop   = mstop;
+	interL.qstrand = qstrand;
+	interL.mstrand = mstrand;
+	interL.qual    = (c)->qual;
+	interL.supp    = 1;
+	}
+    else if (qstart < (interL.qstop + BUF) && qstop > (interL.qstart - BUF) &&
+	     mstart < (interL.mstop + BUF) && mstop > (interL.mstart - BUF))  
+	{
+	if (interL.qstart > qstart)
+	    interL.qstart = qstart;
+	if (interL.qstop < qstop)
+	    interL.qstop = qstop;
+	if (interL.mstart > mstart)
+	    interL.mstart = mstart;
+	if (interL.mstop < mstop)
+	    interL.mstop = mstop;
+	
+	interL.qual += (c)->qual;
+	interL.supp += 1;
 	}
     }
 
 for (i = 0; i < d->nR; ++i)
@@ -820,36 +857,77 @@
 
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
-
     if (p->b->core.qual < ap->minMapQ)
 	continue;
-
     bam1_core_t *c = &p->b->core;
-    if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
-	{ // read is paired and mapped.
-	if (!((c)->flag & BAM_FMUNMAP))
-	    {  // mate is mapped
-	    if ((c)->mtid != (c)->tid)  // different chroms
-		mateChromsR[(c)->mtid]++;
-	    }
+    if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
+	continue; // read is not paired or unmapped.
+    if ((c)->flag & BAM_FMUNMAP)
+	continue; // mate is unmapped.
+    if ((c)->mtid == (c)->tid)
+	continue;  // mate is on same chrom.
+
+    int qstrand = bam1_strand(p->b);
+    int mstrand = bam1_mstrand(p->b);
+    
+    int32_t qstart = (c)->pos;
+    int32_t qstop  = qstart + (c)->l_qseq;
+    int32_t mstart = (c)->mpos;
+    int32_t mstop = mstart + (c)->l_qseq;
+
+    if (qstart > maxStartR)
+	maxStartR = qstart;
+
+    if (qstart <= ap->svStartR)
+	continue;
+    ap->svStartR = qstart;
+    nR += 1;
+
+    if (interR.tid == -1)
+	{
+	interR.tid     = (c)->tid;
+	interR.mtid    = (c)->mtid;
+	interR.qstart  = qstart;
+	interR.qstop   = qstop;
+	interR.mstart  = mstart;
+	interR.mstop   = mstop;
+	interR.mstrand = mstrand;
+	interR.qstrand = qstrand;
+	interR.qual    = (c)->qual;
+	interR.supp    = 1;
+	}
+    else if (qstart < (interR.qstop + BUF) && qstop > (interR.qstart - BUF) &&
+	     mstart < (interR.mstop + BUF) && mstop > (interR.mstart - BUF))  
+	{
+	if (interR.qstart > qstart)
+	    interR.qstart = qstart;
+	if (interR.qstop < qstop)
+	    interR.qstop = qstop;
+	if (interR.mstart > mstart)
+	    interR.mstart = mstart;
+	if (interR.mstop < mstop)
+	    interR.mstop = mstop;
+	
+	interR.qual += (c)->qual;
+	interR.supp += 1;
 	}
     }
 
-int retVal = 0;
-for (i = 0; i < NUM_CHROMS; i++)
+if (interL.supp > 0)
     {
-    if (mateChromsL[i] < ap->minSupp)
-	continue;
-    retVal = 1;
-    if (mateChromsR[i] < ap->minSupp)
-	somaticSV[i]  = mateChromsL[i];
-    else
-	germlineSV[i] = mateChromsL[i] + mateChromsR[i];
+    if (maxStartL <= interL.qstop + BUF)
+	return 0;  // there still might be reads to overlap
+
+    /* check average mapping quality of discordants */
+    int qual = ceil((double) interL.qual / (double) interL.supp); 
+    if (interL.supp >= ap->minSupp && qual >= ap->avgMapQ)
+	return 1;
+    reset_inter();
     }
 
-return retVal;
+return 0;
 }
 
 static inline void reset_intra()
 { 
@@ -875,16 +953,12 @@
 }
 
 static int intra_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
 {
-// First check average mapping quality of read pairs
 int i;
 const bam_pileup1_t *p;
-int deltaSupp = intraL.supp + intraR.supp;
-
 int maxStartL = -1, maxStartR = -1;
 int nL = 0, nR = 0;
-//int dupeL = 0, dupeR = 0;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     if (p->b->core.qual < ap->minMapQ)
@@ -1020,19 +1094,8 @@
     reset_intra();
     }
 
 return 0;
-
-deltaSupp = (intraL.supp + intraR.supp) - deltaSupp; 
-if (deltaSupp == 0 && (intraL.supp > 0 || intraR.supp > 0) )
-    {
-    fprintf(stderr, "SIC pos = %d\n", d->posL);
-    if (intraL.supp >= ap->minSupp)
-	return 1;
-    reset_intra();
-    }
-
-return 0;
 }
 
 static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
 			 int *isIns, int *isDel, int *insGerm, int *delGerm)
@@ -1062,22 +1125,45 @@
 
 
 static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
 {
-int i;
-if (0 && inter_struct_variant(d, ap))
+if (inter_struct_variant(d, ap))
     {
     printf("SVS\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
-    for (i = 0; i < NUM_CHROMS; ++i)
-	{
-	if (somaticSV[i] > 0)
-	    printf("SSV:%s(%d),", d->hL->target_name[i], somaticSV[i]);
-	if (germlineSV[i] > 0)
-	    printf("GSV:%s(%d),", d->hL->target_name[i], germlineSV[i]);
-	}
+    char qsL = '+';
+    if (interL.qstrand)
+	qsL = '-';
+    char msL = '+';
+    if (interL.mstrand)
+	msL = '-';
+
+    char qsR = '+';
+    if (interR.qstrand)
+	qsR = '-';
+    char msR = '+';
+    if (interR.mstrand)
+	msR = '-';
+
+    if ( interR.supp > 0 && interL.mtid == interR.mtid &&  
+	(interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) )
+	printf("GIR\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
+	       interR.supp, (double) interR.qual / (double) interR.supp, 
+	       interL.supp, (double) interL.qual / (double) interL.supp, 
+	       qsR, d->hL->target_name[interR.tid], interR.qstart, interR.qstop,
+	       msR, d->hL->target_name[interR.mtid], interR.mstart, interR.mstop,
+	       qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
+	       msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
+    else
+	printf("SIR\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
+	       interL.supp, (double) interL.qual / (double) interL.supp, 
+	       qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop, 
+	       msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
+
     putchar('\n');
+
+    reset_inter();
     }
 
 if (intra_struct_variant(d, ap))
     {
@@ -1097,20 +1183,19 @@
     char msR = '+';
     if (intraR.mstrand)
 	msR = '-';
 
-    
     if ( intraR.supp > 0 && 
 	(intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) )
-	printf("GIC\t%d\t%3.1f\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)\t%c(%d-%d)>%c(%d-%d)", 
+	printf("GIA\t%d\t%3.1f\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)\t%c(%d-%d)>%c(%d-%d)", 
 	       intraR.supp, (double) intraR.qual / (double) intraR.supp, 
 	       intraL.supp, (double) intraL.qual / (double) intraL.supp, 
 	       qsR, intraR.qstart, intraR.qstop,
 	       msR, intraR.mstart, intraR.mstop,
 	       qsL, intraL.qstart, intraL.qstop,
 	       msL, intraL.mstart, intraL.mstop);
     else
-	printf("SIC\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)", 
+	printf("SIA\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)", 
 	       intraL.supp, (double) intraL.qual / (double) intraL.supp, 
 	       qsL, intraL.qstart, intraL.qstop, 
 	       msL, intraL.mstart, intraL.mstop);
 
@@ -1333,8 +1418,9 @@
 ap.svStartR = -1;
 ap.svStopR  = -1;
 
 reset_intra();
+reset_inter();
 
 bam_bam_file(inbamL, inbamR, position, mask, perform_analysis, &ap);
 }