src/hg/tcga/bamBam/bamBam.c 1.3

1.3 2009/09/25 18:44:27 jsanborn
updated
Index: src/hg/tcga/bamBam/bamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBam.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/tcga/bamBam/bamBam.c	25 Sep 2009 18:08:57 -0000	1.2
+++ src/hg/tcga/bamBam/bamBam.c	25 Sep 2009 18:44:27 -0000	1.3
@@ -1,295 +1,263 @@
 /* bamBam -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 #include "bam_dual_pileup.h"
 
 static char const rcsid[] = "$Id";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bamBam - Perform analysis on two BAM files.\n"
   "usage:\n"
   "   bamBam [options] <in.bam1> <in.bam2>\n"
   "options:\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {NULL, 0}
 };
 
 /* Option variables */
 char *position = NULL;
 
 typedef struct {
     int beg, end;
     samfile_t *in;
 } tmpstruct_t;
 
 typedef struct {
     uint32_t format;
     int tid, len, last_pos;
     int mask;
     char *ref;
 
     /* Left pileup */
-    bam_header_t *leftH;
-    int32_t leftTid;
-    int32_t leftPos;
-    int leftN;
-    bam_pileup1_t *leftPu;
+    bam_header_t *hL;
+    int32_t tidL;
+    int32_t posL;
+    int nL;
+    bam_pileup1_t *puL;
 
     /* Right pileup */
-    bam_header_t *rightH;
-    int32_t rightTid;
-    int32_t rightPos;
-    int rightN;
-    bam_pileup1_t *rightPu;
+    bam_header_t *hR;
+    int32_t tidR;
+    int32_t posR;
+    int nR;
+    bam_pileup1_t *puR;
 
 } pileup_data_t;
 
 static void reset_pileups(pileup_data_t *d)
 {
-d->leftPos = -1;
-d->leftTid = -1;
-d->leftN = -1;
-d->leftPu = NULL;
-
-d->rightPos = -1;
-d->rightTid = -1;
-d->rightN = -1;
-d->rightPu = NULL;
+d->posL = -1;
+d->tidL = -1;
+d->nL   = -1;
+d->puL  = NULL;
+
+d->posR = -1;
+d->tidR = -1;
+d->nR   = -1;
+d->puR  = NULL;
 }
 
 static int perform_analysis(pileup_data_t *d)
 {
-//return 0;
-printf("%d\t%d\t%d\t", d->leftTid, d->leftPos, d->leftN);
+printf("%d\t%d\t%d\t", d->tidL, d->posL, d->nL);
 
 int i;
 const bam_pileup1_t *p;
-for (i = 0; i < d->leftN; ++i)
+for (i = 0; i < d->nL; ++i)
     {
-    p = d->leftPu + i;
+    p = d->puL + i;
     int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     putchar(c);
     }
 putchar('\t');
-printf("%d\t", d->rightN);
+printf("%d\t", d->nR);
 
-for (i = 0; i < d->rightN; ++i)
+for (i = 0; i < d->nR; ++i)
     {
-    p = d->rightPu + i;
+    p = d->puR + i;
     int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     putchar(c);
     }
 putchar('\n');
     
 reset_pileups(d);
 
 return 0;
 }
 
 static int pileup_func_left(uint32_t tid, uint32_t pos, int n, 
 			    const bam_pileup1_t *pu, void *data)
-{  // will put everything to stdout.
+{
 pileup_data_t *d = (pileup_data_t*)data;
 
-d->leftTid = tid;
-d->leftPos = pos;
-d->leftN   = n;
-d->leftPu  = (bam_pileup1_t*)pu;
-
-if (!d->rightPu) 
-    return 1;  // return '1' to stop reading data to let right gather some data
-else if (d->rightTid < d->leftTid)
-    return 1;
-else if (d->rightTid > d->leftTid)
-    return 0;
-else if (d->rightPos < d->leftPos)
+d->tidL = tid;
+d->posL = pos;
+d->nL   = n;
+d->puL  = (bam_pileup1_t*)pu;
+
+if (!d->puR) 
+    return 1;  // return '1' to stop reading to let right get some data
+else if (d->tidR < d->tidL)
+    return 1;  // return '1' to stop reading to let right catch up to chrom
+else if (d->tidR > d->tidL)
+    return 0;  // return '0' to catch up to right's chrom 
+else if (d->posR < d->posL)
     return 1;  // return '1' to stop reading data to let right catch up
-else if (d->rightPos > d->leftPos)
-    return 0;  // return '0' to continue reading on left side data
+else if (d->posR > d->posL)
+    return 0;  // return '0' to catch up to left's pos
 
+// only perform analysis when L & R positions line up.
 return perform_analysis(d);
 }
 
 static int pileup_func_right(uint32_t tid, uint32_t pos, int n, 
 			     const bam_pileup1_t *pu, void *data)
-{  // will put everything to stdout.
+{
 pileup_data_t *d = (pileup_data_t*)data;
 
-d->rightTid = tid;
-d->rightPos = pos;
-d->rightN   = n;
-d->rightPu  = (bam_pileup1_t*)pu;
-
-if (!d->leftPu) 
-    return 1;  // return '1' to stop reading data to let left gather some data
-else if (d->leftTid < d->rightTid)
-    return 1;
-else if (d->leftTid > d->rightTid)
-    return 0;
-else if (d->leftPos < d->rightPos)
+d->tidR = tid;
+d->posR = pos;
+d->nR   = n;
+d->puR  = (bam_pileup1_t*)pu;
+
+if (!d->puL) 
+    return 1;  // return '1' to stop reading data to let left get data
+else if (d->tidL < d->tidR)
+    return 1;  // return '1' to stop reading data to let left catch up to chrom
+else if (d->tidR > d->tidR)
+    return 0; // return '0' to catch up to left's chrom.
+else if (d->posL < d->posR)
     return 1;  // return '1' to stop reading data to let left catch up
-else if (d->leftPos > d->rightPos)
-    return 0;  // return '0' to continue reading on right side data
+else if (d->posL > d->posR)
+    return 0;  // return '0' to catch up to left's pos
 
+// only perform analysis when L & R positions line up.
 return perform_analysis(d);
 }
 
-void bamBam(char *inbam1, char *inbam2)
+void bamBam(char *inbamL, char *inbamR)
 {
 tmpstruct_t left, right;  
 left.beg = 0; left.end = 0x7fffffff;
-left.in = samopen(inbam1, "rb", NULL);
+left.in = samopen(inbamL, "rb", NULL);
 if (left.in == NULL)
-    errAbort("samopen(%s, \"rb\") returned NULL", inbam1);
+    errAbort("samopen(%s, \"rb\") returned NULL", inbamL);
 
 right.beg = 0; right.end = 0x7fffffff;
-right.in = samopen(inbam2, "rb", NULL);
+right.in = samopen(inbamR, "rb", NULL);
 if (right.in == NULL)
-    errAbort("samopen(%s, \"rb\") returned NULL", inbam2);
+    errAbort("samopen(%s, \"rb\") returned NULL", inbamR);
 
+// Init pileup_data object
 pileup_data_t *d = (pileup_data_t*)calloc(1, sizeof(pileup_data_t));
-
-/* Init pileup_data object */
 d->tid = -1; 
 d->mask = BAM_DEF_MASK;
-d->leftH = left.in->header;
-d->rightH = right.in->header;
+d->hL = left.in->header;
+d->hR = right.in->header;
 reset_pileups(d);
 
-bam1_t *bl = bam_init1();
-bam_plbuf_t *bufLeft = bam_plbuf_init(pileup_func_left, d);
-bam_plbuf_set_mask(bufLeft, -1);  // mask = -1, no masking?
-
-bam1_t *br = bam_init1();
-bam_plbuf_t *bufRight = bam_plbuf_init(pileup_func_right, d);
-bam_plbuf_set_mask(bufRight, -1);  // mask = -1, no masking?
-
-int retLeft = 1; 
-int retRight = 1;
-int stopLeft = 0;
-int stopRight = 0;
-int count = 0;
-int debug = 0;
-do 
-    {
-
-    if (d->leftPos >= 247194573 || d->rightPos >= 247194573)
-	debug = 0;
+// Initialize read & pileup buffers for left side
+bam1_t *bL = bam_init1();
+bam_plbuf_t *bufL = bam_plbuf_init(pileup_func_left, d);
+bam_plbuf_set_mask(bufL, -1);  // mask = -1, no masking?
+
+// Initialize read & pileup buffers for right side
+bam1_t *bR = bam_init1();
+bam_plbuf_t *bufR = bam_plbuf_init(pileup_func_right, d);
+bam_plbuf_set_mask(bufR, -1);  // mask = -1, no masking?
+
+int retL = 1; 
+int retR = 1;
+int pauseL = 0;
+int pauseR = 0;
        
-    if (debug)
+do 
 	{
-	count++;
-	printf("tid:  %d, %d\n", d->leftTid, d->rightTid);
-	printf("pos:  %d, %d\n", d->leftPos, d->rightPos);
-	printf("stop: %d, %d\n", stopLeft, stopRight);
-	}
+    while (!pauseL && ((retL = samread(left.in, bL)) >= 0))
+        pauseL = bam_dual_plbuf_push(bL, bufL);
 	
-    while (!stopLeft && ((retLeft = samread(left.in, bl)) >= 0))
-	stopLeft = bam_dual_plbuf_push(bl, bufLeft);
+    while (!pauseR && ((retR = samread(right.in, bR)) >= 0))
+	pauseR = bam_dual_plbuf_push(bR, bufR);
     
-    while (!stopRight && ((retRight = samread(right.in, br)) >= 0))
-	stopRight = bam_dual_plbuf_push(br, bufRight);
-
-    if (debug)
-	{
-	count++;
-	printf("tid:  %d, %d\n", d->leftTid, d->rightTid);
-	printf("pos:  %d, %d\n", d->leftPos, d->rightPos);
-	printf("stop: %d, %d\n", stopLeft, stopRight);
-	}
-
-    while (stopLeft && stopRight && count <= 50)
-	{
-    if (debug)
-	{
-	count++;
-	printf("loop tid:  %d, %d\n", d->leftTid, d->rightTid);
-	printf("loop pos:  %d, %d\n", d->leftPos, d->rightPos);
-	printf("loop stop: %d, %d\n", stopLeft, stopRight);
-	}
-//	if (!d->leftPu && !d->rightPu)
-//	    {
-//	    stopLeft = bam_dual_plbuf_resume(bufLeft);
-//	    stopRight = bam_dual_plbuf_resume(bufRight);
-//	    }
-	if (!d->leftPu)
-	    stopLeft = bam_dual_plbuf_resume(bufLeft);
-	else if (!d->rightPu)
-	    stopRight = bam_dual_plbuf_resume(bufRight);
-	else if (d->leftTid < d->rightTid)
-	    stopLeft = bam_dual_plbuf_resume(bufLeft);
-	else if (d->leftTid > d->rightTid)
-	    stopRight = bam_dual_plbuf_resume(bufRight);
-	else if (d->leftPos > 0 && d->leftPos < d->rightPos)
-	    stopLeft = bam_dual_plbuf_resume(bufLeft);
-	else if (d->rightPos > 0 && d->rightPos < d->leftPos)
-	    stopRight = bam_dual_plbuf_resume(bufRight);
+    while (pauseL && pauseR)
+	{  // only allow one change per round.
+	if (!d->puL)                // L needs data
+	    pauseL = bam_dual_plbuf_resume(bufL);
+	else if (!d->puR)           // R needs data
+	    pauseR = bam_dual_plbuf_resume(bufR);
+	else if (d->tidL < d->tidR) // L needs to catch up to R's chrom
+	    pauseL = bam_dual_plbuf_resume(bufL);
+	else if (d->tidL > d->tidR) // R needs to catch up to L's chrom
+	    pauseR = bam_dual_plbuf_resume(bufR);
+	else if (d->posL > 0 && d->posL < d->posR) // L needs to catch up to R's pos
+	    pauseL = bam_dual_plbuf_resume(bufL);
+	else if (d->posR > 0 && d->posR < d->posL) // R needs to catch up to L's pos
+	    pauseR = bam_dual_plbuf_resume(bufR);
 	}
 
-//    count++;
-    } while (retLeft >= 0 && retRight >= 0 && count <= 500);
+    } while (retL >= 0 && retR >= 0);
 
-if (retLeft < 0)
+if (retL < 0)
     {
-    bam_dual_plbuf_push(0, bufLeft);       // finalize pileup
-    bam_dual_plbuf_destroy(bufLeft);
+    bam_dual_plbuf_push(0, bufL);  // finalize pileup
+    bam_dual_plbuf_destroy(bufL);
     }
 
-if (retRight < 0)
+if (retR < 0)
     {
-    bam_dual_plbuf_push(0, bufRight);       // finalize pileup
-    bam_dual_plbuf_destroy(bufRight);
+    bam_dual_plbuf_push(0, bufR);  // finalize pileup
+    bam_dual_plbuf_destroy(bufR);
     }
 
-bam_destroy1(bl);
-bam_destroy1(br);
+bam_destroy1(bL);
+bam_destroy1(bR);
 
 samclose(left.in);
 samclose(right.in);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 
-char *inbam1 = argv[1];
-char *inbam2 = argv[2];
-bamBam(inbam1, inbam2);
+char *inbamL = argv[1];
+char *inbamR = argv[2];
+bamBam(inbamL, inbamR);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif