src/hg/tcga/bamBam/bamBam.c 1.4

1.4 2009/09/25 21:16:13 jsanborn
moved stuff around to make it more flexible
Index: src/hg/tcga/bamBam/bamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBam.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/tcga/bamBam/bamBam.c	25 Sep 2009 18:44:27 -0000	1.3
+++ src/hg/tcga/bamBam/bamBam.c	25 Sep 2009 21:16:13 -0000	1.4
@@ -1,263 +1,108 @@
 /* bamBam -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 #include "bam_dual_pileup.h"
 
 static char const rcsid[] = "$Id";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bamBam - Perform analysis on two BAM files.\n"
   "usage:\n"
-  "   bamBam [options] <in.bam1> <in.bam2>\n"
+  "   bamBam [options] <left.bam> <right.bam>\n"
   "options:\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {NULL, 0}
 };
 
 /* Option variables */
 char *position = NULL;
 
 typedef struct {
-    int beg, end;
-    samfile_t *in;
-} tmpstruct_t;
+    int minQual;
+    int minMapQual;
+} analysis_params;
 
-typedef struct {
-    uint32_t format;
-    int tid, len, last_pos;
-    int mask;
-    char *ref;
-
-    /* Left pileup */
-    bam_header_t *hL;
-    int32_t tidL;
-    int32_t posL;
-    int nL;
-    bam_pileup1_t *puL;
-
-    /* Right pileup */
-    bam_header_t *hR;
-    int32_t tidR;
-    int32_t posR;
-    int nR;
-    bam_pileup1_t *puR;
-
-} pileup_data_t;
-
-static void reset_pileups(pileup_data_t *d)
+static void perform_analysis(void *data)
 {
-d->posL = -1;
-d->tidL = -1;
-d->nL   = -1;
-d->puL  = NULL;
-
-d->posR = -1;
-d->tidR = -1;
-d->nR   = -1;
-d->puR  = NULL;
-}
+pileup_data_t *d = (pileup_data_t*)data;
+analysis_params *ap = (analysis_params*)d->func_data;
 
-static int perform_analysis(pileup_data_t *d)
-{
-printf("%d\t%d\t%d\t", d->tidL, d->posL, d->nL);
+printf("%d\t%d\t%d\t%d/%d\t", d->tidL, d->posL, d->nL, ap->minQual, ap->minMapQual);
 
 int i;
 const bam_pileup1_t *p;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     putchar(c);
     }
 putchar('\t');
 printf("%d\t", d->nR);
 
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     putchar(c);
     }
 putchar('\n');
-    
-reset_pileups(d);
-
-return 0;
 }
 
-static int pileup_func_left(uint32_t tid, uint32_t pos, int n, 
-			    const bam_pileup1_t *pu, void *data)
-{
-pileup_data_t *d = (pileup_data_t*)data;
-
-d->tidL = tid;
-d->posL = pos;
-d->nL   = n;
-d->puL  = (bam_pileup1_t*)pu;
-
-if (!d->puR) 
-    return 1;  // return '1' to stop reading to let right get some data
-else if (d->tidR < d->tidL)
-    return 1;  // return '1' to stop reading to let right catch up to chrom
-else if (d->tidR > d->tidL)
-    return 0;  // return '0' to catch up to right's chrom 
-else if (d->posR < d->posL)
-    return 1;  // return '1' to stop reading data to let right catch up
-else if (d->posR > d->posL)
-    return 0;  // return '0' to catch up to left's pos
-
-// only perform analysis when L & R positions line up.
-return perform_analysis(d);
-}
-
-static int pileup_func_right(uint32_t tid, uint32_t pos, int n, 
-			     const bam_pileup1_t *pu, void *data)
-{
-pileup_data_t *d = (pileup_data_t*)data;
-
-d->tidR = tid;
-d->posR = pos;
-d->nR   = n;
-d->puR  = (bam_pileup1_t*)pu;
-
-if (!d->puL) 
-    return 1;  // return '1' to stop reading data to let left get data
-else if (d->tidL < d->tidR)
-    return 1;  // return '1' to stop reading data to let left catch up to chrom
-else if (d->tidR > d->tidR)
-    return 0; // return '0' to catch up to left's chrom.
-else if (d->posL < d->posR)
-    return 1;  // return '1' to stop reading data to let left catch up
-else if (d->posL > d->posR)
-    return 0;  // return '0' to catch up to left's pos
-
-// only perform analysis when L & R positions line up.
-return perform_analysis(d);
-}
 
 void bamBam(char *inbamL, char *inbamR)
 {
-tmpstruct_t left, right;  
-left.beg = 0; left.end = 0x7fffffff;
-left.in = samopen(inbamL, "rb", NULL);
-if (left.in == NULL)
-    errAbort("samopen(%s, \"rb\") returned NULL", inbamL);
-
-right.beg = 0; right.end = 0x7fffffff;
-right.in = samopen(inbamR, "rb", NULL);
-if (right.in == NULL)
-    errAbort("samopen(%s, \"rb\") returned NULL", inbamR);
-
-// Init pileup_data object
-pileup_data_t *d = (pileup_data_t*)calloc(1, sizeof(pileup_data_t));
-d->tid = -1; 
-d->mask = BAM_DEF_MASK;
-d->hL = left.in->header;
-d->hR = right.in->header;
-reset_pileups(d);
-
-// Initialize read & pileup buffers for left side
-bam1_t *bL = bam_init1();
-bam_plbuf_t *bufL = bam_plbuf_init(pileup_func_left, d);
-bam_plbuf_set_mask(bufL, -1);  // mask = -1, no masking?
-
-// Initialize read & pileup buffers for right side
-bam1_t *bR = bam_init1();
-bam_plbuf_t *bufR = bam_plbuf_init(pileup_func_right, d);
-bam_plbuf_set_mask(bufR, -1);  // mask = -1, no masking?
-
-int retL = 1; 
-int retR = 1;
-int pauseL = 0;
-int pauseR = 0;
-
-do 
-    {
-    while (!pauseL && ((retL = samread(left.in, bL)) >= 0))
-        pauseL = bam_dual_plbuf_push(bL, bufL);
-    
-    while (!pauseR && ((retR = samread(right.in, bR)) >= 0))
-	pauseR = bam_dual_plbuf_push(bR, bufR);
-
-    while (pauseL && pauseR)
-	{  // only allow one change per round.
-	if (!d->puL)                // L needs data
-	    pauseL = bam_dual_plbuf_resume(bufL);
-	else if (!d->puR)           // R needs data
-	    pauseR = bam_dual_plbuf_resume(bufR);
-	else if (d->tidL < d->tidR) // L needs to catch up to R's chrom
-	    pauseL = bam_dual_plbuf_resume(bufL);
-	else if (d->tidL > d->tidR) // R needs to catch up to L's chrom
-	    pauseR = bam_dual_plbuf_resume(bufR);
-	else if (d->posL > 0 && d->posL < d->posR) // L needs to catch up to R's pos
-	    pauseL = bam_dual_plbuf_resume(bufL);
-	else if (d->posR > 0 && d->posR < d->posL) // R needs to catch up to L's pos
-	    pauseR = bam_dual_plbuf_resume(bufR);
-	}
 
-    } while (retL >= 0 && retR >= 0);
+analysis_params aps;
+aps.minQual = 50;
+aps.minMapQual = 25;
 
-if (retL < 0)
-    {
-    bam_dual_plbuf_push(0, bufL);  // finalize pileup
-    bam_dual_plbuf_destroy(bufL);
-    }
-
-if (retR < 0)
-    {
-    bam_dual_plbuf_push(0, bufR);  // finalize pileup
-    bam_dual_plbuf_destroy(bufR);
-    }
-
-bam_destroy1(bL);
-bam_destroy1(bR);
-
-samclose(left.in);
-samclose(right.in);
+bam_bam_file(inbamL, inbamR, perform_analysis, &aps);
 }
 
+
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 
 char *inbamL = argv[1];
 char *inbamR = argv[2];
 bamBam(inbamL, inbamR);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif