src/hg/tcga/bamBam/bamBam.c 1.3
1.3 2009/09/25 18:44:27 jsanborn
updated
Index: src/hg/tcga/bamBam/bamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBam.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/tcga/bamBam/bamBam.c 25 Sep 2009 18:08:57 -0000 1.2
+++ src/hg/tcga/bamBam/bamBam.c 25 Sep 2009 18:44:27 -0000 1.3
@@ -1,295 +1,263 @@
/* bamBam -- discovering variants from BAM file
* code adapted from Heng Li's Samtools library functions
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
#include "bam_dual_pileup.h"
static char const rcsid[] = "$Id";
void usage()
/* Explain usage and exit. */
{
errAbort(
"bamBam - Perform analysis on two BAM files.\n"
"usage:\n"
" bamBam [options] <in.bam1> <in.bam2>\n"
"options:\n"
"\n"
);
}
static struct optionSpec options[] = {
{NULL, 0}
};
/* Option variables */
char *position = NULL;
typedef struct {
int beg, end;
samfile_t *in;
} tmpstruct_t;
typedef struct {
uint32_t format;
int tid, len, last_pos;
int mask;
char *ref;
/* Left pileup */
- bam_header_t *leftH;
- int32_t leftTid;
- int32_t leftPos;
- int leftN;
- bam_pileup1_t *leftPu;
+ bam_header_t *hL;
+ int32_t tidL;
+ int32_t posL;
+ int nL;
+ bam_pileup1_t *puL;
/* Right pileup */
- bam_header_t *rightH;
- int32_t rightTid;
- int32_t rightPos;
- int rightN;
- bam_pileup1_t *rightPu;
+ bam_header_t *hR;
+ int32_t tidR;
+ int32_t posR;
+ int nR;
+ bam_pileup1_t *puR;
} pileup_data_t;
static void reset_pileups(pileup_data_t *d)
{
-d->leftPos = -1;
-d->leftTid = -1;
-d->leftN = -1;
-d->leftPu = NULL;
-
-d->rightPos = -1;
-d->rightTid = -1;
-d->rightN = -1;
-d->rightPu = NULL;
+d->posL = -1;
+d->tidL = -1;
+d->nL = -1;
+d->puL = NULL;
+
+d->posR = -1;
+d->tidR = -1;
+d->nR = -1;
+d->puR = NULL;
}
static int perform_analysis(pileup_data_t *d)
{
-//return 0;
-printf("%d\t%d\t%d\t", d->leftTid, d->leftPos, d->leftN);
+printf("%d\t%d\t%d\t", d->tidL, d->posL, d->nL);
int i;
const bam_pileup1_t *p;
-for (i = 0; i < d->leftN; ++i)
+for (i = 0; i < d->nL; ++i)
{
- p = d->leftPu + i;
+ p = d->puL + i;
int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
putchar(c);
}
putchar('\t');
-printf("%d\t", d->rightN);
+printf("%d\t", d->nR);
-for (i = 0; i < d->rightN; ++i)
+for (i = 0; i < d->nR; ++i)
{
- p = d->rightPu + i;
+ p = d->puR + i;
int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
putchar(c);
}
putchar('\n');
reset_pileups(d);
return 0;
}
static int pileup_func_left(uint32_t tid, uint32_t pos, int n,
const bam_pileup1_t *pu, void *data)
-{ // will put everything to stdout.
+{
pileup_data_t *d = (pileup_data_t*)data;
-d->leftTid = tid;
-d->leftPos = pos;
-d->leftN = n;
-d->leftPu = (bam_pileup1_t*)pu;
-
-if (!d->rightPu)
- return 1; // return '1' to stop reading data to let right gather some data
-else if (d->rightTid < d->leftTid)
- return 1;
-else if (d->rightTid > d->leftTid)
- return 0;
-else if (d->rightPos < d->leftPos)
+d->tidL = tid;
+d->posL = pos;
+d->nL = n;
+d->puL = (bam_pileup1_t*)pu;
+
+if (!d->puR)
+ return 1; // return '1' to stop reading to let right get some data
+else if (d->tidR < d->tidL)
+ return 1; // return '1' to stop reading to let right catch up to chrom
+else if (d->tidR > d->tidL)
+ return 0; // return '0' to catch up to right's chrom
+else if (d->posR < d->posL)
return 1; // return '1' to stop reading data to let right catch up
-else if (d->rightPos > d->leftPos)
- return 0; // return '0' to continue reading on left side data
+else if (d->posR > d->posL)
+ return 0; // return '0' to catch up to left's pos
+// only perform analysis when L & R positions line up.
return perform_analysis(d);
}
static int pileup_func_right(uint32_t tid, uint32_t pos, int n,
const bam_pileup1_t *pu, void *data)
-{ // will put everything to stdout.
+{
pileup_data_t *d = (pileup_data_t*)data;
-d->rightTid = tid;
-d->rightPos = pos;
-d->rightN = n;
-d->rightPu = (bam_pileup1_t*)pu;
-
-if (!d->leftPu)
- return 1; // return '1' to stop reading data to let left gather some data
-else if (d->leftTid < d->rightTid)
- return 1;
-else if (d->leftTid > d->rightTid)
- return 0;
-else if (d->leftPos < d->rightPos)
+d->tidR = tid;
+d->posR = pos;
+d->nR = n;
+d->puR = (bam_pileup1_t*)pu;
+
+if (!d->puL)
+ return 1; // return '1' to stop reading data to let left get data
+else if (d->tidL < d->tidR)
+ return 1; // return '1' to stop reading data to let left catch up to chrom
+else if (d->tidR > d->tidR)
+ return 0; // return '0' to catch up to left's chrom.
+else if (d->posL < d->posR)
return 1; // return '1' to stop reading data to let left catch up
-else if (d->leftPos > d->rightPos)
- return 0; // return '0' to continue reading on right side data
+else if (d->posL > d->posR)
+ return 0; // return '0' to catch up to left's pos
+// only perform analysis when L & R positions line up.
return perform_analysis(d);
}
-void bamBam(char *inbam1, char *inbam2)
+void bamBam(char *inbamL, char *inbamR)
{
tmpstruct_t left, right;
left.beg = 0; left.end = 0x7fffffff;
-left.in = samopen(inbam1, "rb", NULL);
+left.in = samopen(inbamL, "rb", NULL);
if (left.in == NULL)
- errAbort("samopen(%s, \"rb\") returned NULL", inbam1);
+ errAbort("samopen(%s, \"rb\") returned NULL", inbamL);
right.beg = 0; right.end = 0x7fffffff;
-right.in = samopen(inbam2, "rb", NULL);
+right.in = samopen(inbamR, "rb", NULL);
if (right.in == NULL)
- errAbort("samopen(%s, \"rb\") returned NULL", inbam2);
+ errAbort("samopen(%s, \"rb\") returned NULL", inbamR);
+// Init pileup_data object
pileup_data_t *d = (pileup_data_t*)calloc(1, sizeof(pileup_data_t));
-
-/* Init pileup_data object */
d->tid = -1;
d->mask = BAM_DEF_MASK;
-d->leftH = left.in->header;
-d->rightH = right.in->header;
+d->hL = left.in->header;
+d->hR = right.in->header;
reset_pileups(d);
-bam1_t *bl = bam_init1();
-bam_plbuf_t *bufLeft = bam_plbuf_init(pileup_func_left, d);
-bam_plbuf_set_mask(bufLeft, -1); // mask = -1, no masking?
-
-bam1_t *br = bam_init1();
-bam_plbuf_t *bufRight = bam_plbuf_init(pileup_func_right, d);
-bam_plbuf_set_mask(bufRight, -1); // mask = -1, no masking?
-
-int retLeft = 1;
-int retRight = 1;
-int stopLeft = 0;
-int stopRight = 0;
-int count = 0;
-int debug = 0;
-do
- {
-
- if (d->leftPos >= 247194573 || d->rightPos >= 247194573)
- debug = 0;
+// Initialize read & pileup buffers for left side
+bam1_t *bL = bam_init1();
+bam_plbuf_t *bufL = bam_plbuf_init(pileup_func_left, d);
+bam_plbuf_set_mask(bufL, -1); // mask = -1, no masking?
+
+// Initialize read & pileup buffers for right side
+bam1_t *bR = bam_init1();
+bam_plbuf_t *bufR = bam_plbuf_init(pileup_func_right, d);
+bam_plbuf_set_mask(bufR, -1); // mask = -1, no masking?
+
+int retL = 1;
+int retR = 1;
+int pauseL = 0;
+int pauseR = 0;
- if (debug)
+do
{
- count++;
- printf("tid: %d, %d\n", d->leftTid, d->rightTid);
- printf("pos: %d, %d\n", d->leftPos, d->rightPos);
- printf("stop: %d, %d\n", stopLeft, stopRight);
- }
+ while (!pauseL && ((retL = samread(left.in, bL)) >= 0))
+ pauseL = bam_dual_plbuf_push(bL, bufL);
- while (!stopLeft && ((retLeft = samread(left.in, bl)) >= 0))
- stopLeft = bam_dual_plbuf_push(bl, bufLeft);
+ while (!pauseR && ((retR = samread(right.in, bR)) >= 0))
+ pauseR = bam_dual_plbuf_push(bR, bufR);
- while (!stopRight && ((retRight = samread(right.in, br)) >= 0))
- stopRight = bam_dual_plbuf_push(br, bufRight);
-
- if (debug)
- {
- count++;
- printf("tid: %d, %d\n", d->leftTid, d->rightTid);
- printf("pos: %d, %d\n", d->leftPos, d->rightPos);
- printf("stop: %d, %d\n", stopLeft, stopRight);
- }
-
- while (stopLeft && stopRight && count <= 50)
- {
- if (debug)
- {
- count++;
- printf("loop tid: %d, %d\n", d->leftTid, d->rightTid);
- printf("loop pos: %d, %d\n", d->leftPos, d->rightPos);
- printf("loop stop: %d, %d\n", stopLeft, stopRight);
- }
-// if (!d->leftPu && !d->rightPu)
-// {
-// stopLeft = bam_dual_plbuf_resume(bufLeft);
-// stopRight = bam_dual_plbuf_resume(bufRight);
-// }
- if (!d->leftPu)
- stopLeft = bam_dual_plbuf_resume(bufLeft);
- else if (!d->rightPu)
- stopRight = bam_dual_plbuf_resume(bufRight);
- else if (d->leftTid < d->rightTid)
- stopLeft = bam_dual_plbuf_resume(bufLeft);
- else if (d->leftTid > d->rightTid)
- stopRight = bam_dual_plbuf_resume(bufRight);
- else if (d->leftPos > 0 && d->leftPos < d->rightPos)
- stopLeft = bam_dual_plbuf_resume(bufLeft);
- else if (d->rightPos > 0 && d->rightPos < d->leftPos)
- stopRight = bam_dual_plbuf_resume(bufRight);
+ while (pauseL && pauseR)
+ { // only allow one change per round.
+ if (!d->puL) // L needs data
+ pauseL = bam_dual_plbuf_resume(bufL);
+ else if (!d->puR) // R needs data
+ pauseR = bam_dual_plbuf_resume(bufR);
+ else if (d->tidL < d->tidR) // L needs to catch up to R's chrom
+ pauseL = bam_dual_plbuf_resume(bufL);
+ else if (d->tidL > d->tidR) // R needs to catch up to L's chrom
+ pauseR = bam_dual_plbuf_resume(bufR);
+ else if (d->posL > 0 && d->posL < d->posR) // L needs to catch up to R's pos
+ pauseL = bam_dual_plbuf_resume(bufL);
+ else if (d->posR > 0 && d->posR < d->posL) // R needs to catch up to L's pos
+ pauseR = bam_dual_plbuf_resume(bufR);
}
-// count++;
- } while (retLeft >= 0 && retRight >= 0 && count <= 500);
+ } while (retL >= 0 && retR >= 0);
-if (retLeft < 0)
+if (retL < 0)
{
- bam_dual_plbuf_push(0, bufLeft); // finalize pileup
- bam_dual_plbuf_destroy(bufLeft);
+ bam_dual_plbuf_push(0, bufL); // finalize pileup
+ bam_dual_plbuf_destroy(bufL);
}
-if (retRight < 0)
+if (retR < 0)
{
- bam_dual_plbuf_push(0, bufRight); // finalize pileup
- bam_dual_plbuf_destroy(bufRight);
+ bam_dual_plbuf_push(0, bufR); // finalize pileup
+ bam_dual_plbuf_destroy(bufR);
}
-bam_destroy1(bl);
-bam_destroy1(br);
+bam_destroy1(bL);
+bam_destroy1(bR);
samclose(left.in);
samclose(right.in);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 3)
usage();
-char *inbam1 = argv[1];
-char *inbam2 = argv[2];
-bamBam(inbam1, inbam2);
+char *inbamL = argv[1];
+char *inbamR = argv[2];
+bamBam(inbamL, inbamR);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif