src/hg/tcga/bamBam/bamBamCNV.c 1.2
1.2 2009/11/06 20:29:26 jsanborn
added file
Index: src/hg/tcga/bamBam/bamBamCNV.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBamCNV.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/tcga/bamBam/bamBamCNV.c 6 Nov 2009 20:27:29 -0000 1.1
+++ src/hg/tcga/bamBam/bamBamCNV.c 6 Nov 2009 20:29:26 -0000 1.2
@@ -1,271 +1,266 @@
/* bamBam -- discovering variants from BAM file
* code adapted from Heng Li's Samtools library functions
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
#include "bam_dual_pileup.h"
static char const rcsid[] = "$Id";
void usage()
/* Explain usage and exit. */
{
errAbort(
"bamBamCNV - Perform copy number analysis on two BAM files.\n"
"usage:\n"
" bamBamCNV [options] <left.bam> <right.bam>\n"
"options:\n"
" -position=chrX:1-100 - Position to do analysis. [None]\n"
" -minMapQ=INT - Minimum acceptable mapping quality. [0]\n"
" -readsPerBin - Number of RIGHT reads to put in each cnv bin. [500]\n"
"\n"
);
}
static struct optionSpec options[] = {
{"position", OPTION_STRING},
{"minMapQ", OPTION_INT},
{"readsPerBin", OPTION_INT},
{NULL, 0}
};
#define BAD_TID -9999
/* Option variables */
char *position = NULL;
int minMapQ = 0;
int readsPerBin = 500;
typedef struct {
int minMapQ;
int readsPerBin;
int sites;
int32_t tid;
int32_t lstart;
int32_t lstop;
int lcounts;
int32_t rstart;
int32_t rstop;
int rcounts;
int32_t prevTid;
int32_t prevStart;
int32_t prevStop;
} analysis_params_t;
static int calc_cnv(pileup_data_t *d, analysis_params_t *ap)
{
if ((ap->tid != BAD_TID) && (ap->tid != d->tidL)) // onto next chromosome
return 1;
int i;
const bam_pileup1_t *p;
if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
{
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
bam1_core_t *c = &p->b->core;
if ((c)->pos < ap->prevStop)
return 0;
}
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
bam1_core_t *c = &p->b->core;
if ((c)->pos < ap->prevStop)
return 0;
}
}
ap->tid = d->tidL;
int32_t start, stop, rstart = ap->rstart, rstop = ap->rstop;
int32_t prevPos = -1;
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
bam1_core_t *c = &p->b->core;
if ((c)->qual < ap->minMapQ)
continue;
if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
{ // read is mapped and not an optical/PCR dupe
start = (c)->pos;
stop = start + (c)->l_qseq;
if (rstart == -1)
{
rstart = start;
rstop = stop;
ap->rcounts += 1;
}
else if ((start > prevPos) && (start > rstart) && (stop > rstop))
{ // make sure there are no possible dupes by removing all by
// comparing left-most position of each read, only allowing one per pos.
prevPos = start;
rstop = stop;
ap->rcounts += 1;
}
}
}
int32_t lstart = ap->lstart, lstop = ap->lstop;
prevPos = -1;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
bam1_core_t *c = &p->b->core;
if ((c)->qual < ap->minMapQ)
continue;
if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
{ // read is mapped and not an optical/PCR dupe
start = (c)->pos;
stop = start + (c)->l_qseq;
if (!((start >= rstart) && (stop <= rstop)))
continue;
if (lstart == -1)
{
lstart = start;
lstop = stop;
ap->lcounts += 1;
}
else if ((start > prevPos) && (start > lstart && stop > lstop))
{ // make sure there are no possible dupes by removing all by
// comparing left-most position of each read, only allowing one per pos.
prevPos = start;
lstop = stop;
ap->lcounts += 1;
}
}
}
ap->lstart = lstart;
ap->lstop = lstop;
ap->rstart = rstart;
ap->rstop = rstop;
if (ap->rcounts >= ap->readsPerBin)
return 1;
return 0;
}
static void perform_analysis(void *data)
{
pileup_data_t *d = (pileup_data_t*)data;
analysis_params_t *ap = (analysis_params_t*)d->func_data;
if (calc_cnv(d, ap))
{
- //int32_t start = min(ap->lstart, ap->rstart);
- //int32_t stop = max(ap->lstop, ap->rstop);
- //int32_t len = (stop - start) + 1;
-
printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\n",
d->hL->target_name[ap->tid],
- ap->lstart, ap->lstop + 1,
- ap->rstart, ap->rstop + 1,
- ap->lcounts, ap->rcounts);
+ ap->lstart, ap->lstop + 1, ap->lcounts,
+ ap->rstart, ap->rstop + 1, ap->rcounts);
fflush(stdout);
ap->prevTid = ap->tid;
ap->prevStart = ap->rstart;
ap->prevStop = ap->rstop;
ap->tid = BAD_TID;
ap->lstart = -1;
ap->lstop = -1;
ap->lcounts = 0;
ap->rstart = -1;
ap->rstop = -1;
ap->rcounts = 0;
}
ap->sites += 1;
}
void bamBam(char *inbamL, char *inbamR)
{
analysis_params_t ap;
ap.minMapQ = minMapQ;
ap.sites = 0;
ap.readsPerBin = readsPerBin;
ap.tid = BAD_TID;
ap.lstart = -1;
ap.lstop = -1;
ap.lcounts = 0;
ap.rstart = -1;
ap.rstop = -1;
ap.rcounts = 0;
ap.prevTid = BAD_TID;
ap.prevStart = -1;
ap.prevStop = -1;
bam_bam_file(inbamL, inbamR, position, perform_analysis, &ap);
fprintf(stderr, "# qualified sites = %d\n", ap.sites);
}
int main(int argc, char *argv[])
/* Process command line. */
{
// Print out command line as a comment.
int i;
putchar('#');
for (i = 0; i < argc; i++)
printf(" %s", argv[i]);
putchar('\n');
optionInit(&argc, argv, options);
if (argc < 3)
usage();
if (optionExists("minMapQ"))
minMapQ = optionInt("minMapQ", 0);
if (optionExists("readsPerBin"))
readsPerBin = optionInt("readsPerBin", 0);
if (optionExists("position"))
position = optionVal("position", NULL);
char *inbamL = argv[1];
char *inbamR = argv[2];
bamBam(inbamL, inbamR);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif