src/hg/tcga/pBamBam/asBamBam.c 1.4
1.4 2010/03/07 03:26:33 jsanborn
updated
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/tcga/pBamBam/asBamBam.c 7 Mar 2010 02:36:46 -0000 1.3
+++ src/hg/tcga/pBamBam/asBamBam.c 7 Mar 2010 03:26:33 -0000 1.4
@@ -134,8 +134,9 @@
static int32_t insL, delL, insR, delR;
typedef struct {
int tid;
+ int mtid;
int32_t qstart;
int32_t qstop;
int32_t mstart;
@@ -148,9 +149,9 @@
} sv_t;
#define BUF 100 // 500 base buffer around start/stop
static sv_t intraL, intraR;
-sv_t interL, interR;
+static sv_t interL, interR;
static int nuc_index(char c)
{
switch (c)
@@ -763,59 +764,95 @@
ap->rstart = ap->rstop;
}
}
-#define NUM_CHROMS 100
-static int mateChromsL[NUM_CHROMS];
-static int mateChromsR[NUM_CHROMS];
-static int somaticSV[NUM_CHROMS];
-static int germlineSV[NUM_CHROMS];
+static inline void reset_inter()
+{
+interL.tid = -1;
+interL.mtid = -1;
+interL.qstart = -1;
+interL.qstop = -1;
+interL.mstart = -1;
+interL.mstop = -1;
+interL.supp = 0;
+interL.qual = 0;
+interL.qstrand = -1;
+interL.mstrand = -1;
+
+interR.tid = -1;
+interR.mtid = -1;
+interR.qstart = -1;
+interR.qstop = -1;
+interR.mstart = -1;
+interR.mstop = -1;
+interR.supp = 0;
+interR.qual = 0;
+interR.qstrand = -1;
+interR.mstrand = -1;
+}
static int inter_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
{
int i;
-for (i = 0; i < NUM_CHROMS; ++i)
- {
- mateChromsL[i] = 0;
- mateChromsR[i] = 0;
- somaticSV[i] = 0;
- germlineSV[i] = 0;
- }
-
const bam_pileup1_t *p;
-int avgMapQ = 0;
+int maxStartL = -1, maxStartR = -1;
+int nL = 0, nR = 0;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
- avgMapQ += p->b->core.qual;
- }
-if (((double)avgMapQ / (double) d->nL) < (double)ap->avgMapQ)
- return 0;
+ if (p->b->core.qual < ap->minMapQ)
+ continue;
+ bam1_core_t *c = &p->b->core;
+ if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
+ continue; // read is not paired or unmapped.
+ if ((c)->flag & BAM_FMUNMAP)
+ continue; // mate is unmapped.
+ if ((c)->mtid == (c)->tid)
+ continue; // mate is on same chrom. MERGE INTER CODE HERE?
-avgMapQ = 0;
-for (i = 0; i < d->nR; ++i)
- {
- p = d->puR + i;
- avgMapQ += p->b->core.qual;
- }
-if (((double)avgMapQ / (double) d->nR) < (double) ap->avgMapQ)
- return 0;
+ int qstrand = bam1_strand(p->b);
+ int mstrand = bam1_mstrand(p->b);
-for (i = 0; i < d->nL; ++i)
- {
- p = d->puL + i;
+ int32_t qstart = (c)->pos;
+ int32_t qstop = qstart + (c)->l_qseq;
+ int32_t mstart = (c)->mpos;
+ int32_t mstop = mstart + (c)->l_qseq;
- if (p->b->core.qual < ap->minMapQ)
+ if (qstart > maxStartL)
+ maxStartL = qstart;
+
+ if (qstart <= ap->svStartL)
continue;
+ ap->svStartL = qstart;
+ nL += 1;
- bam1_core_t *c = &p->b->core;
- if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
- { // read is paired and mapped.
- if (!((c)->flag & BAM_FMUNMAP))
- { // mate is mapped
- if ((c)->mtid != (c)->tid) // different chroms.
- mateChromsL[(c)->mtid]++;
- }
+ if (interL.tid == -1)
+ {
+ interL.tid = (c)->tid;
+ interL.mtid = (c)->mtid;
+ interL.qstart = qstart;
+ interL.qstop = qstop;
+ interL.mstart = mstart;
+ interL.mstop = mstop;
+ interL.qstrand = qstrand;
+ interL.mstrand = mstrand;
+ interL.qual = (c)->qual;
+ interL.supp = 1;
+ }
+ else if (qstart < (interL.qstop + BUF) && qstop > (interL.qstart - BUF) &&
+ mstart < (interL.mstop + BUF) && mstop > (interL.mstart - BUF))
+ {
+ if (interL.qstart > qstart)
+ interL.qstart = qstart;
+ if (interL.qstop < qstop)
+ interL.qstop = qstop;
+ if (interL.mstart > mstart)
+ interL.mstart = mstart;
+ if (interL.mstop < mstop)
+ interL.mstop = mstop;
+
+ interL.qual += (c)->qual;
+ interL.supp += 1;
}
}
for (i = 0; i < d->nR; ++i)
@@ -820,36 +857,77 @@
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
-
if (p->b->core.qual < ap->minMapQ)
continue;
-
bam1_core_t *c = &p->b->core;
- if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
- { // read is paired and mapped.
- if (!((c)->flag & BAM_FMUNMAP))
- { // mate is mapped
- if ((c)->mtid != (c)->tid) // different chroms
- mateChromsR[(c)->mtid]++;
- }
+ if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
+ continue; // read is not paired or unmapped.
+ if ((c)->flag & BAM_FMUNMAP)
+ continue; // mate is unmapped.
+ if ((c)->mtid == (c)->tid)
+ continue; // mate is on same chrom.
+
+ int qstrand = bam1_strand(p->b);
+ int mstrand = bam1_mstrand(p->b);
+
+ int32_t qstart = (c)->pos;
+ int32_t qstop = qstart + (c)->l_qseq;
+ int32_t mstart = (c)->mpos;
+ int32_t mstop = mstart + (c)->l_qseq;
+
+ if (qstart > maxStartR)
+ maxStartR = qstart;
+
+ if (qstart <= ap->svStartR)
+ continue;
+ ap->svStartR = qstart;
+ nR += 1;
+
+ if (interR.tid == -1)
+ {
+ interR.tid = (c)->tid;
+ interR.mtid = (c)->mtid;
+ interR.qstart = qstart;
+ interR.qstop = qstop;
+ interR.mstart = mstart;
+ interR.mstop = mstop;
+ interR.mstrand = mstrand;
+ interR.qstrand = qstrand;
+ interR.qual = (c)->qual;
+ interR.supp = 1;
+ }
+ else if (qstart < (interR.qstop + BUF) && qstop > (interR.qstart - BUF) &&
+ mstart < (interR.mstop + BUF) && mstop > (interR.mstart - BUF))
+ {
+ if (interR.qstart > qstart)
+ interR.qstart = qstart;
+ if (interR.qstop < qstop)
+ interR.qstop = qstop;
+ if (interR.mstart > mstart)
+ interR.mstart = mstart;
+ if (interR.mstop < mstop)
+ interR.mstop = mstop;
+
+ interR.qual += (c)->qual;
+ interR.supp += 1;
}
}
-int retVal = 0;
-for (i = 0; i < NUM_CHROMS; i++)
+if (interL.supp > 0)
{
- if (mateChromsL[i] < ap->minSupp)
- continue;
- retVal = 1;
- if (mateChromsR[i] < ap->minSupp)
- somaticSV[i] = mateChromsL[i];
- else
- germlineSV[i] = mateChromsL[i] + mateChromsR[i];
+ if (maxStartL <= interL.qstop + BUF)
+ return 0; // there still might be reads to overlap
+
+ /* check average mapping quality of discordants */
+ int qual = ceil((double) interL.qual / (double) interL.supp);
+ if (interL.supp >= ap->minSupp && qual >= ap->avgMapQ)
+ return 1;
+ reset_inter();
}
-return retVal;
+return 0;
}
static inline void reset_intra()
{
@@ -875,16 +953,12 @@
}
static int intra_struct_variant(dual_pileup_t *d, analysis_params_t *ap)
{
-// First check average mapping quality of read pairs
int i;
const bam_pileup1_t *p;
-int deltaSupp = intraL.supp + intraR.supp;
-
int maxStartL = -1, maxStartR = -1;
int nL = 0, nR = 0;
-//int dupeL = 0, dupeR = 0;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
if (p->b->core.qual < ap->minMapQ)
@@ -1020,19 +1094,8 @@
reset_intra();
}
return 0;
-
-deltaSupp = (intraL.supp + intraR.supp) - deltaSupp;
-if (deltaSupp == 0 && (intraL.supp > 0 || intraR.supp > 0) )
- {
- fprintf(stderr, "SIC pos = %d\n", d->posL);
- if (intraL.supp >= ap->minSupp)
- return 1;
- reset_intra();
- }
-
-return 0;
}
static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
int *isIns, int *isDel, int *insGerm, int *delGerm)
@@ -1062,22 +1125,45 @@
static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
{
-int i;
-if (0 && inter_struct_variant(d, ap))
+if (inter_struct_variant(d, ap))
{
printf("SVS\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
- for (i = 0; i < NUM_CHROMS; ++i)
- {
- if (somaticSV[i] > 0)
- printf("SSV:%s(%d),", d->hL->target_name[i], somaticSV[i]);
- if (germlineSV[i] > 0)
- printf("GSV:%s(%d),", d->hL->target_name[i], germlineSV[i]);
- }
+ char qsL = '+';
+ if (interL.qstrand)
+ qsL = '-';
+ char msL = '+';
+ if (interL.mstrand)
+ msL = '-';
+
+ char qsR = '+';
+ if (interR.qstrand)
+ qsR = '-';
+ char msR = '+';
+ if (interR.mstrand)
+ msR = '-';
+
+ if ( interR.supp > 0 && interL.mtid == interR.mtid &&
+ (interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) )
+ printf("GIR\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)",
+ interR.supp, (double) interR.qual / (double) interR.supp,
+ interL.supp, (double) interL.qual / (double) interL.supp,
+ qsR, d->hL->target_name[interR.tid], interR.qstart, interR.qstop,
+ msR, d->hL->target_name[interR.mtid], interR.mstart, interR.mstop,
+ qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
+ msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
+ else
+ printf("SIR\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)",
+ interL.supp, (double) interL.qual / (double) interL.supp,
+ qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
+ msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
+
putchar('\n');
+
+ reset_inter();
}
if (intra_struct_variant(d, ap))
{
@@ -1097,20 +1183,19 @@
char msR = '+';
if (intraR.mstrand)
msR = '-';
-
if ( intraR.supp > 0 &&
(intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) )
- printf("GIC\t%d\t%3.1f\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)\t%c(%d-%d)>%c(%d-%d)",
+ printf("GIA\t%d\t%3.1f\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)\t%c(%d-%d)>%c(%d-%d)",
intraR.supp, (double) intraR.qual / (double) intraR.supp,
intraL.supp, (double) intraL.qual / (double) intraL.supp,
qsR, intraR.qstart, intraR.qstop,
msR, intraR.mstart, intraR.mstop,
qsL, intraL.qstart, intraL.qstop,
msL, intraL.mstart, intraL.mstop);
else
- printf("SIC\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)",
+ printf("SIA\t%d\t%3.1f\t%c(%d-%d)>%c(%d-%d)",
intraL.supp, (double) intraL.qual / (double) intraL.supp,
qsL, intraL.qstart, intraL.qstop,
msL, intraL.mstart, intraL.mstop);
@@ -1333,8 +1418,9 @@
ap.svStartR = -1;
ap.svStopR = -1;
reset_intra();
+reset_inter();
bam_bam_file(inbamL, inbamR, position, mask, perform_analysis, &ap);
}