src/hg/tcga/pBamBam/asBamBam.c 1.11
1.11 2010/06/01 21:46:13 jsanborn
added new mutation caller
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -b -B -U 1000000 -r1.10 -r1.11
--- src/hg/tcga/pBamBam/asBamBam.c 22 Apr 2010 18:10:50 -0000 1.10
+++ src/hg/tcga/pBamBam/asBamBam.c 1 Jun 2010 21:46:13 -0000 1.11
@@ -1,1509 +1,1811 @@
/* asBamBam -- discovering variants from BAM file
* code adapted from Heng Li's Samtools library functions
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
#include "bam_helper.h"
static char const rcsid[] = "$Id";
void usage()
/* Explain usage and exit. */
{
errAbort(
"asBamBam - Allele-Specific BamBam.\n"
"usage:\n"
" asBamBam [options] <left.bam> <right.bam>\n"
"options:\n"
" -fa=file - FASTA filename of reference [NULL]\n"
" -position=chr1:1-100 - Position to do analysis. [NULL=process all reads]\n"
" -minSuppSNP=INT - Minimum Support for SNP/Het Calling. [4]\n"
" -minSuppSV=INT - Minimum Support for struct vars. [2]\n"
" -minQ=INT - Minimum acceptable base quality. [10]\n"
" -minMapQ=INT - Minimum acceptable mapping quality. [20]\n"
" -avgMapQ=INT - Minimum acceptable average mapping quality. [30]\n"
" -minChiSq=INT - Minimum acceptable chi square of nucleotide freqs. [250]\n"
" -maxISize=INT - Maximum Insert Size. [2000]\n"
" -readsPerBin=INT - Reads per CNV bin [2500]\n"
"\n"
);
}
static struct optionSpec options[] = {
{"fa", OPTION_STRING},
{"position", OPTION_STRING},
{"minSuppSNP", OPTION_INT},
{"minSuppSV", OPTION_INT},
{"minQ", OPTION_INT},
{"minMapQ", OPTION_INT},
{"avgMapQ", OPTION_INT},
{"maxISize", OPTION_INT},
{"minChiSq", OPTION_INT},
{"readsPerBin", OPTION_INT},
{NULL, 0}
};
+#define IN_PROB 0.94 // probability of getting base that's member of genotype
+ // (1.0 - IN_PROB) / 3.0 = prob of getting error
#define BAD_TID -999
/* Option variables */
char *faidx = NULL;
char *position = NULL;
int minSuppSNP = 4;
int minSuppSV = 2;
int minQ = 10;
int minMapQ = 20;
int avgMapQ = 30;
int maxISize = 2000;
int minChiSq = 250;
int readsPerBin = 2500;
typedef struct {
int minQ;
int minMapQ;
int avgMapQ;
int maxISize;
int minChiSq;
int minSuppSNP;
int minSuppSV;
int sites;
int readsPerBin;
int pStartPrev;
int pStopPrev;
int pStart;
int pStop;
/* Overall Copy Number */
int32_t tid;
int32_t lstart;
int32_t lstop;
int lcounts;
int32_t rstart;
int32_t rstop;
int rcounts;
int32_t prevTid;
int32_t prevStart;
int32_t prevStop;
/* ASCN */
int32_t tidAS;
int32_t startAS;
int32_t stopAS;
int lcountsMin;
int lcountsMax;
int rcountsMin;
int rcountsMax;
/* SV */
int svStartL;
int svStopL;
int svStartR;
int svStopR;
+ long double fracGerm;
+
} analysis_params_t;
static int32_t countsL1[4];
static int32_t countsL2[4];
static int32_t countsL3[4];
static int32_t countsR1[4];
static int32_t countsR2[4];
static int32_t countsR3[4];
static int32_t votesL[4];
static int32_t countsL[4];
static int32_t votesR[4];
static int32_t countsR[4];
typedef struct {
char nuc;
int count;
int vote;
} variant_t;
variant_t minorL, majorL;
variant_t minorR, majorR;
static int32_t insL, delL, insR, delR;
typedef struct {
int tid;
int mtid;
int32_t qstart;
int32_t qstop;
int32_t mstart;
int32_t mstop;
int supp;
int qual;
int mstrand;
int qstrand;
int mdir; // trend of mate directions
int pmstart; // previos start
} sv_t;
#define BUF 100 // buffer around reads' starts/stops
static sv_t intraL, intraR;
static sv_t interL, interR;
static int nuc_index(char c)
{
switch (c)
{
case 'A':
return 0;
case 'G':
return 1;
case 'C':
return 2;
case 'T':
return 3;
default:
return -1;
}
return -1;
}
#define NUCS "AGCT"
static inline void reset_globals(analysis_params_t *ap)
{
/* reset nuc counts */
int i;
for (i = 0; i < 4; i++)
{ // sum up the qualities of each base
votesL[i] = 0;
countsL[i] = 0;
votesR[i] = 0;
countsR[i] = 0;
countsL1[i] = 0;
countsL2[i] = 0;
countsL3[i] = 0;
countsR1[i] = 0;
countsR2[i] = 0;
countsR3[i] = 0;
}
/* reset indel stats */
insL = 0;
insR = 0;
delL = 0;
delR = 0;
/* reset pileup info */
ap->pStart = -1, ap->pStop = -1;
}
static inline int check_read(const bam_pileup1_t *p, analysis_params_t *ap)
{
bam1_core_t *c = &p->b->core;
/* Check if unmapped or PCR optical dupe */
if (((c)->flag & BAM_FUNMAP) || ((c)->flag & BAM_FDUP) )
return 0;
/* check base quality */
if (bam1_qual(p->b)[p->qpos] < ap->minQ)
return 0;
/* check mapping quality */
if ((c)->qual < ap->minMapQ)
return 0;
/* all good */
return 1;
}
static inline void nuc_counts(dual_pileup_t *d, analysis_params_t *ap)
{
/* reset global vars */
reset_globals(ap);
int i, index;
int32_t start, stop;
int32_t prevStart = -1;
const bam_pileup1_t *p;
char best_b1 = 'N';
int best_q1 = -1;
char best_b2 = 'N';
int best_q2 = -1;
int insertion = 0, deletion = 0;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
if (!check_read(p, ap))
continue;
bam1_core_t *c = &p->b->core;
int qual = bam1_qual(p->b)[p->qpos];
/* Check if start & stop has already been visited, to dupes missed in prev step*/
start = (c)->pos;
stop = start + (c)->l_qseq;
if (ap->pStart == -1)
{
ap->pStart = start;
ap->pStop = stop;
}
else
{
if (start < ap->pStart)
ap->pStart = start;
if (stop > ap->pStop)
ap->pStop = stop;
}
if (prevStart == -1)
prevStart = start;
if (start < prevStart)
continue;
if (start == prevStart)
{
if (p->is_del)
{
if (p->indel > 0)
insertion = 1;
else if (p->indel < 0)
deletion = 1;
}
else
{
int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
if (best_b1 == 'N')
{
best_b1 = b;
best_q1 = qual;
}
else if (qual > best_q1)
{
best_b2 = best_b1;
best_q2 = best_q1;
best_b1 = b;
best_q1 = qual;
}
else if (qual > best_q2)
{
best_b2 = b;
best_q2 = qual;
}
}
continue;
}
prevStart = start;
if (insertion)
insL += 1;
else if (deletion)
delL += 1;
double lseq = (double) p->b->core.l_qseq;
if ((index = nuc_index(best_b1)) >= 0)
{
votesL[index] += best_q1;
countsL[index]++;
if (p->qpos < round(0.33 * lseq))
countsL1[index]++;
else if (p->qpos < round(0.67 * lseq))
countsL2[index]++;
else
countsL3[index]++;
}
if ((index = nuc_index(best_b2)) >= 0)
{
votesL[index] += best_q2;
countsL[index]++;
if (p->qpos < round(0.33 * lseq))
countsL1[index]++;
else if (p->qpos < round(0.67 * lseq))
countsL2[index]++;
else
countsL3[index]++;
}
best_b1 = 'N';
best_q1 = -1;
best_b2 = 'N';
best_q2 = -1;
insertion = 0, deletion = 0;
if (p->is_del)
{
if (p->indel > 0)
insertion = 1;
else if (p->indel < 0)
deletion = 1;
}
else
{
int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
if (best_b1 == 'N')
{
best_b1 = b;
best_q1 = qual;
}
else if (qual > best_q1)
{
best_b2 = best_b1;
best_q2 = best_q1;
best_b1 = b;
best_q1 = qual;
}
else if (qual > best_q2)
{
best_b2 = b;
best_q2 = qual;
}
}
}
prevStart = -1;
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
if (!check_read(p, ap))
continue;
bam1_core_t *c = &p->b->core;
int qual = bam1_qual(p->b)[p->qpos];
/* Check if start & stop has already been visited, to dupes missed in prev step*/
start = (c)->pos;
stop = start + (c)->l_qseq;
if (start <= prevStart)
continue;
prevStart = start;
if (ap->pStart == -1)
{
ap->pStart = start;
ap->pStop = stop;
}
if (start < ap->pStart)
ap->pStart = start;
if (stop > ap->pStop)
ap->pStop = stop;
if (p->indel > 0)
insR += 1;
else if (p->indel < 0)
delR += 1;
if (!p->is_del && p->indel == 0)
{
double lseq = (double) p->b->core.l_qseq;
int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
if ((index = nuc_index(b)) < 0)
continue;
votesR[index] += qual;
countsR[index]++;
if (p->qpos < round(0.33 * lseq))
countsR1[index]++;
else if (p->qpos < round(0.67 * lseq))
countsR2[index]++;
else
countsR3[index]++;
}
}
int iL1 = -1, iL2 = -1, maxL1 = 0, maxL2 = 0;
int iR1 = -1, iR2 = -1, maxR1 = 0, maxR2 = 0;
int countL1 = -1, countL2 = -1, countR1 = -1, countR2 = -1;
for (i = 0; i < 4; i++)
{
if (countsL[i] > countL1)
{
iL2 = iL1; // set prev max to max2
maxL2 = maxL1;
countL2 = countL1;
iL1 = i;
maxL1 = votesL[i];
countL1 = countsL[i];
}
else if (countsL[i] > countL2)
{
iL2 = i;
maxL2 = votesL[i];
countL2 = countsL[i];
}
if (countsR[i] > countR1)
{
iR2 = iR1;
maxR2 = maxR1;
countR2 = countR1;
iR1 = i;
maxR1 = votesR[i];
countR1 = countsR[i];
}
else if (countsR[i] > countR2)
{
iR2 = i;
maxR2 = votesR[i];
countR2 = countsR[i];
}
}
majorL.nuc = 'N';
majorL.vote = 0;
majorL.count = 0;
if (iL1 != -1 && countL1 >= ap->minSuppSNP)
{
majorL.nuc = NUCS[iL1];
majorL.vote = maxL1;
majorL.count = countL1;
}
minorL.nuc = 'N';
minorL.vote = 0;
minorL.count = 0;
if (iL2 != -1 && countL2 >= ap->minSuppSNP)
{
minorL.nuc = NUCS[iL2];
minorL.vote = maxL2;
minorL.count = countL2;
}
majorR.nuc = 'N';
majorR.vote = 0;
majorR.count = 0;
if (iR1 != -1 && countR1 >= ap->minSuppSNP)
{
majorR.nuc = NUCS[iR1];
majorR.vote = maxR1;
majorR.count = countR1;
}
minorR.nuc = 'N';
minorR.vote = 0;
minorR.count = 0;
if (iR2 != -1 && countR2 >= ap->minSuppSNP)
{
minorR.nuc = NUCS[iR2];
minorR.vote = maxR2;
minorR.count = countR2;
}
}
double chi_square(int obs1, int obs2, double p) // p = binomial prob of getting 1
{
double pseudo = 1.0;
int count = obs1 + obs2;
double exp1 = p * ((double) count) + pseudo;
double exp2 = (1.0 - p) * ((double) count) + pseudo;
// apply Yates correction... will often have small observed values.
return pow(abs((double)obs1 - exp1) - 0.5, 2.0)/exp1 + pow(abs((double)obs2 - exp2) - 0.5, 2.0)/exp2;
}
static int sig_diff(dual_pileup_t *d, analysis_params_t *ap)
{
int i;
double pseudo = 1.0;
int totalL = 0, totalR = 0;
for (i = 0; i < 4; i++)
{
totalL += countsL[i];
totalR += countsR[i];
}
double obs, exp, chi_sq = 0.0;
for (i = 0; i < 4; i++)
{
obs = ((double) countsL[i] * 100.0) / (double) totalL + pseudo;
exp = ((double) countsR[i] * 100.0) / (double) totalR + pseudo;
chi_sq += pow(abs(obs - exp) - 0.5, 2.0) / exp;
}
return chi_sq;
}
static int is_mut(dual_pileup_t *d, analysis_params_t *ap, char *ref,
char *callL1, char *callL2, char *callR1, char *callR2)
{
if (majorR.vote == 0 || majorL.vote == 0)
return 0; // most likely a bad quality region.
if (sig_diff(d, ap) < ap->minChiSq)
return 0;
int i = 0;
int rCount = 0;
for (i = 0; i < 4; i++)
rCount += countsR1[i] + countsR2[i] + countsR3[i];
if (rCount < 10)
return 0;
char rb = (d->ref && (int)d->posL < d->len)? d->ref[d->posL] : 'N';
int retVal1 = 0, retVal2 = 0;
for (i = 0; i < 4; i++)
{
if (NUCS[i] != majorL.nuc && NUCS[i] != minorL.nuc)
continue;
if (NUCS[i] == majorL.nuc && majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
{
if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
retVal1 = 1;
else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
retVal1 = 1;
else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
retVal1 = 1;
if (retVal1)
{
if (rCount >= 30 && countsR[i] <= 1)
retVal1 = 1;
else if (rCount < 30 && countsR[i] == 0)
retVal1 = 1;
else
retVal1 = 0;
}
}
if (NUCS[i] == minorL.nuc && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
{
if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
retVal2 = 1;
else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
retVal2 = 1;
else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
retVal2 = 1;
if (retVal2)
{
if (rCount >= 30 && countsR[i] <= 1)
retVal2 = 1;
else if (rCount < 30 && countsR[i] == 0)
retVal2 = 1;
else
retVal2 = 0;
}
}
}
if (!retVal1 && !retVal2)
return 0;
*ref = rb;
*callL1 = majorL.nuc;
*callL2 = minorL.nuc;
*callR1 = majorR.nuc;
*callR2 = minorR.nuc;
if (majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
return 1;
if (minorL.nuc != 'N' && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
return 1;
return 0;
}
static int calc_cnv(dual_pileup_t *d, analysis_params_t *ap)
{
if ((ap->tid != BAD_TID) && (ap->tid != d->tidL)) // onto next chromosome
return 2;
int i;
const bam_pileup1_t *p;
if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
{
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
bam1_core_t *c = &p->b->core;
if ((c)->pos <= ap->prevStop)
return 0;
}
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
bam1_core_t *c = &p->b->core;
if ((c)->pos <= ap->prevStop)
return 0;
}
}
ap->tid = d->tidL;
int32_t start, stop;
int32_t lstart = ap->lstart, lstop = ap->lstop;
int32_t prevPos = lstop - 1;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
bam1_core_t *c = &p->b->core;
if ((c)->qual < ap->minMapQ)
continue;
if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
{ // read is mapped and not an optical/PCR dupe
start = (c)->pos;
stop = start + (c)->l_qseq;
if (lstart == -1)
{
lstart = start;
lstop = start + 1;
ap->lcounts += 1;
}
else if ((start > prevPos) && (start > lstart)) // && stop > lstop))
{ // make sure there are no possible dupes by removing all by
// comparing left-most position of each read, only allowing one per pos.
prevPos = start;
lstop = start + 1;
ap->lcounts += 1;
}
}
}
int32_t rstart = ap->rstart, rstop = ap->rstop;
prevPos = rstop - 1;
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
bam1_core_t *c = &p->b->core;
if ((c)->qual < ap->minMapQ)
continue;
if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
{ // read is mapped and not an optical/PCR dupe
start = (c)->pos;
stop = start + (c)->l_qseq;
if (rstart == -1)
{
rstart = start;
rstop = start + 1;
ap->rcounts += 1;
}
else if ((start > prevPos) && (start > rstart)) // && (stop > rstop))
{ // make sure there are no possible dupes by removing all by
// comparing left-most position of each read, only allowing one per pos.
prevPos = start;
rstop = start + 1;
ap->rcounts += 1;
}
}
}
ap->lstart = lstart;
ap->lstop = lstop;
ap->rstart = rstart;
ap->rstop = rstop;
if ((ap->lcounts >= ap->readsPerBin) || (ap->rcounts >= ap->readsPerBin))
return 1;
return 0;
}
static int calc_ascn(dual_pileup_t *d, analysis_params_t *ap,
int maxS, int minS, int maxG, int minG)
{
if ((ap->tidAS != BAD_TID) && (ap->tidAS != d->tidL)) // onto next chromosome
return 2;
if ( ap->pStopPrev != -1 && (d->posL < ap->pStopPrev))
return 0; // within previous position's territory, avoid double-counting
ap->tidAS = d->tidL;
if (ap->startAS == -1)
{
ap->startAS = d->posL;
ap->stopAS = d->posL;
}
if (d->posL > ap->stopAS)
ap->stopAS = d->posL;
ap->lcountsMin += minS;
ap->lcountsMax += maxS;
ap->rcountsMin += minG;
ap->rcountsMax += maxG;
int totalL = ap->lcountsMin + ap->lcountsMax;
int totalR = ap->rcountsMin + ap->rcountsMax;
if (totalL >= ap->readsPerBin || totalR >= ap->readsPerBin)
return 1;
return 0;
}
static int is_het(dual_pileup_t *d, analysis_params_t *ap)
{
int i = 0;
int rCount = 0, maxCountR = 0;
for (i = 0; i < 4; i++)
{
if (countsR[i] < ap->minSuppSNP)
continue;
rCount += countsR[i];
if (countsR[i] > maxCountR)
maxCountR = countsR[i];
}
double het = (double) maxCountR / (double) rCount;
if (het > 0.4 && het < 0.6)
return 1; // need a smarter test here.
return 0;
}
-
static void print_nuc_info()
{
int i, first = 1;
for (i = 0; i < 4; i++)
{
if (countsR[i] == 0)
continue;
if (!first)
putchar(' ');
printf("%c:%d/%2.1f", NUCS[i], countsR[i], (double) votesR[i] / (double) countsR[i]);
first = 0;
}
putchar('\t');
first = 1;
for (i = 0; i < 4; i++)
{
if (countsL[i] == 0)
continue;
if (!first)
putchar(' ');
printf("%c:%d/%2.1f", NUCS[i], countsL[i], (double) votesL[i] / (double) countsL[i]);
first = 0;
}
putchar('\n');
}
+
+
static void do_cnv(dual_pileup_t *d, analysis_params_t *ap)
{
int ret;
if ((ret = calc_cnv(d, ap)) == 0)
return;
int start = ap->rstart;
int stop = ap->rstop;
if (ap->lstart < start)
start = ap->lstart;
if (ap->lstop > stop)
stop = ap->lstop;
printf("CNV\t%s\t%d\t%d\t%f\t%d\t%d\n",
d->hL->target_name[ap->tid],
start, stop, (double) ap->lcounts / (double) ap->rcounts,
ap->rcounts, ap->lcounts);
ap->prevTid = ap->tid;
ap->prevStart = ap->lstart;
ap->prevStop = ap->lstop;
ap->lcounts = 0;
ap->rcounts = 0;
if (ret == 2)
{ // switching chroms
ap->tid = BAD_TID;
ap->lstart = -1;
ap->lstop = -1;
ap->rstart = -1;
ap->rstop = -1;
}
else
{ // on same chrom
ap->lstart = ap->lstop;
ap->rstart = ap->rstop;
}
}
static inline void reset_inter()
{
interL.tid = -1;
interL.mtid = -1;
interL.qstart = -1;
interL.qstop = -1;
interL.mstart = -1;
interL.mstop = -1;
interL.supp = 0;
interL.qual = 0;
interL.qstrand = -1;
interL.mstrand = -1;
interL.mdir = 0;
interL.pmstart = -1;
interR.tid = -1;
interR.mtid = -1;
interR.qstart = -1;
interR.qstop = -1;
interR.mstart = -1;
interR.mstop = -1;
interR.supp = 0;
interR.qual = 0;
interR.qstrand = -1;
interR.mstrand = -1;
interR.mdir = 0;
interR.pmstart = -1;
}
static inline void reset_intra()
{
intraL.tid = -1;
intraL.qstart = -1;
intraL.qstop = -1;
intraL.mstart = -1;
intraL.mstop = -1;
intraL.supp = 0;
intraL.qual = 0;
intraL.qstrand = -1;
intraL.mstrand = -1;
intraL.mdir = 0;
intraL.pmstart = -1;
intraR.tid = -1;
intraR.qstart = -1;
intraR.qstop = -1;
intraR.mstart = -1;
intraR.mstop = -1;
intraR.supp = 0;
intraR.qual = 0;
intraR.qstrand = -1;
intraR.mstrand = -1;
intraR.mdir = 0;
intraR.pmstart = -1;
}
static inline void update_sv(const bam_pileup1_t *p, sv_t *sv)
{
if (!p)
return;
bam1_core_t *c = &p->b->core;
-int qstrand = bam1_strand(p->b);
-int mstrand = bam1_mstrand(p->b);
int32_t qstart = (c)->pos;
int32_t qstop = qstart + (c)->l_qseq;
int32_t mstart = (c)->mpos;
int32_t mstop = mstart + (c)->l_qseq;
+int qstrand = bam1_strand(p->b);
+int mstrand = bam1_mstrand(p->b);
if (sv->tid == -1) // not set yet
{
sv->tid = (c)->tid;
sv->mtid = (c)->mtid;
sv->qstart = qstart;
sv->qstop = qstop;
sv->mstart = mstart;
sv->mstop = mstop;
sv->mstrand = mstrand;
sv->qstrand = qstrand;
sv->mdir = 0;
sv->pmstart = mstart;
sv->qual = (c)->qual;
sv->supp = 1;
}
else if ((c)->tid == sv->tid && (c)->mtid == sv->mtid &&
qstrand == sv->qstrand && mstrand == sv->mstrand &&
qstart < (sv->qstop + BUF) && qstop > (sv->qstart - BUF) &&
mstart < (sv->mstop + BUF) && mstop > (sv->mstart - BUF))
{ // overlapping on both sides within +/- BUF
if (sv->qstart > qstart)
sv->qstart = qstart;
if (sv->qstop < qstop)
sv->qstop = qstop;
if (sv->mstart > mstart)
sv->mstart = mstart;
if (sv->mstop < mstop)
sv->mstop = mstop;
sv->mdir += (mstart - sv->pmstart);
sv->pmstart = mstart;
sv->qual += (c)->qual;
sv->supp += 1;
}
}
static inline void update_hq_discordant(const bam_pileup1_t *p, const bam_pileup1_t **inter, const bam_pileup1_t **intra)
{
if (!p)
return;
bam1_core_t *c = &p->b->core;
if ((c)->tid != (c)->mtid)
{
if (!(*inter)) // best inter-chrom not set yet
*inter = p;
else if ((*inter)->b->core.qual > (c)->qual)
*inter = p;
}
else
{
int qstrand = bam1_strand(p->b);
int mstrand = bam1_mstrand(p->b);
if ((qstrand != mstrand) && abs((c)->isize) <= maxISize)
return;
if (!(*intra))
*intra = p;
else if ((*intra)->b->core.qual > (c)->qual)
*intra = p;
}
}
static inline void struct_var_scan(dual_pileup_t *d, analysis_params_t *ap,
int *minStartL, int *minStartR)
{
int i;
const bam_pileup1_t *p, *pInterL = NULL, *pIntraL = NULL;
int prevStart = ap->svStartL;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
if (p->b->core.qual < ap->minMapQ)
continue;
bam1_core_t *c = &p->b->core;
if (!((c)->flag & BAM_FPAIRED))
continue; // read is not paired
if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
continue; // read or its mate is unmapped.
int32_t qstart = (c)->pos;
if (*minStartL == -1 || qstart < *minStartL)
*minStartL = qstart;
int qstrand = bam1_strand(p->b);
int mstrand = bam1_mstrand(p->b);
if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize)
continue; // mate is properly paired
if (ap->svStartL == -1)
{
ap->svStartL = qstart;
prevStart = ap->svStartL;
}
if (qstart < ap->svStartL)
continue;
else if (qstart == ap->svStartL)
update_hq_discordant(p, &pInterL, &pIntraL);
else
{
ap->svStartL = qstart;
break;
}
}
if (ap->svStartL == prevStart)
ap->svStartL += 1;
prevStart = ap->svStartR;
const bam_pileup1_t *pInterR = NULL, *pIntraR = NULL;
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
if (p->b->core.qual < ap->minMapQ)
continue;
bam1_core_t *c = &p->b->core;
if (!((c)->flag & BAM_FPAIRED))
continue; // read is not paired
if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
continue; // read or its mate is unmapped.
int32_t qstart = (c)->pos;
if (*minStartR == -1 || qstart < *minStartR)
*minStartR = qstart;
int qstrand = bam1_strand(p->b);
int mstrand = bam1_mstrand(p->b);
if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize)
continue; // mate is properly paired
if (ap->svStartR == -1)
{
ap->svStartR = qstart;
prevStart = ap->svStartR;
}
if (qstart < ap->svStartR)
continue;
else if (qstart == ap->svStartR)
update_hq_discordant(p, &pInterR, &pIntraR);
else
{
ap->svStartR = qstart;
break;
}
}
if (ap->svStartR == prevStart)
ap->svStartR += 1;
update_sv(pIntraL, &intraL);
update_sv(pInterL, &interL);
update_sv(pIntraR, &intraR);
update_sv(pInterR, &interR);
}
static inline int is_intra_sv(analysis_params_t *ap, int minStart)
{
if (intraL.supp > 0)
{
if (minStart <= intraL.qstop)
return 0; // there still might be reads to overlap
/* check average mapping quality of discordants */
int qual = ceil((double) intraL.qual / (double) intraL.supp);
if (intraL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
{ // check mate pair direction trend... should be positive for properly paired (opposite strand)
-
- //return 1;
-
- if ((intraL.qstrand == intraL.mstrand && intraL.mdir < 0) ||
- (intraL.qstrand != intraL.mstrand && intraL.mdir > 0))
+ if ((intraL.qstrand && intraL.mstrand && intraL.mdir < 0) || // + +
+ (!intraL.qstrand && !intraL.mstrand && intraL.mdir < 0) || // - -
+ (intraL.qstrand != intraL.mstrand && intraL.mdir > 0)) // + - or - +
return 1;
}
reset_intra();
}
return 0;
}
static inline int is_inter_sv(analysis_params_t *ap, int minStart)
{
if (interL.supp > 0)
{
if (minStart <= interL.qstop)
return 0; // there still might be reads to overlap
/* check average mapping quality of discordants */
int qual = ceil((double) interL.qual / (double) interL.supp);
if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
{ // check mate pair direction trend... should be positive for properly paired (opposite strand)
-
- //return 1;
-
- if ((interL.qstrand == interL.mstrand && interL.mdir < 0) ||
- (interL.qstrand != interL.mstrand && interL.mdir > 0))
+ if ((interL.qstrand && interL.mstrand && interL.mdir < 0) || // + +
+ (!interL.qstrand && !interL.mstrand && interL.mdir < 0) || // - -
+ (interL.qstrand != interL.mstrand && interL.mdir > 0)) // + - or - +
return 1;
}
reset_inter();
}
return 0;
}
static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
int *isIns, int *isDel, int *insGerm, int *delGerm)
{
if (insL >= ap->minSuppSNP)
{
*isIns = 1;
if (insR > 0) // if you see even once, believe it.
*insGerm = 1;
else
*insGerm = 0;
return 1;
}
if (delL >= ap->minSuppSNP)
{
*isDel = 1;
if (delR > 0) // if you see even once, believe it.
*delGerm = 1;
else
*delGerm = 0;
return 1;
}
return 0;
}
static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
{
int minStartL = -1, minStartR = -1;
struct_var_scan(d, ap, &minStartL, &minStartR);
if (is_inter_sv(ap, minStartL))
{
printf("SVS\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
char qsL = '+';
if (interL.qstrand)
qsL = '-';
char msL = '+';
if (interL.mstrand)
msL = '-';
char qsR = '+';
if (interR.qstrand)
qsR = '-';
char msR = '+';
if (interR.mstrand)
msR = '-';
if ( interR.supp > 0 && interL.mtid == interR.mtid &&
- (interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) )
+ (interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) &&
+ (interR.mstart < (interL.mstop + BUF) && interR.mstop > (interL.mstart - BUF)) )
printf("GIR\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)",
interR.supp, (double) interR.qual / (double) interR.supp,
interL.supp, (double) interL.qual / (double) interL.supp,
qsR, d->hL->target_name[interR.tid], interR.qstart, interR.qstop,
msR, d->hL->target_name[interR.mtid], interR.mstart, interR.mstop,
qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
else
printf("SIR\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)",
interL.supp, (double) interL.qual / (double) interL.supp,
qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
putchar('\n');
reset_inter();
}
if (is_intra_sv(ap, minStartL))
{
printf("SVS\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
char qsL = '+';
if (intraL.qstrand)
qsL = '-';
char msL = '+';
if (intraL.mstrand)
msL = '-';
char qsR = '+';
if (intraR.qstrand)
qsR = '-';
char msR = '+';
if (intraR.mstrand)
msR = '-';
if ( intraR.supp > 0 &&
- (intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) )
+ (intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) &&
+ (intraR.mstart < (intraL.mstop + BUF) && intraR.mstop > (intraL.mstart - BUF)) )
printf("GIA\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)",
intraR.supp, (double) intraR.qual / (double) intraR.supp,
intraL.supp, (double) intraL.qual / (double) intraL.supp,
qsR, d->hL->target_name[intraR.tid], intraR.qstart, intraR.qstop,
msR, d->hL->target_name[intraR.tid], intraR.mstart, intraR.mstop,
qsL, d->hL->target_name[intraL.tid], intraL.qstart, intraL.qstop,
msL, d->hL->target_name[intraL.tid], intraL.mstart, intraL.mstop);
else
printf("SIA\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)",
intraL.supp, (double) intraL.qual / (double) intraL.supp,
qsL, d->hL->target_name[intraL.tid], intraL.qstart, intraL.qstop,
msL, d->hL->target_name[intraL.tid], intraL.mstart, intraL.mstop);
putchar('\n');
reset_intra();
}
int isIns = 0, isDel = 0, insGerm = 0, delGerm = 0;
if (indel_variant(d, ap, &isIns, &isDel, &insGerm, &delGerm))
{
printf("IDL\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
if (isIns && insGerm)
printf("GIns\t(%d,%d),", insR, insL);
else if (isIns && !insGerm)
printf("SIns\t(%d,%d),", insR, insL);
if (isDel && delGerm)
printf("GDel\t(%d,%d)", delR, delL);
else if (isDel && !delGerm)
printf("SDel\t(%d,%d)", delR, delL);
putchar('\n');
}
}
static void do_het_analysis(dual_pileup_t *d, analysis_params_t *ap)
{
if (!is_het(d, ap))
return;
// do ascn & phasing
char minGa = 'N', maxGa = 'N', minSa = 'N', maxSa = 'N';
int i, minGc = 0, maxGc = 0, minSc = 0, maxSc = 0;
for (i = 0; i < 4; i++)
{
int cr = countsR[i];
int cl = countsL[i];
if (cr > maxGc)
{
minGc = maxGc;
minGa = maxGa;
maxGc = cr;
maxGa = NUCS[i];
}
else if (cr > minGc)
{
minGc = cr;
minGa = NUCS[i];
}
if (cl > maxSc)
{
minSc = maxSc;
minSa = maxSa;
maxSc = cl;
maxSa = NUCS[i];
}
else if (cl > minSc)
{
minSc = cl;
minSa = NUCS[i];
}
}
if (minSc < ap->minSuppSNP)
{
minSc = 0;
minSa = 'N';
}
char g1 = 'N', g2 = 'N';
if (maxSa == maxGa)
{
g1 = maxGa;
g2 = minGa;
}
else if (maxSa == minGa)
{
g1 = minGa;
g2 = maxGa;
}
// else, weird case!
if (g1 == 'N' || g2 == 'N')
return;
printf("HAP\t%s\t%d\t%d\t%c %c\t%c %c\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1,
g1, g2, maxSa, minSa);
print_nuc_info();
int ret;
if ((ret = calc_ascn(d, ap, maxSc, minSc, maxGc, minGc)) == 0)
return;
double minCN = (double) (ap->lcountsMin) / (double) (ap->rcountsMin + ap->rcountsMax);
double maxCN = (double) (ap->lcountsMax) / (double) (ap->rcountsMin + ap->rcountsMax);
printf("ASC\t%s\t%d\t%d\t%f\t%f\t%d\t%d\t%d\t%d\n",
d->hL->target_name[ap->tidAS],
ap->startAS, ap->stopAS,
maxCN, minCN,
ap->rcountsMax, ap->rcountsMin,
ap->lcountsMax, ap->lcountsMin);
ap->startAS = -1;
ap->stopAS = -1;
ap->lcountsMax = 0;
ap->lcountsMin = 0;
ap->rcountsMax = 0;
ap->rcountsMin = 0;
ap->pStartPrev = ap->pStart;
ap->pStopPrev = ap->pStop;
if (ret == 2)
{
ap->tidAS = BAD_TID;
ap->pStartPrev = -1;
ap->pStopPrev = -1;
}
}
static void do_mut(dual_pileup_t *d, analysis_params_t *ap)
{
char ref, callL1, callL2, callR1, callR2;
if (!is_mut(d, ap, &ref, &callL1, &callL2, &callR1, &callR2))
return;
-printf("MUT\t%s\t%d\t%d\t",
+printf("MUT1\t%s\t%d\t%d\t",
d->hL->target_name[d->tidL], d->posL, d->posL + 1);
printf("%c\t", ref);
if (callR2 == 'N')
printf("%c>", callR1);
else
printf("%c/%c>", callR1, callR2);
if (callL2 == 'N')
printf("%c\t", callL1);
else
printf("%c/%c\t", callL1, callL2);
double chi_sq = sig_diff(d, ap);
printf("%f\t", chi_sq);
print_nuc_info();
}
+#define A 0
+#define G 1
+#define T 2
+#define C 3
+
+#define NUM_NUC 4
+
+#define AA 0
+#define AG 1
+#define AT 2
+#define AC 3
+#define GA 1
+#define GG 4
+#define GT 5
+#define GC 6
+#define TA 2
+#define TG 5
+#define TT 7
+#define TC 8
+#define CA 3
+#define CG 6
+#define CT 8
+#define CC 9
+
+#define NUM_GENO 10
+
+#define MAX_COUNT 1000
+
+static long double *logvals;
+static long double **genoPrior;
+static long double **mutProb;
+static long double **mutProbGeno;
+static long double **baseProb;
+
+#define GENO_1 "AAAAGGGTTC"
+#define GENO_2 "AGTCGTCTCC"
+
+
+void initProbTables()
+{
+int i, j;
+
+logvals = (long double *)(malloc(MAX_COUNT * sizeof(long double)));
+for (i = 0; i < MAX_COUNT; i++)
+ logvals[i] = logl((long double) i + 1.0);
+
+genoPrior = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+ genoPrior[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+for (i = 0; i < NUM_GENO; i++)
+ {
+ int i1 = nuc_index(GENO_1[i]);
+ int i2 = nuc_index(GENO_2[i]);
+ for (j = 0; j < NUM_NUC; j++)
+ {
+ long double val;
+ if (i1 == j && i2 == j)
+ val = 0.80;
+ else if (i1 == j || i2 == j)
+ val = 0.10;
+ else
+ val = 0.05;
+
+ genoPrior[i][j] = logl(val);
+ }
+ }
+mutProb = (long double **)(malloc(NUM_NUC * sizeof(long double*)));
+for (i = 0; i < NUM_NUC; i++)
+ mutProb[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+long double val, tot;
+for (i = 0; i < NUM_NUC; i++)
+ {
+ tot = 0.0;
+ for (j = 0; j < NUM_NUC; j++)
+ {
+ /* else, transitions 4x more likely than transitions */
+ if (i == j)
+ val = 1.0;
+ else if ((NUCS[i] == 'A' && NUCS[j] == 'G') || (NUCS[i] == 'G' && NUCS[j] == 'A'))
+ val = 0.1;
+ else if ((NUCS[i] == 'C' && NUCS[j] == 'T') || (NUCS[i] == 'T' && NUCS[j] == 'C'))
+ val = 0.1;
+ else
+ val = 0.025;
+
+ mutProb[i][j] = val;
+ tot += val;
+ }
+
+ /* normalize probs, conditioned on germline call */
+ for (j = 0; j < NUM_NUC; j++)
+ {
+ mutProb[i][j] = logl(mutProb[i][j]/tot);
+ }
+ }
+
+mutProbGeno = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+ mutProbGeno[i] = (long double *)(malloc(NUM_GENO * sizeof(long double)));
+
+for (i = 0; i < NUM_GENO; i++)
+ {
+ int i1 = nuc_index(GENO_1[i]);
+ int i2 = nuc_index(GENO_2[i]);
+ for (j = 0; j < NUM_GENO; j++)
+ {
+ int j1 = nuc_index(GENO_1[j]);
+ int j2 = nuc_index(GENO_2[j]);
+
+ long double val1 = mutProb[i1][j1] + mutProb[i2][j2];
+ long double val2 = mutProb[i1][j2] + mutProb[i2][j1];
+
+ if (val1 > val2)
+ mutProbGeno[i][j] = val1;
+ else
+ mutProbGeno[i][j] = val2;
+ }
+ }
+baseProb = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+ baseProb[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+long double inprob = IN_PROB;
+long double outprob = (1.0 - inprob) / 3.0;
+
+for (i = 0; i < NUM_GENO; i++)
+ {
+ int i1 = nuc_index(GENO_1[i]);
+ int i2 = nuc_index(GENO_2[i]);
+ for (j = 0; j < NUM_NUC; j++)
+ {
+ if (i1 == j && i2 == j)
+ baseProb[i][j] = inprob;
+ else if (i1 == j || i2 == j)
+ baseProb[i][j] = (inprob + outprob) / 2.0;
+ else
+ baseProb[i][j] = outprob;
+
+ baseProb[i][j] = logl(baseProb[i][j]);
+ fprintf(stderr, "%c%c-%c %Lf\t", GENO_1[i], GENO_2[i], NUCS[j], baseProb[i][j]);
+ }
+ fprintf(stderr, "\n");
+ }
+}
+
+static long double dataProbGerm(int geno)
+{
+int i, j, tot = 0;
+long double p = 0.0, scale = 0.0;
+
+for (i = 0; i < NUM_NUC; i++)
+ {
+ p += (long double) countsR[i] * baseProb[geno][i];
+
+ tot += countsR[i];
+ for (j = 0; j < countsR[i]; j++)
+ scale -= logvals[j];
+ }
+
+for (i = 0; i < tot; i++)
+ scale += logvals[i];
+
+return p + scale;
+}
+
+static long double dataProbTum(int genoG, int genoT, long double alpha)
+{
+int i, j, tot = 0;
+long double p = 0.0, scale = 0.0;
+
+for (i = 0; i < NUM_NUC; i++)
+ {
+ p += (long double) countsL[i] * (alpha * baseProb[genoG][i] + (1.0 - alpha) * baseProb[genoT][i]);
+
+ tot += countsL[i];
+ for (j = 0; j < countsL[i]; j++)
+ scale -= logvals[j];
+ }
+
+for (i = 0; i < tot; i++)
+ scale += logvals[i];
+
+return p + scale;
+}
+
+
+static int is_mut_prob(dual_pileup_t *d, analysis_params_t *ap, char *ref,
+ char *callL1, char *callL2, char *callR1, char *callR2,
+ long double *conf)
+{
+if (majorR.vote == 0 || majorL.vote == 0)
+ return 0; // most likely a bad quality region.
+
+int i, j, imax = -1, jmax = -1;
+int totalL = 0, totalR = 0;
+for (i = 0; i < NUM_NUC; i++)
+ {
+ totalL += countsL[i];
+ totalR += countsR[i];
+ }
+
+if (totalL < ap->minSuppSNP || totalR < ap->minSuppSNP)
+ return 0;
+
+/* rescale counts to total to less than MAX_COUNT, if exceeded */
+if (totalL > MAX_COUNT)
+ for (i = 0; i < NUM_NUC; i++)
+ countsL[i] = (int) floor((float) countsL[i] * (float) MAX_COUNT / (float) totalL);
+if (totalR > MAX_COUNT)
+ for (i = 0; i < NUM_NUC; i++)
+ countsR[i] = (int) floor((float) countsR[i] * (float) MAX_COUNT / (float) totalR);
+
+long double alpha = (ap->fracGerm * (long double) totalR) / (long double) totalL;
+if (alpha > ap->fracGerm)
+ alpha = ap->fracGerm;
+
+char rb = (d->ref && (int)d->posL < d->len) ? toupper(d->ref[d->posL]) : 'N';
+int ri = nuc_index(rb);
+int i1, i2, j1, j2;
+
+long double p, ptot = 0.0, pmax = -999999999.99;
+for (i = 0; i < NUM_GENO; i++)
+ { // loop germline genotypes
+ i1 = nuc_index(GENO_1[i]);
+ i2 = nuc_index(GENO_2[i]);
+ if (!countsL[i1] && !countsL[i2] && !countsR[i1] && !countsR[i2])
+ continue; // skip genotypes
+
+ long double germP = 0.0;
+ if (ri >= 0)
+ germP = genoPrior[i][ri];
+ germP += dataProbGerm(i);
+
+ for (j = 0; j < NUM_GENO; j++)
+ { // loop tumor genotypes
+ j1 = nuc_index(GENO_1[j]);
+ j2 = nuc_index(GENO_2[j]);
+ if (!countsL[j1] && !countsL[j2] && !countsR[j1] && !countsR[j2])
+ continue;
+
+ p = germP;
+ p += mutProbGeno[i][j];
+ p += dataProbTum(i, j, alpha);
+
+ ptot += expl(p);
+
+ if (p > pmax)
+ {
+ pmax = p;
+ imax = i;
+ jmax = j;
+ }
+ }
+ }
+
+if (imax < 0 || jmax < 0)
+ return 0;
+
+*ref = rb;
+*callL1 = GENO_1[jmax];
+*callL2 = GENO_2[jmax];
+*callR1 = GENO_1[imax];
+*callR2 = GENO_2[imax];
+*conf = expl(pmax) / ptot;
+
+if (imax != jmax)
+ return 1;
+if (GENO_1[imax] != GENO_2[imax] || (rb != GENO_1[imax] && rb != GENO_2[imax]))
+ return 2;
+
+return 0;
+}
+
+
+static void do_mut_prob(dual_pileup_t *d, analysis_params_t *ap)
+{
+int ret;
+char ref, callL1, callL2, callR1, callR2;
+long double conf;
+if ((ret = is_mut_prob(d, ap, &ref, &callL1, &callL2, &callR1, &callR2, &conf)) == 0)
+ return;
+
+printf("MUT2\t%s\t%d\t%d\t",
+ d->hL->target_name[d->tidL], d->posL, d->posL + 1);
+if (ret == 1)
+ printf("SOM\t");
+else
+ printf("GERM\t");
+
+printf("%c\t", ref);
+printf("%c%c>", callR1, callR2);
+printf("%c%c\t", callL1, callL2);
+
+printf("%Lf\t", conf);
+print_nuc_info();
+}
static void perform_analysis(dual_pileup_t *d, void *func_data)
{
analysis_params_t *ap = (analysis_params_t *)func_data;
/* processes pileup for het and mut analysis */
nuc_counts(d, ap);
/* overall cnv */
do_cnv(d, ap);
/* structural variation */
do_struct_var(d, ap);
/* het analysis: ASCN & phasing */
do_het_analysis(d, ap);
/* mutation discovery */
-do_mut(d, ap);
+do_mut(d, ap); // old way
+do_mut_prob(d, ap); // new way
fflush(stdout);
}
void bamBam(char *inbamL, char *inbamR, char *faidx, int mask)
{
analysis_params_t ap;
ap.minQ = minQ;
ap.minMapQ = minMapQ;
ap.avgMapQ = avgMapQ;
ap.maxISize = maxISize;
ap.minChiSq = minChiSq;
ap.minSuppSNP = minSuppSNP;
ap.minSuppSV = minSuppSV;
ap.sites = 0;
ap.readsPerBin = readsPerBin;
ap.pStart = -1;
ap.pStop = -1;
ap.pStartPrev = -1;
ap.pStopPrev = -1;
/* Overall Copy Number */
ap.tid = BAD_TID;
ap.lstart = -1;
ap.lstop = -1;
ap.lcounts = 0;
ap.rstart = -1;
ap.rstop = -1;
ap.rcounts = 0;
ap.prevTid = BAD_TID;
ap.prevStart = -1;
ap.prevStop = -1;
/* ASCN */
ap.tidAS = BAD_TID;
ap.startAS = -1;
ap.stopAS = -1;
ap.lcountsMin = 0;
ap.lcountsMax = 0;
ap.rcountsMin = 0;
ap.rcountsMax = 0;
/* SV */
ap.svStartL = -1;
ap.svStopL = -1;
ap.svStartR = -1;
ap.svStopR = -1;
reset_intra();
reset_inter();
bam_bam_file(inbamL, inbamR, faidx, position, mask, perform_analysis, &ap);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc < 3)
usage();
if (optionExists("fa"))
faidx = optionVal("fa", NULL);
if (optionExists("position"))
position = optionVal("position", NULL);
if (optionExists("minSuppSNP"))
minSuppSNP = optionInt("minSuppSNP", 4);
if (optionExists("minSuppSV"))
minSuppSV = optionInt("minSuppSV", 2);
if (optionExists("minQ"))
minQ = optionInt("minQ", 10);
if (optionExists("minMapQ"))
minMapQ = optionInt("minMapQ", 20);
if (optionExists("avgMapQ"))
avgMapQ = optionInt("avgMapQ", 30);
if (optionExists("maxISize"))
maxISize = optionInt("maxISize", 2000);
if (optionExists("minChiSq"))
minChiSq = optionInt("minChiSq", 250);
if (optionExists("readsPerBin"))
readsPerBin = optionInt("readsPerBin", 2500);
char *inbamL = argv[1];
char *inbamR = argv[2];
bamBam(inbamL, inbamR, faidx, BAM_DEF_MASK);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif