src/hg/tcga/pBamBam/asBamBam.c 1.1
1.1 2010/03/05 19:53:24 jsanborn
initial commit
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: src/hg/tcga/pBamBam/asBamBam.c
diff -N src/hg/tcga/pBamBam/asBamBam.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/pBamBam/asBamBam.c 5 Mar 2010 19:53:24 -0000 1.1
@@ -0,0 +1,944 @@
+/* asBamBam -- discovering variants from BAM file
+ * code adapted from Heng Li's Samtools library functions
+ */
+
+#ifdef USE_BAM
+
+#include "common.h"
+#include "hCommon.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+#define _IOLIB 2
+#include "bam.h"
+#include "sam.h"
+#include "bam_helper.h"
+
+static char const rcsid[] = "$Id";
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+ "asBamBam - Allele-specific BamBam.\n"
+ "usage:\n"
+ " asBamBam [options] <left.bam> <right.bam>\n"
+ "options:\n"
+ " -position=chrX:1-100 - Position to do analysis. [None]\n"
+ " -minSupport=INT - Minimum Support in Germline. [10]\n"
+ " -minQ=INT - Minimum acceptable base quality. [0]\n"
+ " -minMapQ=INT - Minimum acceptable mapping quality. [0]\n"
+ " -minChiSq=INT - Minimum acceptable chi square of nucleotide freqs. [0]\n"
+ " -readsPerBin=INT - Reads per CNV bin [500]\n"
+ " -readLength=INT - Length of read [100]\n"
+ "\n"
+ );
+}
+
+static struct optionSpec options[] = {
+ {"position", OPTION_STRING},
+ {"minSupport", OPTION_INT},
+ {"minQ", OPTION_INT},
+ {"minMapQ", OPTION_INT},
+ {"minChiSq", OPTION_INT},
+ {"readsPerBin", OPTION_INT},
+ {"readLength", OPTION_INT},
+ {NULL, 0}
+};
+
+#define BAD_TID -999
+
+/* Option variables */
+char *position = NULL;
+int minSupport = 2;
+int minQ = 0;
+int minMapQ = 0;
+int minChiSq = 0;
+int readsPerBin = 500;
+int readLength = 100;
+
+
+typedef struct {
+ int minQ;
+ int minMapQ;
+ int minChiSq;
+ int minSupp;
+ int sites;
+
+ int readLength;
+ int readsPerBin;
+
+ int pStartPrev;
+ int pStopPrev;
+ int pStart;
+ int pStop;
+
+ /* Overall Copy Number */
+ int32_t tid;
+ int32_t lstart;
+ int32_t lstop;
+ int lcounts;
+ int32_t rstart;
+ int32_t rstop;
+ int rcounts;
+
+ int32_t prevTid;
+ int32_t prevStart;
+ int32_t prevStop;
+
+ /* ASCN */
+ int32_t tidAS;
+ int32_t startAS;
+ int32_t stopAS;
+ int lcountsMin;
+ int lcountsMax;
+ int rcountsMin;
+ int rcountsMax;
+
+} analysis_params_t;
+
+static int32_t countsL1[4];
+static int32_t countsL2[4];
+static int32_t countsL3[4];
+
+static int32_t countsR1[4];
+static int32_t countsR2[4];
+static int32_t countsR3[4];
+
+static int32_t votesL[4];
+static int32_t countsL[4];
+static int32_t votesR[4];
+static int32_t countsR[4];
+
+typedef struct {
+ char nuc;
+ int count;
+ int vote;
+} variant_t;
+
+variant_t minorL, majorL;
+variant_t minorR, majorR;
+
+static int nuc_index(char c)
+{
+switch (c)
+ {
+ case 'A':
+ return 0;
+ case 'G':
+ return 1;
+ case 'C':
+ return 2;
+ case 'T':
+ return 3;
+ default:
+ return -1;
+ }
+return -1;
+}
+#define NUCS "AGCT"
+
+static inline void nuc_counts(dual_pileup_t *d, analysis_params_t *ap)
+{
+int i, index;
+for (i = 0; i < 4; i++)
+ { // sum up the qualities of each base
+ votesL[i] = 0;
+ countsL[i] = 0;
+ votesR[i] = 0;
+ countsR[i] = 0;
+
+ countsL1[i] = 0;
+ countsL2[i] = 0;
+ countsL3[i] = 0;
+ countsR1[i] = 0;
+ countsR2[i] = 0;
+ countsR3[i] = 0;
+ }
+
+/* reset pileup info */
+ap->pStart = -1, ap->pStop = -1;
+
+int32_t start, stop;
+int32_t prevStart = -1, prevStop = -1;
+const bam_pileup1_t *p;
+for (i = 0; i < d->nL; ++i)
+ {
+ p = d->puL + i;
+ bam1_core_t *c = &p->b->core;
+
+ if ((c)->qual < ap->minMapQ)
+ continue;
+ int qual = bam1_qual(p->b)[p->qpos];
+ if (qual < ap->minQ)
+ continue;
+
+ /* Check if unmapped or PCR optical dupe */
+ if (((c)->flag & BAM_FUNMAP) || ((c)->flag & BAM_FDUP) )
+ continue;
+
+ /* Check if start & stop has already been visited, to dupes missed in prev step*/
+ start = (c)->pos;
+ stop = start + (c)->l_qseq;
+ if (start <= prevStart && stop <= prevStop)
+ continue;
+ prevStart = start;
+ prevStop = stop;
+
+ if (ap->pStart == -1)
+ {
+ ap->pStart = start;
+ ap->pStop = stop;
+ }
+ if (start < ap->pStart)
+ ap->pStart = start;
+ if (stop > ap->pStop)
+ ap->pStop = stop;
+
+ if (!p->is_del && p->indel == 0)
+ {
+ double lseq = (double) p->b->core.l_qseq;
+ int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
+ if ((index = nuc_index(b)) < 0)
+ continue;
+
+ votesL[index] += qual;
+ countsL[index]++;
+
+ if (p->qpos < round(0.33 * lseq))
+ countsL1[index]++;
+ else if (p->qpos < round(0.67 * lseq))
+ countsL2[index]++;
+ else
+ countsL3[index]++;
+ }
+ }
+
+prevStart = -1;
+prevStop = -1;
+for (i = 0; i < d->nR; ++i)
+ {
+ p = d->puR + i;
+ bam1_core_t *c = &p->b->core;
+
+ if ((c)->qual < ap->minMapQ)
+ continue;
+ int qual = bam1_qual(p->b)[p->qpos];
+ if (qual < ap->minQ)
+ continue;
+
+ /* Check if unmapped or PCR optical dupe */
+ if (((c)->flag & BAM_FUNMAP) || ((c)->flag & BAM_FDUP) )
+ continue;
+
+ /* Check if start & stop has already been visited, to dupes missed in prev step*/
+ start = (c)->pos;
+ stop = start + (c)->l_qseq;
+ if (start <= prevStart && stop <= prevStop)
+ continue;
+ prevStart = start;
+ prevStop = stop;
+
+ if (ap->pStart == -1)
+ {
+ ap->pStart = start;
+ ap->pStop = stop;
+ }
+ if (start < ap->pStart)
+ ap->pStart = start;
+ if (stop > ap->pStop)
+ ap->pStop = stop;
+
+ if (!p->is_del && p->indel == 0)
+ {
+ double lseq = (double) p->b->core.l_qseq;
+ int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
+ if ((index = nuc_index(b)) < 0)
+ continue;
+ votesR[index] += qual;
+ countsR[index]++;
+
+ if (p->qpos < round(0.33 * lseq))
+ countsR1[index]++;
+ else if (p->qpos < round(0.67 * lseq))
+ countsR2[index]++;
+ else
+ countsR3[index]++;
+ }
+ }
+
+int iL1 = -1, iL2 = -1, maxL1 = 0, maxL2 = 0;
+int iR1 = -1, iR2 = -1, maxR1 = 0, maxR2 = 0;
+int countL1 = -1, countL2 = -1, countR1 = -1, countR2 = -1;
+for (i = 0; i < 4; i++)
+ {
+ if (countsL[i] > countL1)
+ {
+ iL2 = iL1; // set prev max to max2
+ maxL2 = maxL1;
+ countL2 = countL1;
+
+ iL1 = i;
+ maxL1 = votesL[i];
+ countL1 = countsL[i];
+ }
+ else if (countsL[i] > countL2)
+ {
+ iL2 = i;
+ maxL2 = votesL[i];
+ countL2 = countsL[i];
+ }
+ if (countsR[i] > countR1)
+ {
+ iR2 = iR1;
+ maxR2 = maxR1;
+ countR2 = countR1;
+
+ iR1 = i;
+ maxR1 = votesR[i];
+ countR1 = countsR[i];
+ }
+ else if (countsR[i] > countR2)
+ {
+ iR2 = i;
+ maxR2 = votesR[i];
+ countR2 = countsR[i];
+ }
+ }
+
+majorL.nuc = 'N';
+majorL.vote = 0;
+majorL.count = 0;
+if (iL1 != -1 && countL1 >= ap->minSupp)
+ {
+ majorL.nuc = NUCS[iL1];
+ majorL.vote = maxL1;
+ majorL.count = countL1;
+ }
+
+minorL.nuc = 'N';
+minorL.vote = 0;
+minorL.count = 0;
+if (iL2 != -1 && countL2 >= ap->minSupp)
+ {
+ minorL.nuc = NUCS[iL2];
+ minorL.vote = maxL2;
+ minorL.count = countL2;
+ }
+
+majorR.nuc = 'N';
+majorR.vote = 0;
+majorR.count = 0;
+if (iR1 != -1 && countR1 >= ap->minSupp)
+ {
+ majorR.nuc = NUCS[iR1];
+ majorR.vote = maxR1;
+ majorR.count = countR1;
+ }
+
+minorR.nuc = 'N';
+minorR.vote = 0;
+minorR.count = 0;
+if (iR2 != -1 && countR2 >= ap->minSupp)
+ {
+ minorR.nuc = NUCS[iR2];
+ minorR.vote = maxR2;
+ minorR.count = countR2;
+ }
+}
+
+double chi_square(int obs1, int obs2, double p) // p = binomial prob of getting 1
+{
+double pseudo = 1.0;
+int count = obs1 + obs2;
+
+double exp1 = p * ((double) count) + pseudo;
+double exp2 = (1.0 - p) * ((double) count) + pseudo;
+
+// apply Yates correction... will often have small observed values.
+return pow(abs((double)obs1 - exp1) - 0.5, 2.0)/exp1 + pow(abs((double)obs2 - exp2) - 0.5, 2.0)/exp2;
+}
+
+static int sig_diff(dual_pileup_t *d, analysis_params_t *ap)
+{
+int i;
+double pseudo = 1.0;
+int totalL = 0, totalR = 0;
+
+for (i = 0; i < 4; i++)
+ {
+ totalL += countsL[i];
+ totalR += countsR[i];
+ }
+
+double obs, exp, chi_sq = 0.0;
+for (i = 0; i < 4; i++)
+ {
+ obs = ((double) countsL[i] * 100.0) / (double) totalL + pseudo;
+ exp = ((double) countsR[i] * 100.0) / (double) totalR + pseudo;
+ chi_sq += pow(abs(obs - exp) - 0.5, 2.0) / exp;
+ }
+
+return chi_sq;
+}
+
+static int is_mut(dual_pileup_t *d, analysis_params_t *ap,
+ char *callL1, char *callL2, char *callR1, char *callR2,
+ float *hetL, float *hetR, int *stateL, int *stateR, int *sites)
+{
+*sites += 1;
+
+if (majorR.vote == 0 || majorL.vote == 0)
+ return 0; // most likely a bad quality region.
+
+if (sig_diff(d, ap) < ap->minChiSq)
+ return 0;
+
+int i = 0;
+int rCount = 0;
+for (i = 0; i < 4; i++)
+ rCount += countsR1[i] + countsR2[i] + countsR3[i];
+
+if (rCount < 10)
+ return 0;
+
+
+int retVal1 = 0, retVal2 = 0;
+for (i = 0; i < 4; i++)
+ {
+ if (NUCS[i] != majorL.nuc && NUCS[i] != minorL.nuc)
+ continue;
+
+ if (NUCS[i] == majorL.nuc && majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
+ {
+ // if (countsL1[i] >= 2 && countsL2[i] >= 1)
+ // retVal1 = 1;
+ // else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
+ // retVal1 = 1;
+
+ if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
+ retVal1 = 1;
+ else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
+ retVal1 = 1;
+ else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
+ retVal1 = 1;
+
+ if (retVal1)
+ {
+ if (rCount >= 30 && countsR[i] <= 1)
+ retVal1 = 1;
+ else if (rCount < 30 && countsR[i] == 0)
+ retVal1 = 1;
+ else
+ retVal1 = 0;
+ }
+ }
+
+ if (NUCS[i] == minorL.nuc && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
+ {
+ // if (countsL1[i] >= 2 && countsL2[i] >= 1)
+ // retVal2 = 1;
+ // else if (countsL1[i] > 0 && countsL2[i] > 0 && countsL3[i] > 0)
+ // retVal2 = 1;
+
+ if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
+ retVal2 = 1;
+ else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
+ retVal2 = 1;
+ else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
+ retVal2 = 1;
+
+ if (retVal2)
+ {
+ if (rCount >= 30 && countsR[i] <= 1)
+ retVal2 = 1;
+ else if (rCount < 30 && countsR[i] == 0)
+ retVal2 = 1;
+ else
+ retVal2 = 0;
+ }
+ }
+ }
+
+if (!retVal1 && !retVal2)
+ return 0;
+
+
+float freqL = ((float) minorL.count) / ((float) majorL.count + (float) minorL.count);
+float freqR = ((float) minorR.count) / ((float) majorR.count + (float) minorR.count);
+
+*hetL = freqL;
+*hetR = freqR;
+
+*callL1 = majorL.nuc;
+*callL2 = minorL.nuc;
+*callR1 = majorR.nuc;
+*callR2 = minorR.nuc;
+
+if (majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
+ return 1;
+if (minorL.nuc != 'N' && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
+ return 1;
+
+return 0;
+}
+
+
+
+
+static int calc_cnv(dual_pileup_t *d, analysis_params_t *ap)
+{
+if ((ap->tid != BAD_TID) && (ap->tid != d->tidL)) // onto next chromosome
+ return 2;
+
+int i;
+const bam_pileup1_t *p;
+if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
+ {
+ for (i = 0; i < d->nL; ++i)
+ {
+ p = d->puL + i;
+ bam1_core_t *c = &p->b->core;
+ if ((c)->pos <= ap->prevStop)
+ return 0;
+ }
+
+ for (i = 0; i < d->nR; ++i)
+ {
+ p = d->puR + i;
+ bam1_core_t *c = &p->b->core;
+ if ((c)->pos <= ap->prevStop)
+ return 0;
+ }
+ }
+
+ap->tid = d->tidL;
+
+int32_t start, stop;
+int32_t lstart = ap->lstart, lstop = ap->lstop;
+int32_t prevPos = lstop - 1;
+for (i = 0; i < d->nL; ++i)
+ {
+ p = d->puL + i;
+ bam1_core_t *c = &p->b->core;
+ if ((c)->qual < ap->minMapQ)
+ continue;
+
+ if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
+ { // read is mapped and not an optical/PCR dupe
+ start = (c)->pos;
+ stop = start + (c)->l_qseq;
+
+ if (lstart == -1)
+ {
+ lstart = start;
+ lstop = start + 1;
+ ap->lcounts += 1;
+ }
+ else if ((start > prevPos) && (start > lstart)) // && stop > lstop))
+ { // make sure there are no possible dupes by removing all by
+ // comparing left-most position of each read, only allowing one per pos.
+ prevPos = start;
+ lstop = start + 1;
+ ap->lcounts += 1;
+ }
+ }
+ }
+
+int32_t rstart = ap->rstart, rstop = ap->rstop;
+prevPos = rstop - 1;
+for (i = 0; i < d->nR; ++i)
+ {
+ p = d->puR + i;
+ bam1_core_t *c = &p->b->core;
+ if ((c)->qual < ap->minMapQ)
+ continue;
+
+ if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
+ { // read is mapped and not an optical/PCR dupe
+ start = (c)->pos;
+ stop = start + (c)->l_qseq;
+
+ if (rstart == -1)
+ {
+ rstart = start;
+ rstop = start + 1;
+ ap->rcounts += 1;
+ }
+ else if ((start > prevPos) && (start > rstart)) // && (stop > rstop))
+ { // make sure there are no possible dupes by removing all by
+ // comparing left-most position of each read, only allowing one per pos.
+ prevPos = start;
+ rstop = start + 1;
+ ap->rcounts += 1;
+ }
+ }
+ }
+
+ap->lstart = lstart;
+ap->lstop = lstop;
+
+ap->rstart = rstart;
+ap->rstop = rstop;
+
+if ((ap->lcounts >= ap->readsPerBin) || (ap->rcounts >= ap->readsPerBin))
+ return 1;
+
+return 0;
+}
+
+static int calc_ascn(dual_pileup_t *d, analysis_params_t *ap,
+ int maxS, int minS, int maxG, int minG)
+{
+if ((ap->tidAS != BAD_TID) && (ap->tidAS != d->tidL)) // onto next chromosome
+ return 2;
+
+if ( ap->pStopPrev != -1 && (d->posL < ap->pStopPrev))
+ return 0; // within previous position's territory, avoid double-counting
+
+ap->tidAS = d->tidL;
+
+if (ap->startAS == -1)
+ {
+ ap->startAS = d->posL;
+ ap->stopAS = d->posL;
+ }
+
+if (d->posL > ap->stopAS)
+ ap->stopAS = d->posL;
+
+ap->lcountsMin += minS;
+ap->lcountsMax += maxS;
+
+ap->rcountsMin += minG;
+ap->rcountsMax += maxG;
+
+int totalL = ap->lcountsMin + ap->lcountsMax;
+int totalR = ap->rcountsMin + ap->rcountsMax;
+
+if (totalL >= ap->readsPerBin || totalR >= ap->readsPerBin)
+ return 1;
+
+return 0;
+}
+
+
+static int is_het(dual_pileup_t *d, analysis_params_t *ap)
+{
+int i = 0;
+int rCount = 0, maxCountR = 0;
+for (i = 0; i < 4; i++)
+ {
+ if (countsR[i] < ap->minSupp)
+ continue;
+ rCount += countsR[i];
+ if (countsR[i] > maxCountR)
+ maxCountR = countsR[i];
+ }
+
+double het = (double) maxCountR / (double) rCount;
+if (het > 0.4 && het < 0.6)
+ return 1; // need a smarter test here.
+
+return 0;
+}
+
+static int is_sv(dual_pileup_t *d, analysis_params_t *ap)
+{
+return 0;
+}
+
+
+static void print_nuc_info()
+{
+int i, first = 1;
+for (i = 0; i < 4; i++)
+ {
+ if (countsR[i] == 0)
+ continue;
+ if (!first)
+ putchar(' ');
+ printf("%c:%d/%2.1f", NUCS[i], countsR[i], (double) votesR[i] / (double) countsR[i]);
+ first = 0;
+ }
+putchar('\t');
+first = 1;
+for (i = 0; i < 4; i++)
+ {
+ if (countsL[i] == 0)
+ continue;
+ if (!first)
+ putchar(' ');
+ printf("%c:%d/%2.1f", NUCS[i], countsL[i], (double) votesL[i] / (double) countsL[i]);
+ first = 0;
+ }
+
+putchar('\n');
+}
+
+
+static void perform_analysis(dual_pileup_t *d, void *func_data)
+{
+analysis_params_t *ap = (analysis_params_t *)func_data;
+
+int i, ret;
+if ((ret = calc_cnv(d, ap)) > 0)
+ { // do overall cnv
+ int start = ap->rstart;
+ int stop = ap->rstop;
+ if (ap->lstart < start)
+ start = ap->lstart;
+ if (ap->lstop > stop)
+ stop = ap->lstop;
+
+ printf("CNV\t%s\t%d\t%d\t%f\t%d\t%d\n",
+ d->hL->target_name[ap->tid],
+ start, stop, (double) ap->lcounts / (double) ap->rcounts,
+ ap->rcounts, ap->lcounts);
+
+ ap->prevTid = ap->tid;
+ ap->prevStart = ap->lstart;
+ ap->prevStop = ap->lstop;
+
+ ap->lcounts = 0;
+ ap->rcounts = 0;
+
+ if (ret == 2)
+ { // switching chroms
+ ap->tid = BAD_TID;
+
+ ap->lstart = -1;
+ ap->lstop = -1;
+ ap->rstart = -1;
+ ap->rstop = -1;
+ }
+ else
+ { // on same chrom
+ ap->lstart = ap->lstop;
+ ap->rstart = ap->rstop;
+ }
+ }
+
+if (is_sv(d, ap))
+ { // do struct vars
+ printf("SVS\t%s\t%d\t%d\t",
+ d->hL->target_name[d->tidL], d->posL, d->posL + 1);
+
+ print_nuc_info();
+ }
+
+nuc_counts(d, ap);
+if (is_het(d, ap))
+ { // do ascn & phasing
+ char minGa = 'N';
+ char maxGa = 'N';
+ char minSa = 'N';
+ char maxSa = 'N';
+ int minGc = 0;
+ int maxGc = 0;
+ int minSc = 0;
+ int maxSc = 0;
+ for (i = 0; i < 4; i++)
+ {
+ int cr = countsR[i];
+ int cl = countsL[i];
+ if (cr > maxGc)
+ {
+ minGc = maxGc;
+ minGa = maxGa;
+
+ maxGc = cr;
+ maxGa = NUCS[i];
+ }
+ else if (cr > minGc)
+ {
+ minGc = cr;
+ minGa = NUCS[i];
+ }
+
+ if (cl > maxSc)
+ {
+ minSc = maxSc;
+ minSa = maxSa;
+
+ maxSc = cl;
+ maxSa = NUCS[i];
+ }
+ else if (cl > minSc)
+ {
+ minSc = cl;
+ minSa = NUCS[i];
+ }
+ }
+
+ if (minSc < ap->minSupp)
+ {
+ minSc = 0;
+ minSa = 'N';
+ }
+
+ char g1 = 'N';
+ char g2 = 'N';
+ if (maxSa == maxGa)
+ {
+ g1 = maxGa;
+ g2 = minGa;
+ }
+ else if (maxSa == minGa)
+ {
+ g1 = minGa;
+ g2 = maxGa;
+ }
+ // else, weird case!
+
+ if (g1 != 'N' && g2 != 'N')
+ {
+ printf("HAP\t%s\t%d\t%d\t%c %c\t%c %c\t",
+ d->hL->target_name[d->tidL], d->posL, d->posL + 1,
+ g1, g2, maxSa, minSa);
+ print_nuc_info();
+
+ if ((ret = calc_ascn(d, ap, maxSc, minSc, maxGc, minGc)) > 0)
+ {
+ double minCN = (double) (ap->lcountsMin) / (double) (ap->rcountsMin + ap->rcountsMax);
+ double maxCN = (double) (ap->lcountsMax) / (double) (ap->rcountsMin + ap->rcountsMax);
+
+ printf("ASC\t%s\t%d\t%d\t%f\t%f\t%d\t%d\t%d\t%d\n",
+ d->hL->target_name[ap->tidAS],
+ ap->startAS, ap->stopAS,
+ maxCN, minCN,
+ ap->rcountsMax, ap->rcountsMin,
+ ap->lcountsMax, ap->lcountsMin);
+
+ ap->startAS = -1;
+ ap->stopAS = -1;
+ ap->lcountsMax = 0;
+ ap->lcountsMin = 0;
+ ap->rcountsMax = 0;
+ ap->rcountsMin = 0;
+
+ if (ret == 2)
+ {
+ ap->tidAS = BAD_TID;
+ ap->pStartPrev = -1;
+ ap->pStopPrev = -1;
+ }
+ }
+ ap->pStartPrev = ap->pStart;
+ ap->pStopPrev = ap->pStop;
+ }
+ }
+
+int stateL, stateR;
+char callL1, callL2, callR1, callR2;
+float hetL, hetR;
+
+if (is_mut(d, ap, &callL1, &callL2, &callR1, &callR2,
+ &hetL, &hetR, &stateL, &stateR, &ap->sites))
+ {
+ printf("MUT\t%s\t%d\t%d\t",
+ d->hL->target_name[d->tidL], d->posL, d->posL + 1);
+
+ if (callR2 == 'N')
+ printf("%c>", callR1);
+ else
+ printf("%c/%c>", callR1, callR2);
+
+ if (callL2 == 'N')
+ printf("%c\t", callL1);
+ else
+ printf("%c/%c\t", callL1, callL2);
+
+ double chi_sq = sig_diff(d, ap);
+ printf("%f\t", chi_sq);
+ print_nuc_info();
+ }
+
+fflush(stdout);
+}
+
+void bamBam(char *inbamL, char *inbamR, int mask)
+{
+analysis_params_t ap;
+ap.minQ = minQ;
+ap.minMapQ = minMapQ;
+ap.minChiSq = minChiSq;
+ap.minSupp = minSupport;
+ap.sites = 0;
+
+ap.readLength = readLength;
+ap.readsPerBin = readsPerBin;
+
+ap.pStart = -1;
+ap.pStop = -1;
+ap.pStartPrev = -1;
+ap.pStopPrev = -1;
+
+/* Overall Copy Number */
+ap.tid = BAD_TID;
+ap.lstart = -1;
+ap.lstop = -1;
+ap.lcounts = 0;
+ap.rstart = -1;
+ap.rstop = -1;
+ap.rcounts = 0;
+ap.prevTid = BAD_TID;
+ap.prevStart = -1;
+ap.prevStop = -1;
+
+/* ASCN */
+ap.tidAS = BAD_TID;
+ap.startAS = -1;
+ap.stopAS = -1;
+ap.lcountsMin = 0;
+ap.lcountsMax = 0;
+ap.rcountsMin = 0;
+ap.rcountsMax = 0;
+
+bam_bam_file(inbamL, inbamR, position, mask, perform_analysis, &ap);
+fprintf(stderr, "# qualified sites = %d\n", ap.sites);
+}
+
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc < 3)
+ usage();
+
+if (optionExists("minSupport"))
+ minSupport = optionInt("minSupport", 2);
+if (optionExists("minQ"))
+ minQ = optionInt("minQ", 0);
+if (optionExists("minMapQ"))
+ minMapQ = optionInt("minMapQ", 0);
+if (optionExists("minChiSq"))
+ minChiSq = optionInt("minChiSq", 0);
+if (optionExists("position"))
+ position = optionVal("position", NULL);
+if (optionExists("readsPerBin"))
+ readsPerBin = optionInt("readsPerBin", 500);
+if (optionExists("readLength"))
+ readLength = optionInt("readLength", 100);
+
+char *inbamL = argv[1];
+char *inbamR = argv[2];
+bamBam(inbamL, inbamR, BAM_DEF_MASK);
+
+return 0;
+}
+
+#else // USE_BAM not defined
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+printf("BAM not installed");
+
+return 0;
+}
+#endif