src/hg/tcga/pBamBam/asBamBam.c 1.11

1.11 2010/06/01 21:46:13 jsanborn
added new mutation caller
Index: src/hg/tcga/pBamBam/asBamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/pBamBam/asBamBam.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -b -B -U 1000000 -r1.10 -r1.11
--- src/hg/tcga/pBamBam/asBamBam.c	22 Apr 2010 18:10:50 -0000	1.10
+++ src/hg/tcga/pBamBam/asBamBam.c	1 Jun 2010 21:46:13 -0000	1.11
@@ -1,1509 +1,1811 @@
 /* asBamBam -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 #include "bam_helper.h"
 
 static char const rcsid[] = "$Id";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "asBamBam - Allele-Specific BamBam.\n"
   "usage:\n"
   "   asBamBam [options] <left.bam> <right.bam>\n"
   "options:\n"
   "   -fa=file          - FASTA filename of reference [NULL]\n"
   "   -position=chr1:1-100 - Position to do analysis. [NULL=process all reads]\n" 
   "   -minSuppSNP=INT   - Minimum Support for SNP/Het Calling. [4]\n"
   "   -minSuppSV=INT    - Minimum Support for struct vars. [2]\n"
   "   -minQ=INT         - Minimum acceptable base quality. [10]\n"
   "   -minMapQ=INT      - Minimum acceptable mapping quality. [20]\n"
   "   -avgMapQ=INT      - Minimum acceptable average mapping quality. [30]\n"
   "   -minChiSq=INT     - Minimum acceptable chi square of nucleotide freqs. [250]\n"
   "   -maxISize=INT     - Maximum Insert Size. [2000]\n"
   "   -readsPerBin=INT  - Reads per CNV bin [2500]\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {"fa", OPTION_STRING},
     {"position", OPTION_STRING},
     {"minSuppSNP", OPTION_INT},
     {"minSuppSV", OPTION_INT},
     {"minQ", OPTION_INT},
     {"minMapQ", OPTION_INT},
     {"avgMapQ", OPTION_INT},
     {"maxISize", OPTION_INT},
     {"minChiSq", OPTION_INT}, 
     {"readsPerBin", OPTION_INT},
     {NULL, 0}
 };
 
+#define IN_PROB 0.94     // probability of getting base that's member of genotype
+                         // (1.0 - IN_PROB) / 3.0 = prob of getting error  
 #define BAD_TID -999
 
 /* Option variables */
 char *faidx     = NULL;
 char *position  = NULL;
 int minSuppSNP  = 4;
 int minSuppSV   = 2;
 int minQ        = 10;
 int minMapQ     = 20;
 int avgMapQ     = 30;
 int maxISize    = 2000;
 int minChiSq    = 250;
 int readsPerBin = 2500;
 
 typedef struct {
     int minQ;
     int minMapQ;
     int avgMapQ;
     int maxISize;
     int minChiSq;
     int minSuppSNP;
     int minSuppSV;
     int sites;
 
     int readsPerBin;
 
     int pStartPrev;
     int pStopPrev;
     int pStart;
     int pStop;
 
     /* Overall Copy Number */
     int32_t tid;
     int32_t lstart;
     int32_t lstop;
     int lcounts;
     int32_t rstart;
     int32_t rstop;
     int rcounts;
 
     int32_t prevTid;
     int32_t prevStart;
     int32_t prevStop;
 
     /* ASCN */
     int32_t tidAS;
     int32_t startAS;
     int32_t stopAS;
     int lcountsMin;
     int lcountsMax;
     int rcountsMin;
     int rcountsMax;
 
     /* SV */
     int svStartL;
     int svStopL;
     int svStartR;
     int svStopR;
 
+    long double fracGerm;
+
 } analysis_params_t;
 
 static int32_t countsL1[4];
 static int32_t countsL2[4];
 static int32_t countsL3[4];
 
 static int32_t countsR1[4];
 static int32_t countsR2[4];
 static int32_t countsR3[4];
 
 static int32_t votesL[4];
 static int32_t countsL[4];
 static int32_t votesR[4];
 static int32_t countsR[4];
 
 typedef struct {
     char nuc;
     int count;
     int vote;
 } variant_t;
 
 variant_t minorL, majorL;
 variant_t minorR, majorR;
 
 static int32_t insL, delL, insR, delR;
 
 typedef struct {
     int tid;
     int mtid;
 
     int32_t qstart;
     int32_t qstop;
     int32_t mstart;
     int32_t mstop;
 
     int supp;
     int qual;
     int mstrand;
     int qstrand;
 
     int mdir;      // trend of mate directions 
     int pmstart;   // previos start
 } sv_t;
 
 #define BUF 100  // buffer around reads' starts/stops
 
 static sv_t intraL, intraR;
 static sv_t interL, interR;
 
 static int nuc_index(char c)
 {
 switch (c)
     {
     case 'A':
 	return 0;
     case 'G':
 	return 1;
     case 'C':
 	return 2;
     case 'T':
 	return 3;
     default:
 	return -1;
     }
 return -1;
 }
 #define NUCS "AGCT"
 
 static inline void reset_globals(analysis_params_t *ap)
 {
 /* reset nuc counts */
 int i;
 for (i = 0; i < 4; i++)
     { // sum up the qualities of each base
     votesL[i]  = 0;
     countsL[i] = 0;
     votesR[i]  = 0; 
     countsR[i] = 0;
 
     countsL1[i] = 0;
     countsL2[i] = 0;
     countsL3[i] = 0;
     countsR1[i] = 0;
     countsR2[i] = 0;
     countsR3[i] = 0;
     }
 
 /* reset indel stats */
 insL = 0;
 insR = 0;
 delL = 0;
 delR = 0;
 
 /* reset pileup info */
 ap->pStart = -1, ap->pStop = -1;
 }
 
 static inline int check_read(const bam_pileup1_t *p, analysis_params_t *ap)
 {
 bam1_core_t *c = &p->b->core;
 /* Check if unmapped or PCR optical dupe */
 if (((c)->flag & BAM_FUNMAP) || ((c)->flag & BAM_FDUP) )
     return 0;
 
 /* check base quality */
 if (bam1_qual(p->b)[p->qpos] < ap->minQ)
     return 0;
 
 /* check mapping quality */
 if ((c)->qual < ap->minMapQ)
     return 0;
 
 /* all good */
 return 1;
 }
 
 
 static inline void nuc_counts(dual_pileup_t *d, analysis_params_t *ap)
 { 
 /* reset global vars */
 reset_globals(ap);
 
 int i, index;
 int32_t start, stop;
 int32_t prevStart = -1;
 const bam_pileup1_t *p;
 
 char best_b1 = 'N';
 int best_q1  = -1;
 char best_b2 = 'N';
 int best_q2  = -1;
 int insertion = 0, deletion = 0;
  
 for (i = 0; i < d->nL; ++i)
     { 
     p = d->puL + i;
     if (!check_read(p, ap))
 	continue;
     bam1_core_t *c = &p->b->core;
     int qual = bam1_qual(p->b)[p->qpos];    
 
     /* Check if start & stop has already been visited, to dupes missed in prev step*/    
     start = (c)->pos;
     stop  = start + (c)->l_qseq;    
 
     if (ap->pStart == -1)
 	{
 	ap->pStart = start;
 	ap->pStop  = stop;
 	}
     else 
 	{
 	if (start < ap->pStart)
 	    ap->pStart = start;
 	if (stop  > ap->pStop)
 	    ap->pStop = stop;
 	}
 
     if (prevStart == -1)
 	prevStart = start;
 
     if (start < prevStart)
 	continue;
 
     if (start == prevStart)
 	{
 	if (p->is_del)
 	    {
 	    if (p->indel > 0)
 		insertion = 1;
 	    else if (p->indel < 0)
 		deletion = 1;
 	    }
 	else
 	    {
 	    int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);    
 	    if (best_b1 == 'N')
 		{
 		best_b1 = b;
 		best_q1 = qual;
 		}
 	    else if (qual > best_q1)
 		{
 		best_b2 = best_b1;
 		best_q2 = best_q1;
 		
 		best_b1 = b;
 		best_q1 = qual;
 		}
 	    else if (qual > best_q2)
 		{
 		best_b2 = b;
 		best_q2 = qual;
 		}
 	    }
 	continue;
 	}
     prevStart = start;
     
     if (insertion)
 	insL += 1;
     else if (deletion)
 	delL += 1;
 
     double lseq = (double) p->b->core.l_qseq;
     if ((index = nuc_index(best_b1)) >= 0)
 	{
 	votesL[index] += best_q1;
 	countsL[index]++;
 
         if (p->qpos < round(0.33 * lseq))
 	    countsL1[index]++;
 	else if (p->qpos < round(0.67 * lseq))
 	    countsL2[index]++;
 	else
 	    countsL3[index]++;
 	}
     
     if ((index = nuc_index(best_b2)) >= 0)
 	{
 	votesL[index] += best_q2;
 	countsL[index]++;
 	
         if (p->qpos < round(0.33 * lseq))
 	    countsL1[index]++;
 	else if (p->qpos < round(0.67 * lseq))
 	    countsL2[index]++;
 	else
 	    countsL3[index]++;
 	}
     best_b1 = 'N';
     best_q1 = -1;
     best_b2 = 'N';
     best_q2 = -1;
     
     insertion = 0, deletion = 0;
 
     if (p->is_del)
 	{
 	if (p->indel > 0)
 	    insertion = 1;
 	else if (p->indel < 0)
 	    deletion = 1;
 	}
     else
 	{
 	int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
 	if (best_b1 == 'N')
 	    {
 	    best_b1 = b;
 	    best_q1 = qual;
 	    }
 	else if (qual > best_q1)
 	    {
 	    best_b2 = best_b1;
 	    best_q2 = best_q1;
 
 	    best_b1 = b;
 	    best_q1 = qual;
 	    }
 	else if (qual > best_q2)
 	    {
 	    best_b2 = b;
 	    best_q2 = qual;
 	    }
 	}
     }
 
 prevStart = -1;
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     if (!check_read(p, ap))
 	continue;
     bam1_core_t *c = &p->b->core;
     int qual = bam1_qual(p->b)[p->qpos];    
 
     /* Check if start & stop has already been visited, to dupes missed in prev step*/    
     start = (c)->pos;
     stop  = start + (c)->l_qseq;    
     if (start <= prevStart)
 	continue;
     prevStart = start;
 
     if (ap->pStart == -1)
 	{
 	ap->pStart = start;
 	ap->pStop  = stop;
 	}
     if (start < ap->pStart)
 	ap->pStart = start;
     if (stop  > ap->pStop)
 	ap->pStop = stop;
 
     if (p->indel > 0)
 	insR += 1;
     else if (p->indel < 0)
 	delR += 1;
     
     if (!p->is_del && p->indel == 0)
 	{
         double lseq = (double) p->b->core.l_qseq;
 	int b = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
 	if ((index = nuc_index(b)) < 0)
 	    continue;
 	votesR[index] += qual;
 	countsR[index]++;
 
         if (p->qpos < round(0.33 * lseq))
 	    countsR1[index]++;
 	else if (p->qpos < round(0.67 * lseq))
 	    countsR2[index]++;
 	else
 	    countsR3[index]++;
 	}
     }
 
 int iL1 = -1, iL2 = -1, maxL1 = 0, maxL2 = 0;
 int iR1 = -1, iR2 = -1, maxR1 = 0, maxR2 = 0;
 int countL1 = -1, countL2 = -1, countR1 = -1, countR2 = -1;
 for (i = 0; i < 4; i++)
     {
     if (countsL[i] > countL1)
 	{
 	iL2     = iL1;     // set prev max to max2
 	maxL2   = maxL1;
 	countL2 = countL1;
 
 	iL1     = i;
 	maxL1   = votesL[i];
 	countL1 = countsL[i];
 	}
     else if (countsL[i] > countL2)
 	{
 	iL2     = i;
 	maxL2   = votesL[i];
 	countL2 = countsL[i];
 	}
     if (countsR[i] > countR1)
 	{
 	iR2     = iR1;
 	maxR2   = maxR1;
 	countR2 = countR1;
 
 	iR1     = i;
 	maxR1   = votesR[i];
 	countR1 = countsR[i];
 	}
     else if (countsR[i] > countR2)
 	{
 	iR2     = i;
 	maxR2   = votesR[i];
 	countR2 = countsR[i];
 	}
     }
 
 majorL.nuc   = 'N';
 majorL.vote  = 0;
 majorL.count = 0;
 if (iL1 != -1 && countL1 >= ap->minSuppSNP)
     {
     majorL.nuc   = NUCS[iL1];
     majorL.vote  = maxL1;
     majorL.count = countL1;
     }
 
 minorL.nuc   = 'N';
 minorL.vote  = 0;
 minorL.count = 0;
 if (iL2 != -1 && countL2 >= ap->minSuppSNP)
     {
     minorL.nuc   = NUCS[iL2];
     minorL.vote  = maxL2;
     minorL.count = countL2;
     }
 
 majorR.nuc   = 'N';
 majorR.vote  = 0;
 majorR.count = 0;
 if (iR1 != -1 && countR1 >= ap->minSuppSNP)
     {
     majorR.nuc   = NUCS[iR1];
     majorR.vote  = maxR1;
     majorR.count = countR1;
     }
 
 minorR.nuc   = 'N';
 minorR.vote  = 0;
 minorR.count = 0;
 if (iR2 != -1 && countR2 >= ap->minSuppSNP)
     {
     minorR.nuc   = NUCS[iR2];
     minorR.vote  = maxR2;
     minorR.count = countR2;
     }
 }
 
 double chi_square(int obs1, int obs2, double p)  // p = binomial prob of getting 1
 {
 double pseudo = 1.0;
 int count = obs1 + obs2;
 
 double exp1 = p * ((double) count) + pseudo;
 double exp2 = (1.0 - p) * ((double) count) + pseudo;
 
 // apply Yates correction... will often have small observed values.
 return pow(abs((double)obs1 - exp1) - 0.5, 2.0)/exp1 + pow(abs((double)obs2 - exp2) - 0.5, 2.0)/exp2;
 }
 
 static int sig_diff(dual_pileup_t *d, analysis_params_t *ap)
 {
 int i;
 double pseudo = 1.0;
 int totalL = 0, totalR = 0;
 
 for (i = 0; i < 4; i++)
     {
     totalL += countsL[i];
     totalR += countsR[i];
     }
 
 double obs, exp, chi_sq = 0.0;
 for (i = 0; i < 4; i++)
     {
     obs = ((double) countsL[i] * 100.0) / (double) totalL + pseudo;
     exp = ((double) countsR[i] * 100.0) / (double) totalR + pseudo;
     chi_sq += pow(abs(obs - exp) - 0.5, 2.0) / exp;
     }
 
 return chi_sq;
 }
 
 static int is_mut(dual_pileup_t *d, analysis_params_t *ap, char *ref,
 		  char *callL1, char *callL2, char *callR1, char *callR2)
 {
 if (majorR.vote == 0 || majorL.vote == 0)
     return 0;  // most likely a bad quality region.
 
 if (sig_diff(d, ap) < ap->minChiSq)
     return 0;
 
 int i = 0;
 int rCount = 0;
 for (i = 0; i < 4; i++)
     rCount += countsR1[i] + countsR2[i] + countsR3[i];
 
 if (rCount < 10)
     return 0;
 
 char rb = (d->ref && (int)d->posL < d->len)? d->ref[d->posL] : 'N';
 
 int retVal1 = 0, retVal2 = 0;
 for (i = 0; i < 4; i++)
     {
     if (NUCS[i] != majorL.nuc && NUCS[i] != minorL.nuc)
 	continue;
 
     if (NUCS[i] == majorL.nuc && majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
 	{
 	if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
 	    retVal1 = 1;
 	else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
 	    retVal1 = 1;
 	else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
 	    retVal1 = 1;
 
 	if (retVal1)
 	    {
 	    if (rCount >= 30 && countsR[i] <= 1)
 		retVal1 = 1;
 	    else if (rCount < 30 && countsR[i] == 0)
 		retVal1 = 1;
 	    else
 		retVal1 = 0;
 	    }
 	}
 
     if (NUCS[i] == minorL.nuc && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
 	{
 	if (countsL1[i] > 0 && countsL2[i] > 0 && countsL1[i] + countsL2[i] >= 3)
 	    retVal2 = 1;
 	else if (countsL1[i] > 0 && countsL3[i] > 0 && countsL1[i] + countsL3[i] >= 3)
 	    retVal2 = 1;
 	else if (countsL2[i] > 0 && countsL3[i] > 0 && countsL2[i] + countsL3[i] >= 3)
 	    retVal2 = 1;
 
 	if (retVal2)
 	    {
 	    if (rCount >= 30 && countsR[i] <= 1)
 		retVal2 = 1;
 	    else if (rCount < 30 && countsR[i] == 0)
 		retVal2 = 1;
 	    else
 		retVal2 = 0;
 	    }
 	}
     }
 
 if (!retVal1 && !retVal2)
     return 0;
 
 *ref    = rb;
 *callL1 = majorL.nuc;
 *callL2 = minorL.nuc;
 *callR1 = majorR.nuc;
 *callR2 = minorR.nuc;
 
 if (majorL.nuc != majorR.nuc && majorL.nuc != minorR.nuc)
     return 1;
 if (minorL.nuc != 'N' && (minorL.nuc != majorR.nuc && minorL.nuc != minorR.nuc))
     return 1;
 
 return 0;
 }
 
 static int calc_cnv(dual_pileup_t *d, analysis_params_t *ap)
 {
 if ((ap->tid != BAD_TID) && (ap->tid != d->tidL))  // onto next chromosome
     return 2;
 
 int i;
 const bam_pileup1_t *p;
 if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
     {
     for (i = 0; i < d->nL; ++i)
 	{
 	p = d->puL + i;
 	bam1_core_t *c = &p->b->core;
 	if ((c)->pos <= ap->prevStop)
 	    return 0;
 	}
 
     for (i = 0; i < d->nR; ++i)
 	{
 	p = d->puR + i;
 	bam1_core_t *c = &p->b->core;
 	if ((c)->pos <= ap->prevStop)
 	    return 0;
 	}
     }
 
 ap->tid = d->tidL;
 
 int32_t start, stop;
 int32_t lstart = ap->lstart, lstop = ap->lstop;
 int32_t prevPos = lstop - 1;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     bam1_core_t *c = &p->b->core;
     if ((c)->qual < ap->minMapQ)
 	continue;
 
     if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
 	{ // read is mapped and not an optical/PCR dupe
 	start = (c)->pos;
 	stop  = start + (c)->l_qseq;
 
 	if (lstart == -1)
 	    {
 	    lstart = start;
 	    lstop  = start + 1;
 	    ap->lcounts += 1;
 	    }
 	else if ((start > prevPos) && (start > lstart)) // && stop > lstop))
 	    {  // make sure there are no possible dupes by removing all by
 	    // comparing left-most position of each read, only allowing one per pos.
 	    prevPos = start;
 	    lstop = start + 1;
 	    ap->lcounts += 1;
 	    }
 	}
     }
 
 int32_t rstart = ap->rstart, rstop = ap->rstop;
 prevPos = rstop - 1;
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     bam1_core_t *c = &p->b->core;
     if ((c)->qual < ap->minMapQ)
 	continue;
 
     if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
 	{ // read is mapped and not an optical/PCR dupe
 	start = (c)->pos;
 	stop  = start + (c)->l_qseq;
 
 	if (rstart == -1)
 	    {
 	    rstart = start;
 	    rstop  = start + 1;
 	    ap->rcounts += 1;
 	    }
 	else if ((start > prevPos) && (start > rstart)) //  && (stop > rstop))
 	    {  // make sure there are no possible dupes by removing all by
 	    // comparing left-most position of each read, only allowing one per pos.
 	    prevPos = start;
 	    rstop   = start + 1;
 	    ap->rcounts += 1;
 	    }
 	}
     }
 
 ap->lstart = lstart;
 ap->lstop  = lstop;
 
 ap->rstart = rstart;
 ap->rstop  = rstop;
 
 if ((ap->lcounts >= ap->readsPerBin) || (ap->rcounts >= ap->readsPerBin))
     return 1;
 
 return 0;
 }
 
 static int calc_ascn(dual_pileup_t *d, analysis_params_t *ap, 
 		     int maxS, int minS, int maxG, int minG)
 {
 if ((ap->tidAS != BAD_TID) && (ap->tidAS != d->tidL))  // onto next chromosome
     return 2;
 
 if ( ap->pStopPrev != -1 && (d->posL < ap->pStopPrev))
     return 0; // within previous position's territory, avoid double-counting
 
 ap->tidAS = d->tidL;
 
 if (ap->startAS == -1)
     {
     ap->startAS = d->posL;
     ap->stopAS  = d->posL;
     }
 
 if (d->posL > ap->stopAS)
     ap->stopAS = d->posL;
 
 ap->lcountsMin += minS;
 ap->lcountsMax += maxS;
 
 ap->rcountsMin += minG;
 ap->rcountsMax += maxG;
 
 int totalL = ap->lcountsMin + ap->lcountsMax;
 int totalR = ap->rcountsMin + ap->rcountsMax;
 
 if (totalL >= ap->readsPerBin || totalR >= ap->readsPerBin)
     return 1;
 
 return 0;
 }
 
 
 static int is_het(dual_pileup_t *d, analysis_params_t *ap)
 {
 int i = 0;
 int rCount = 0, maxCountR = 0;
 for (i = 0; i < 4; i++)
     {
     if (countsR[i] < ap->minSuppSNP)
 	continue;
     rCount += countsR[i];
     if (countsR[i] > maxCountR)
 	maxCountR = countsR[i];
     }
 
 double het = (double) maxCountR / (double) rCount;
 if (het > 0.4 && het < 0.6)
     return 1;  // need a smarter test here. 
 
 return 0;
 }
 
-
 static void print_nuc_info()
 {
 int i, first = 1;
 for (i = 0; i < 4; i++)
     {
     if (countsR[i] == 0)
 	continue;
     if (!first)
 	putchar(' ');
     printf("%c:%d/%2.1f", NUCS[i], countsR[i], (double) votesR[i] / (double) countsR[i]);
     first = 0;
     }
 putchar('\t');
 first = 1;
 for (i = 0; i < 4; i++)
     {
     if (countsL[i] == 0)
 	continue;
     if (!first)
 	putchar(' ');
     printf("%c:%d/%2.1f", NUCS[i], countsL[i], (double) votesL[i] / (double) countsL[i]);
     first = 0;
     }
 
 putchar('\n');
 }
 
+
+
 static void do_cnv(dual_pileup_t *d, analysis_params_t *ap)
 {
 int ret;
 if ((ret = calc_cnv(d, ap)) == 0)
     return;
 
 int start = ap->rstart;
 int stop  = ap->rstop;
 if (ap->lstart < start)
     start = ap->lstart;
 if (ap->lstop > stop)
     stop  = ap->lstop;
 
 printf("CNV\t%s\t%d\t%d\t%f\t%d\t%d\n",
        d->hL->target_name[ap->tid],
        start, stop, (double) ap->lcounts / (double) ap->rcounts, 
        ap->rcounts, ap->lcounts);
 
 ap->prevTid   = ap->tid;
 ap->prevStart = ap->lstart;
 ap->prevStop  = ap->lstop;
 
 ap->lcounts = 0;
 ap->rcounts = 0;
 
 if (ret == 2)
     {  // switching chroms
     ap->tid   = BAD_TID;
     
     ap->lstart = -1;
     ap->lstop  = -1;
     ap->rstart = -1;
     ap->rstop  = -1;
     }
 else
     { // on same chrom
     ap->lstart = ap->lstop;
     ap->rstart = ap->rstop;
     }
 }
 
 static inline void reset_inter()
 { 
 interL.tid     = -1;
 interL.mtid    = -1;
 interL.qstart  = -1;
 interL.qstop   = -1;
 interL.mstart  = -1;
 interL.mstop   = -1;
 interL.supp    =  0;
 interL.qual    =  0;
 interL.qstrand = -1;
 interL.mstrand = -1;
 interL.mdir    =  0;
 interL.pmstart = -1;
 
 interR.tid     = -1;
 interR.mtid    = -1;
 interR.qstart  = -1;
 interR.qstop   = -1;
 interR.mstart  = -1;
 interR.mstop   = -1;
 interR.supp    =  0;
 interR.qual    =  0;
 interR.qstrand = -1;
 interR.mstrand = -1;
 interR.mdir    =  0;
 interR.pmstart = -1;
 }
 
 static inline void reset_intra()
 { 
 intraL.tid     = -1;
 intraL.qstart  = -1;
 intraL.qstop   = -1;
 intraL.mstart  = -1;
 intraL.mstop   = -1;
 intraL.supp    =  0;
 intraL.qual    =  0;
 intraL.qstrand = -1;
 intraL.mstrand = -1;
 intraL.mdir    =  0;
 intraL.pmstart = -1;
 
 intraR.tid     = -1;
 intraR.qstart  = -1;
 intraR.qstop   = -1;
 intraR.mstart  = -1;
 intraR.mstop   = -1;
 intraR.supp    =  0;
 intraR.qual    =  0;
 intraR.qstrand = -1;
 intraR.mstrand = -1;
 intraR.mdir    =  0;
 intraR.pmstart = -1;
 }
 
 static inline void update_sv(const bam_pileup1_t *p, sv_t *sv)
 {
 if (!p)
     return;
 
 bam1_core_t *c = &p->b->core;
-int qstrand    = bam1_strand(p->b);
-int mstrand    = bam1_mstrand(p->b);
 int32_t qstart = (c)->pos;
 int32_t qstop  = qstart + (c)->l_qseq;
 int32_t mstart = (c)->mpos;
 int32_t mstop  = mstart + (c)->l_qseq;
+int qstrand    = bam1_strand(p->b); 
+int mstrand    = bam1_mstrand(p->b);
 
 if (sv->tid == -1)  // not set yet
     {
     sv->tid     = (c)->tid;
     sv->mtid    = (c)->mtid;
     sv->qstart  = qstart;
     sv->qstop   = qstop;
     sv->mstart  = mstart;
     sv->mstop   = mstop;
     sv->mstrand = mstrand;
     sv->qstrand = qstrand;
     sv->mdir    = 0;
     sv->pmstart = mstart;
     sv->qual    = (c)->qual;
     sv->supp    = 1;
     }
 else if ((c)->tid == sv->tid && (c)->mtid == sv->mtid &&
 	 qstrand == sv->qstrand && mstrand == sv->mstrand &&
 	 qstart < (sv->qstop + BUF) && qstop > (sv->qstart - BUF) &&
 	 mstart < (sv->mstop + BUF) && mstop > (sv->mstart - BUF))
     {  // overlapping on both sides within +/- BUF
     if (sv->qstart > qstart)
 	sv->qstart = qstart;
     if (sv->qstop < qstop)
 	sv->qstop = qstop;
     if (sv->mstart > mstart)
 	sv->mstart = mstart;
     if (sv->mstop < mstop)
 	sv->mstop = mstop;
     
     sv->mdir   += (mstart - sv->pmstart);
     sv->pmstart = mstart; 
     
     sv->qual += (c)->qual;
     sv->supp += 1;
     }
 }
 
 static inline void update_hq_discordant(const bam_pileup1_t *p, const bam_pileup1_t **inter, const bam_pileup1_t **intra)
 {
 if (!p)
     return;
 
 bam1_core_t *c = &p->b->core;
 if ((c)->tid != (c)->mtid)
     {
     if (!(*inter))  // best inter-chrom not set yet
 	*inter = p;
     else if ((*inter)->b->core.qual > (c)->qual)
 	*inter = p;	
     }
 else
     {
     int qstrand = bam1_strand(p->b);
     int mstrand = bam1_mstrand(p->b);
  
     if ((qstrand != mstrand) && abs((c)->isize) <= maxISize)
 	return;
 	
     if (!(*intra))
 	*intra = p;
     else if ((*intra)->b->core.qual > (c)->qual)
 	*intra = p;
     }
 }
 
 static inline void struct_var_scan(dual_pileup_t *d, analysis_params_t *ap, 
 				   int *minStartL, int *minStartR)
 {
 int i;
 const bam_pileup1_t *p, *pInterL = NULL, *pIntraL = NULL;
 int prevStart = ap->svStartL;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     if (p->b->core.qual < ap->minMapQ)
 	continue;
     bam1_core_t *c = &p->b->core;
     if (!((c)->flag & BAM_FPAIRED)) 
 	continue; // read is not paired
     if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
 	continue; // read or its mate is unmapped.
 
     int32_t qstart = (c)->pos;
     if (*minStartL == -1 || qstart < *minStartL)
 	*minStartL = qstart;
     int qstrand = bam1_strand(p->b);
     int mstrand = bam1_mstrand(p->b);
    
     if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize) 
 	continue;  // mate is properly paired
 
     if (ap->svStartL == -1)
 	{
 	ap->svStartL = qstart;
 	prevStart = ap->svStartL;
 	}
 
     if (qstart < ap->svStartL)
 	continue;
     else if (qstart == ap->svStartL)
 	update_hq_discordant(p, &pInterL, &pIntraL);
     else
 	{
 	ap->svStartL = qstart;
 	break;
 	}
     }
 if (ap->svStartL == prevStart)
     ap->svStartL += 1;
 
 prevStart = ap->svStartR;
 const bam_pileup1_t *pInterR = NULL, *pIntraR = NULL;
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     if (p->b->core.qual < ap->minMapQ)
 	continue;
     bam1_core_t *c = &p->b->core;
     if (!((c)->flag & BAM_FPAIRED)) 
 	continue; // read is not paired
     if ((c)->flag & BAM_FUNMAP || ((c)->flag & BAM_FMUNMAP))
 	continue; // read or its mate is unmapped.
 
     int32_t qstart = (c)->pos;
     if (*minStartR == -1 || qstart < *minStartR)
 	*minStartR = qstart;
     
     int qstrand = bam1_strand(p->b);
     int mstrand = bam1_mstrand(p->b);
 
     if ((c)->mtid == (c)->tid && (qstrand != mstrand) && abs((c)->isize) <= maxISize) 
 	continue;  // mate is properly paired
 
     if (ap->svStartR == -1)
 	{
 	ap->svStartR = qstart;
 	prevStart = ap->svStartR;
 	}
 
     if (qstart < ap->svStartR)
 	continue;
     else if (qstart == ap->svStartR)
 	update_hq_discordant(p, &pInterR, &pIntraR);
     else
 	{
 	ap->svStartR = qstart;
 	break;
 	}
     }
 if (ap->svStartR == prevStart)
     ap->svStartR += 1;
 
 update_sv(pIntraL, &intraL);
 update_sv(pInterL, &interL);
 update_sv(pIntraR, &intraR);
 update_sv(pInterR, &interR);
 }
 
 static inline int is_intra_sv(analysis_params_t *ap, int minStart)
 {
 if (intraL.supp > 0)
     {
     if (minStart <= intraL.qstop)
 	return 0;  // there still might be reads to overlap
 
     /* check average mapping quality of discordants */
     int qual = ceil((double) intraL.qual / (double) intraL.supp); 
     if (intraL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
 	{  // check mate pair direction trend... should be positive for properly paired (opposite strand)
-
-	//return 1;
-
-	if ((intraL.qstrand == intraL.mstrand && intraL.mdir < 0) || 
-	    (intraL.qstrand != intraL.mstrand && intraL.mdir > 0))
+	if ((intraL.qstrand && intraL.mstrand && intraL.mdir < 0) ||    // + +
+	    (!intraL.qstrand && !intraL.mstrand && intraL.mdir < 0) ||  // - -
+	    (intraL.qstrand != intraL.mstrand && intraL.mdir > 0))      // + - or - +
 	    return 1;
 	}
 
     reset_intra();
     }
 return 0;
 }
 
 static inline int is_inter_sv(analysis_params_t *ap, int minStart)
 {
 if (interL.supp > 0)
     {
     if (minStart <= interL.qstop)
 	return 0;  // there still might be reads to overlap
 
     /* check average mapping quality of discordants */
     int qual = ceil((double) interL.qual / (double) interL.supp); 
     if (interL.supp >= ap->minSuppSV && qual >= ap->avgMapQ)
 	{  // check mate pair direction trend... should be positive for properly paired (opposite strand)
-
-	//return 1;
-
-	if ((interL.qstrand == interL.mstrand && interL.mdir < 0) || 
-	    (interL.qstrand != interL.mstrand && interL.mdir > 0))
+	if ((interL.qstrand && interL.mstrand && interL.mdir < 0) ||     // + + 
+	    (!interL.qstrand && !interL.mstrand && interL.mdir < 0) ||   // - - 
+	    (interL.qstrand != interL.mstrand && interL.mdir > 0))       // + - or - +
 	    return 1;
 	}
     reset_inter();
     }
 return 0;
 }
 
 static int indel_variant(dual_pileup_t *d, analysis_params_t *ap,
 			 int *isIns, int *isDel, int *insGerm, int *delGerm)
 {
 if (insL >= ap->minSuppSNP)
     {
     *isIns = 1;
     if (insR > 0)  // if you see even once, believe it.
 	*insGerm = 1;
     else
 	*insGerm = 0;
     return 1;
     }
 
 if (delL >= ap->minSuppSNP)
     {
     *isDel = 1;
     if (delR > 0)  // if you see even once, believe it.
 	*delGerm = 1;
     else
 	*delGerm = 0;
     return 1;
     }
 
 return 0;
 }
 
 static void do_struct_var(dual_pileup_t *d, analysis_params_t *ap)
 {
 int minStartL = -1, minStartR = -1;
 struct_var_scan(d, ap, &minStartL, &minStartR);
 if (is_inter_sv(ap, minStartL))
     {
     printf("SVS\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
     char qsL = '+';
     if (interL.qstrand)
 	qsL = '-';
     char msL = '+';
     if (interL.mstrand)
 	msL = '-';
 
     char qsR = '+';
     if (interR.qstrand)
 	qsR = '-';
     char msR = '+';
     if (interR.mstrand)
 	msR = '-';
 
     if ( interR.supp > 0 && interL.mtid == interR.mtid &&  
-	(interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) )
+	(interR.qstart < (interL.qstop + BUF) && interR.qstop > (interL.qstart - BUF)) &&
+	(interR.mstart < (interL.mstop + BUF) && interR.mstop > (interL.mstart - BUF)) )
 	printf("GIR\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
 	       interR.supp, (double) interR.qual / (double) interR.supp, 
 	       interL.supp, (double) interL.qual / (double) interL.supp, 
 	       qsR, d->hL->target_name[interR.tid], interR.qstart, interR.qstop,
 	       msR, d->hL->target_name[interR.mtid], interR.mstart, interR.mstop,
 	       qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop,
 	       msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
     else
 	printf("SIR\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
 	       interL.supp, (double) interL.qual / (double) interL.supp, 
 	       qsL, d->hL->target_name[interL.tid], interL.qstart, interL.qstop, 
 	       msL, d->hL->target_name[interL.mtid], interL.mstart, interL.mstop);
 
     putchar('\n');
 
     reset_inter();
     }
 
 if (is_intra_sv(ap, minStartL))
     {
     printf("SVS\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
     char qsL = '+';
     if (intraL.qstrand)
 	qsL = '-';
     char msL = '+';
     if (intraL.mstrand)
 	msL = '-';
 
     char qsR = '+';
     if (intraR.qstrand)
 	qsR = '-';
     char msR = '+';
     if (intraR.mstrand)
 	msR = '-';
 
     if ( intraR.supp > 0 && 
-	(intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) )
+	(intraR.qstart < (intraL.qstop + BUF) && intraR.qstop > (intraL.qstart - BUF)) &&
+	(intraR.mstart < (intraL.mstop + BUF) && intraR.mstop > (intraL.mstart - BUF)) )
 	printf("GIA\t%d\t%3.1f\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
 	       intraR.supp, (double) intraR.qual / (double) intraR.supp, 
 	       intraL.supp, (double) intraL.qual / (double) intraL.supp, 
 	       qsR, d->hL->target_name[intraR.tid], intraR.qstart, intraR.qstop,
 	       msR, d->hL->target_name[intraR.tid], intraR.mstart, intraR.mstop,
 	       qsL, d->hL->target_name[intraL.tid], intraL.qstart, intraL.qstop,
 	       msL, d->hL->target_name[intraL.tid], intraL.mstart, intraL.mstop);
     else
 	printf("SIA\t%d\t%3.1f\t%c(%s:%d-%d)>%c(%s:%d-%d)", 
 	       intraL.supp, (double) intraL.qual / (double) intraL.supp, 
 	       qsL, d->hL->target_name[intraL.tid], intraL.qstart, intraL.qstop, 
 	       msL, d->hL->target_name[intraL.tid], intraL.mstart, intraL.mstop);
 
     putchar('\n');
 
     reset_intra();
     }
 
 int isIns = 0, isDel = 0, insGerm = 0, delGerm = 0;
 if (indel_variant(d, ap, &isIns, &isDel, &insGerm, &delGerm))
     {
     printf("IDL\t%s\t%d\t%d\t",
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
     if (isIns && insGerm)
 	printf("GIns\t(%d,%d),", insR, insL);
     else if (isIns && !insGerm)
 	printf("SIns\t(%d,%d),", insR, insL);
 
     if (isDel && delGerm)
 	printf("GDel\t(%d,%d)", delR, delL);
     else if (isDel && !delGerm)
 	printf("SDel\t(%d,%d)", delR, delL);
 
     putchar('\n');
     }
 }
 
 
 
 static void do_het_analysis(dual_pileup_t *d, analysis_params_t *ap)
 {
 if (!is_het(d, ap))
     return;
 
 // do ascn & phasing
 char minGa = 'N', maxGa = 'N', minSa = 'N', maxSa = 'N';
 int i, minGc = 0, maxGc = 0, minSc = 0, maxSc = 0;
 for (i = 0; i < 4; i++)
     {
     int cr = countsR[i];
     int cl = countsL[i];
     if (cr > maxGc)
 	{
 	minGc = maxGc;
 	minGa = maxGa;
 	
 	maxGc = cr;
 	maxGa = NUCS[i];
 	}
     else if (cr > minGc)
 	{
 	minGc = cr;
 	minGa = NUCS[i];
 	}
     
     if (cl > maxSc)
 	{
 	minSc = maxSc;
 	minSa = maxSa;
 	
 	maxSc = cl;
 	maxSa = NUCS[i];
 	}
     else if (cl > minSc)
 	{
 	minSc = cl;
 	minSa = NUCS[i];
 	}
     }
 
 if (minSc < ap->minSuppSNP)
     {
     minSc = 0;
     minSa = 'N';
     }
 
 char g1 = 'N', g2 = 'N';
 if (maxSa == maxGa)
     {
     g1 = maxGa;
     g2 = minGa;
     }
 else if (maxSa == minGa)
     {
     g1 = minGa;
     g2 = maxGa;
     } 
 // else, weird case!
 
 if (g1 == 'N' || g2 == 'N')
     return;
 
 printf("HAP\t%s\t%d\t%d\t%c %c\t%c %c\t", 
        d->hL->target_name[d->tidL], d->posL, d->posL + 1, 
        g1, g2, maxSa, minSa);
 print_nuc_info();
 
 int ret;
 if ((ret = calc_ascn(d, ap, maxSc, minSc, maxGc, minGc)) == 0)
     return;
 
 double minCN = (double) (ap->lcountsMin) / (double) (ap->rcountsMin + ap->rcountsMax);
 double maxCN = (double) (ap->lcountsMax) / (double) (ap->rcountsMin + ap->rcountsMax);
 
 printf("ASC\t%s\t%d\t%d\t%f\t%f\t%d\t%d\t%d\t%d\n",
        d->hL->target_name[ap->tidAS],
        ap->startAS, ap->stopAS,
        maxCN, minCN, 
        ap->rcountsMax, ap->rcountsMin, 
        ap->lcountsMax, ap->lcountsMin);
 
 ap->startAS    = -1;
 ap->stopAS     = -1;	
 ap->lcountsMax = 0;
 ap->lcountsMin = 0;
 ap->rcountsMax = 0;
 ap->rcountsMin = 0;
 
 ap->pStartPrev = ap->pStart;
 ap->pStopPrev  = ap->pStop;
 
 if (ret == 2)
     {
     ap->tidAS  = BAD_TID;
     ap->pStartPrev = -1;
     ap->pStopPrev  = -1;
     }
 }
 
 static void do_mut(dual_pileup_t *d, analysis_params_t *ap)
 {
 char ref, callL1, callL2, callR1, callR2;
 if (!is_mut(d, ap, &ref, &callL1, &callL2, &callR1, &callR2))
     return;
 
-printf("MUT\t%s\t%d\t%d\t",
+printf("MUT1\t%s\t%d\t%d\t",
        d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
 printf("%c\t", ref);
 if (callR2 == 'N')
     printf("%c>", callR1);
 else
     printf("%c/%c>", callR1, callR2);
 
 if (callL2 == 'N')
     printf("%c\t", callL1);
 else
     printf("%c/%c\t", callL1, callL2);
 
 double chi_sq = sig_diff(d, ap);
 printf("%f\t", chi_sq);
 print_nuc_info();
 }
 
+#define A 0
+#define G 1
+#define T 2
+#define C 3
+
+#define NUM_NUC 4
+
+#define AA 0
+#define AG 1
+#define AT 2
+#define AC 3
+#define GA 1
+#define GG 4
+#define GT 5
+#define GC 6
+#define TA 2
+#define TG 5
+#define TT 7
+#define TC 8
+#define CA 3
+#define CG 6
+#define CT 8
+#define CC 9
+
+#define NUM_GENO 10
+
+#define MAX_COUNT 1000
+
+static long double *logvals;
+static long double **genoPrior;
+static long double **mutProb;
+static long double **mutProbGeno;
+static long double **baseProb;
+
+#define GENO_1 "AAAAGGGTTC"
+#define GENO_2 "AGTCGTCTCC"
+
+
+void initProbTables()
+{
+int i, j;
+
+logvals = (long double *)(malloc(MAX_COUNT * sizeof(long double)));
+for (i = 0; i < MAX_COUNT; i++)
+    logvals[i] = logl((long double) i + 1.0);
+
+genoPrior = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+    genoPrior[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+for (i = 0; i < NUM_GENO; i++)
+    {
+    int i1 = nuc_index(GENO_1[i]);
+    int i2 = nuc_index(GENO_2[i]);
+    for (j = 0; j < NUM_NUC; j++)
+	{
+	long double val;
+	if (i1 == j && i2 == j)
+	    val = 0.80;
+	else if (i1 == j || i2 == j)
+	    val = 0.10;
+	else
+	    val = 0.05;
+
+	genoPrior[i][j] = logl(val);
+	}
+    }
+mutProb = (long double **)(malloc(NUM_NUC * sizeof(long double*)));
+for (i = 0; i < NUM_NUC; i++)
+    mutProb[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+long double val, tot;
+for (i = 0; i < NUM_NUC; i++)
+    {
+    tot = 0.0;
+    for (j = 0; j < NUM_NUC; j++)
+	{
+	/* else, transitions 4x more likely than transitions */
+	if (i == j)
+	    val = 1.0;
+	else if ((NUCS[i] == 'A' && NUCS[j] == 'G') || (NUCS[i] == 'G' && NUCS[j] == 'A'))
+	    val = 0.1;
+	else if ((NUCS[i] == 'C' && NUCS[j] == 'T') || (NUCS[i] == 'T' && NUCS[j] == 'C'))
+	    val = 0.1;
+	else
+	    val = 0.025;
+
+	mutProb[i][j] = val;
+	tot += val;
+	}
+
+    /* normalize probs, conditioned on germline call */
+    for (j = 0; j < NUM_NUC; j++)
+	{
+	mutProb[i][j] = logl(mutProb[i][j]/tot);
+	}
+    }
+
+mutProbGeno = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+    mutProbGeno[i] = (long double *)(malloc(NUM_GENO * sizeof(long double)));
+
+for (i = 0; i < NUM_GENO; i++)
+    {
+    int i1 = nuc_index(GENO_1[i]);
+    int i2 = nuc_index(GENO_2[i]);
+    for (j = 0; j < NUM_GENO; j++)
+	{
+	int j1 = nuc_index(GENO_1[j]);
+	int j2 = nuc_index(GENO_2[j]);
+
+	long double val1 = mutProb[i1][j1] + mutProb[i2][j2];
+	long double val2 = mutProb[i1][j2] + mutProb[i2][j1];
+
+	if (val1 > val2)
+	    mutProbGeno[i][j] = val1;
+	else
+	    mutProbGeno[i][j] = val2;
+	}
+    }
+baseProb = (long double **)(malloc(NUM_GENO * sizeof(long double*)));
+for (i = 0; i < NUM_GENO; i++)
+    baseProb[i] = (long double *)(malloc(NUM_NUC * sizeof(long double)));
+
+long double inprob = IN_PROB;
+long double outprob = (1.0 - inprob) / 3.0;
+
+for (i = 0; i < NUM_GENO; i++)
+    {
+    int i1 = nuc_index(GENO_1[i]);
+    int i2 = nuc_index(GENO_2[i]);
+    for (j = 0; j < NUM_NUC; j++)
+	{
+	if (i1 == j && i2 == j)
+	    baseProb[i][j] = inprob;
+	else if (i1 == j || i2 == j)
+	    baseProb[i][j] = (inprob + outprob) / 2.0;
+	else
+	    baseProb[i][j] = outprob;
+
+	baseProb[i][j] = logl(baseProb[i][j]);
+	fprintf(stderr, "%c%c-%c %Lf\t", GENO_1[i], GENO_2[i], NUCS[j], baseProb[i][j]);
+	}
+    fprintf(stderr, "\n");
+    }
+}
+
+static long double dataProbGerm(int geno)
+{
+int i, j, tot = 0;
+long double p = 0.0, scale = 0.0;
+
+for (i = 0; i < NUM_NUC; i++)
+    {
+    p += (long double) countsR[i] * baseProb[geno][i];
+
+    tot += countsR[i];
+    for (j = 0; j < countsR[i]; j++)
+	scale -= logvals[j];
+    }
+
+for (i = 0; i < tot; i++)
+    scale += logvals[i];
+
+return p + scale;
+}
+
+static long double dataProbTum(int genoG, int genoT, long double alpha)
+{
+int i, j, tot = 0;
+long double p = 0.0, scale = 0.0;
+
+for (i = 0; i < NUM_NUC; i++)
+    {
+    p += (long double) countsL[i] * (alpha * baseProb[genoG][i] + (1.0 - alpha) * baseProb[genoT][i]);
+
+    tot += countsL[i];
+    for (j = 0; j < countsL[i]; j++)
+	scale -= logvals[j];
+    }
+
+for (i = 0; i < tot; i++)
+    scale += logvals[i];
+
+return p + scale;
+}
+
+
+static int is_mut_prob(dual_pileup_t *d, analysis_params_t *ap, char *ref,
+		       char *callL1, char *callL2, char *callR1, char *callR2,
+		       long double *conf)
+{
+if (majorR.vote == 0 || majorL.vote == 0)
+    return 0;  // most likely a bad quality region.
+
+int i, j, imax = -1, jmax = -1;
+int totalL = 0, totalR = 0;
+for (i = 0; i < NUM_NUC; i++)
+    {
+    totalL += countsL[i];
+    totalR += countsR[i];
+    }
+
+if (totalL < ap->minSuppSNP || totalR < ap->minSuppSNP)
+    return 0;
+
+/* rescale counts to total to less than MAX_COUNT, if exceeded */
+if (totalL > MAX_COUNT)
+    for (i = 0; i < NUM_NUC; i++)
+	countsL[i] = (int) floor((float) countsL[i] * (float) MAX_COUNT / (float) totalL);
+if (totalR > MAX_COUNT)
+    for (i = 0; i < NUM_NUC; i++)
+	countsR[i] = (int) floor((float) countsR[i] * (float) MAX_COUNT / (float) totalR);
+
+long double alpha = (ap->fracGerm * (long double) totalR) / (long double) totalL;
+if (alpha > ap->fracGerm)
+    alpha = ap->fracGerm;
+
+char rb = (d->ref && (int)d->posL < d->len) ? toupper(d->ref[d->posL]) : 'N';
+int ri  = nuc_index(rb);
+int i1, i2, j1, j2;
+
+long double p, ptot = 0.0, pmax = -999999999.99;
+for (i = 0; i < NUM_GENO; i++)
+    {  // loop germline genotypes
+    i1 = nuc_index(GENO_1[i]);
+    i2 = nuc_index(GENO_2[i]);
+    if (!countsL[i1] && !countsL[i2] && !countsR[i1] && !countsR[i2])
+	continue;  // skip genotypes
+
+    long double germP = 0.0;
+    if (ri >= 0)
+	germP = genoPrior[i][ri];
+    germP += dataProbGerm(i);
+
+    for (j = 0; j < NUM_GENO; j++)
+	{  // loop tumor genotypes
+	j1 = nuc_index(GENO_1[j]);
+	j2 = nuc_index(GENO_2[j]);
+	if (!countsL[j1] && !countsL[j2] && !countsR[j1] && !countsR[j2])
+	    continue;
+
+	p = germP;
+	p += mutProbGeno[i][j];
+	p += dataProbTum(i, j, alpha);
+
+	ptot += expl(p);
+
+	if (p > pmax)
+	    {
+	    pmax = p;
+	    imax = i;
+	    jmax = j;
+	    }
+	}
+    }
+
+if (imax < 0 || jmax < 0)
+    return 0;
+
+*ref    = rb;
+*callL1 = GENO_1[jmax];
+*callL2 = GENO_2[jmax];
+*callR1 = GENO_1[imax];
+*callR2 = GENO_2[imax];
+*conf   = expl(pmax) / ptot;
+
+if (imax != jmax)
+    return 1;
+if (GENO_1[imax] != GENO_2[imax] || (rb != GENO_1[imax] && rb != GENO_2[imax]))
+    return 2;
+
+return 0;
+}
+
+
+static void do_mut_prob(dual_pileup_t *d, analysis_params_t *ap)
+{
+int ret;
+char ref, callL1, callL2, callR1, callR2;
+long double conf;
+if ((ret = is_mut_prob(d, ap, &ref, &callL1, &callL2, &callR1, &callR2, &conf)) == 0)
+    return;
+
+printf("MUT2\t%s\t%d\t%d\t",
+       d->hL->target_name[d->tidL], d->posL, d->posL + 1);
+if (ret == 1)
+    printf("SOM\t");
+else
+    printf("GERM\t");
+
+printf("%c\t", ref);
+printf("%c%c>", callR1, callR2);
+printf("%c%c\t", callL1, callL2);
+
+printf("%Lf\t", conf);
+print_nuc_info();
+}
 
 
 static void perform_analysis(dual_pileup_t *d, void *func_data)
 {
 analysis_params_t *ap = (analysis_params_t *)func_data;
 
 /* processes pileup for het and mut analysis */
 nuc_counts(d, ap);
 
 /* overall cnv */
 do_cnv(d, ap);
 
 /* structural variation */
 do_struct_var(d, ap);
 
 /* het analysis: ASCN & phasing */
 do_het_analysis(d, ap);
 
 /* mutation discovery */
-do_mut(d, ap);
+do_mut(d, ap);       // old way
+do_mut_prob(d, ap);  // new way
 
 fflush(stdout);
 }
 
 void bamBam(char *inbamL, char *inbamR, char *faidx, int mask)
 {
 analysis_params_t ap;
 ap.minQ     = minQ;
 ap.minMapQ  = minMapQ;
 ap.avgMapQ  = avgMapQ;
 ap.maxISize = maxISize;
 ap.minChiSq = minChiSq;
 ap.minSuppSNP  = minSuppSNP;
 ap.minSuppSV   = minSuppSV;
 ap.sites    = 0;
 
 ap.readsPerBin = readsPerBin;
 
 ap.pStart = -1;
 ap.pStop  = -1;
 ap.pStartPrev = -1;
 ap.pStopPrev  = -1;
 
 /* Overall Copy Number */
 ap.tid       = BAD_TID;
 ap.lstart    = -1;
 ap.lstop     = -1;
 ap.lcounts   = 0;
 ap.rstart    = -1;
 ap.rstop     = -1;
 ap.rcounts   = 0;
 ap.prevTid   = BAD_TID;
 ap.prevStart = -1;
 ap.prevStop  = -1;
 
 /* ASCN */
 ap.tidAS      = BAD_TID;
 ap.startAS   = -1;
 ap.stopAS    = -1;
 ap.lcountsMin = 0;
 ap.lcountsMax = 0;
 ap.rcountsMin = 0;
 ap.rcountsMax = 0;
 
 /* SV */
 ap.svStartL = -1;
 ap.svStopL  = -1;
 
 ap.svStartR = -1;
 ap.svStopR  = -1;
 
 reset_intra();
 reset_inter();
 
 bam_bam_file(inbamL, inbamR, faidx, position, mask, perform_analysis, &ap);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc < 3)
     usage();
 
 if (optionExists("fa"))
     faidx = optionVal("fa", NULL);
 if (optionExists("position"))
     position = optionVal("position", NULL);
 if (optionExists("minSuppSNP"))
     minSuppSNP = optionInt("minSuppSNP", 4);
 if (optionExists("minSuppSV"))
     minSuppSV = optionInt("minSuppSV", 2);
 if (optionExists("minQ"))
     minQ = optionInt("minQ", 10);
 if (optionExists("minMapQ"))
     minMapQ = optionInt("minMapQ", 20);
 if (optionExists("avgMapQ"))
     avgMapQ = optionInt("avgMapQ", 30);
 if (optionExists("maxISize"))
     maxISize = optionInt("maxISize", 2000);
 if (optionExists("minChiSq"))
     minChiSq = optionInt("minChiSq", 250);
 if (optionExists("readsPerBin"))
     readsPerBin = optionInt("readsPerBin", 2500);
 
 char *inbamL = argv[1];
 char *inbamR = argv[2];
 bamBam(inbamL, inbamR, faidx, BAM_DEF_MASK);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif