src/hg/tcga/bamBam/bamBam.c 1.7

1.7 2009/09/30 04:45:06 jsanborn
updated
Index: src/hg/tcga/bamBam/bamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBam.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/tcga/bamBam/bamBam.c	29 Sep 2009 04:52:56 -0000	1.6
+++ src/hg/tcga/bamBam/bamBam.c	30 Sep 2009 04:45:06 -0000	1.7
@@ -1,662 +1,704 @@
 /* bamBam -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 #include "bam_dual_pileup.h"
 
 static char const rcsid[] = "$Id";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bamBam - Perform analysis on two BAM files.\n"
   "usage:\n"
   "   bamBam [options] <left.bam> <right.bam>\n"
   "options:\n"
   "   -minSupport=INT - Minimum Support (i.e. num reads) to call a variant. [2]\n"
   "   -minQ=INT       - Minimum acceptable base quality. [0]\n"
   "   -minMapQ=INT    - Minimum acceptable mapping quality. [0]\n"
+  "   -avgMapQ=INT    - Minimum acceptable avg mapping quality. [0]\n"
+  "   -minChiSq=INT    - Minimum acceptable chi square of nucleotide freqs. [0]\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {"minSupport", OPTION_INT},
     {"minQ", OPTION_INT},
     {"minMapQ", OPTION_INT},
+    {"avgMapQ", OPTION_INT},
+    {"minChiSq", OPTION_INT},
     {NULL, 0}
 };
 
 #define HET_THRESH 0.05
 
 /* Option variables */
 char *position = NULL;
 int minSupport = 2;
 int minQ = 0;
 int minMapQ = 0;
+int avgMapQ  = 0;
+int minChiSq = 0;
 
 typedef struct {
     int minQ;
     int minMapQ;
+    int avgMapQ;
     int minSupp;
+    int minChiSq;
     int sites;
 } analysis_params_t;
 
 static int32_t votesL[4];
 static int32_t countsL[4];
 static int32_t votesR[4];
 static int32_t countsR[4];
 
+static int32_t insL, delL, insR, delR;
 
 typedef struct {
     char nuc;
     int count;
     int vote;
 } variant_t;
 
 variant_t minorL, majorL;
 variant_t minorR, majorR;
 
 static int nuc_index(char c)
 {
 switch (c)
     {
     case 'A':
 	return 0;
     case 'G':
 	return 1;
     case 'C':
 	return 2;
     case 'T':
 	return 3;
     default:
 	return -1;
     }
 return -1;
 }
 
 #define NUCS "AGCT"
 
 static inline void nuc_counts(pileup_data_t *d, analysis_params_t *ap)
 { 
 int i, index;
 for (i = 0; i < 4; i++)
     { // sum up the qualities of each base
     votesL[i]  = 0;
     countsL[i] = 0;
     votesR[i]  = 0; 
     countsR[i] = 0;
     }
+insL = 0;
+insR = 0;
+delL = 0;
+delR = 0;
 
 const bam_pileup1_t *p;
 for (i = 0; i < d->nL; ++i)
     { 
     p = d->puL + i;
 
     if (p->b->core.qual < ap->minMapQ)
 	continue;
 
-    int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     int qual = bam1_qual(p->b)[p->qpos];    
     if (qual < ap->minQ)
 	continue;
-    
+    if (!p->is_del)
+	{
+	int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);    
     if ((index = nuc_index(c)) < 0)
 	continue;
 
     votesL[index] += qual;
     countsL[index]++;
+	
+	if (p->indel > 0)
+	    insL += 1;
+	else if (p->indel < 0)
+	    delL += 1;
+	}
     }
 
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
 
     if (p->b->core.qual < ap->minMapQ)
 	continue;
 
-    int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     int qual = bam1_qual(p->b)[p->qpos];    
     if (qual < ap->minQ)
 	continue;
 
+    if (!p->is_del)
+	{
+	int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
     if ((index = nuc_index(c)) < 0)
 	continue;
-
     votesR[index] += qual;
     countsR[index]++;
+
+	if (p->indel > 0)
+	    insR += 1;
+	else if (p->indel < 0)
+	    delR += 1;
+	}
     }
 
-int iL1 = -1, iL2 = -1, maxL1 = -1, maxL2 = -1; 
-int iR1 = -1, iR2 = -1, maxR1 = -1, maxR2 = -1;
+int iL1 = -1, iL2 = -1, maxL1 = 0, maxL2 = 0; 
+int iR1 = -1, iR2 = -1, maxR1 = 0, maxR2 = 0;
 int countL1 = -1, countL2 = -1, countR1 = -1, countR2 = -1;
 for (i = 0; i < 4; i++)
     {
     if (votesL[i] > maxL1)
 	{
 	iL2     = iL1;     // set prev max to max2
 	maxL2   = maxL1;
 	countL2 = countL1;
 
 	iL1     = i;
 	maxL1   = votesL[i];
 	countL1 = countsL[i];
 	}
     else if (votesL[i] > maxL2)
 	{
 	iL2     = i;
 	maxL2   = votesL[i];
 	countL2 = countsL[i];
 	}
 
     if (votesR[i] > maxR1)
 	{
 	iR2     = iR1;
 	maxR2   = maxR1;
 	countR2 = countR1;
 
 	iR1     = i;
 	maxR1   = votesR[i];
 	countR1 = countsR[i];
 	}
     else if (votesR[i] > maxR2)
 	{
 	iR2     = i;
 	maxR2   = votesR[i];
 	countR2 = countsR[i];
 	}
     }
 
 majorL.nuc   = 'N';
 majorL.vote  = 0;
 majorL.count = 0;
 if (iL1 != -1 && countL1 >= ap->minSupp)
     {
     majorL.nuc   = NUCS[iL1];
     majorL.vote  = maxL1;
     majorL.count = countL1;
     }
 
 minorL.nuc   = 'N';
 minorL.vote  = 0;
 minorL.count = 0;
 if (iL2 != -1 && countL2 >= ap->minSupp)
     {
     minorL.nuc   = NUCS[iL2];
     minorL.vote  = maxL2;
     minorL.count = countL2;
     }
 
 majorR.nuc   = 'N';
 majorR.vote  = 0;
 majorR.count = 0;
 if (iR1 != -1 && countR1 >= ap->minSupp)
     {
     majorR.nuc   = NUCS[iR1];
     majorR.vote  = maxR1;
     majorR.count = countR1;
     }
 
 minorR.nuc   = 'N';
 minorR.vote  = 0;
 minorR.count = 0;
 if (iR2 != -1 && countR2 >= ap->minSupp)
     {
     minorR.nuc   = NUCS[iR2];
     minorR.vote  = maxR2;
     minorR.count = countR2;
     }
 }
 
 
 static double chi_square(int obs1, int obs2, double p)  // p = binomial prob of getting 1
 {
-int pseudo = 5;
-obs1 += pseudo;
-obs2 += pseudo;
+double pseudo = 1.0;
 int count = obs1 + obs2;
 
 double exp1 = p * ((double) count) + pseudo;
 double exp2 = (1.0 - p) * ((double) count) + pseudo;
 
 // apply Yates correction... will often have small observed values.
-return pow(abs((double)obs1 - exp1) - 0.5, 2.0) / obs1 
-    + pow(abs((double)obs2 - exp2) - 0.5, 2.0) / obs2;
+return pow(abs((double)obs1 - exp1) - 0.5, 2.0)/exp1 + pow(abs((double)obs2 - exp2) - 0.5, 2.0)/exp2;
 }
 
 static int sig_diff(pileup_data_t *d, analysis_params_t *ap)
 {
 int i;
 double pseudo = 1.0;
 int totalL = 0, totalR = 0;
 
 for (i = 0; i < 4; i++)
     {
-    totalL += votesL[i];
-    totalR += votesR[i];
+    totalL += countsL[i];
+    totalR += countsR[i];
     }
 
 double obs, exp, chi_sq = 0.0;
 for (i = 0; i < 4; i++)
     {
-    obs = ((double) votesL[i] * 100.0) / (double) totalL + pseudo;
-    exp = ((double) votesR[i] * 100.0) / (double) totalR + pseudo;
+    obs = ((double) countsL[i] * 100.0) / (double) totalL + pseudo;
+    exp = ((double) countsR[i] * 100.0) / (double) totalR + pseudo;
     chi_sq += pow(abs(obs - exp) - 0.5, 2.0) / exp;
     }
 
 return chi_sq;
 }
 
 
-static int which_model(double homo, double het, double tri)
+static int which_model(double homo, double het)
 {
-if (homo <= het && homo <= tri)
+if (homo <= het)
     return 1;
-if (het <= homo && het <= tri)
+else if (homo > het)
     return 2;
-if (tri <= homo && tri <= het)
-    return 3;
 return 0;
 }
 
 
 static int model_select(pileup_data_t *d, analysis_params_t *ap, 
 			char *callL1, char *callL2, char *callR1, char *callR2, 
 			float *hetL, float *hetR, int *stateL, int *stateR, int *sites)
 {
 if (majorR.vote == 0 || majorL.vote == 0)
     return 0;  // most likely a bad quality region.
 
-// might want to check here if the germline & tumor are significantly different, using
-// votesL & votesR
-
-if (!sig_diff(d, ap))
+if (sig_diff(d, ap) < ap->minChiSq)
     return 0;
 
-float freqL = ((float) minorL.vote) / ((float) majorL.vote + (float) minorL.vote);
-float freqR = ((float) minorR.vote) / ((float) majorR.vote + (float) minorR.vote);
+float freqL = ((float) minorL.count) / ((float) majorL.count + (float) minorL.count);
+float freqR = ((float) minorR.count) / ((float) majorR.count + (float) minorR.count);
 
 *hetL = freqL;
 *hetR = freqR;
 
 *sites += 1;
 
-double modelR_homo = chi_square(majorR.vote, minorR.vote, 1.0);
-double modelR_het  = chi_square(majorR.vote, minorR.vote, 0.5);
-double modelR_tri  = chi_square(majorR.vote, minorR.vote, 0.66);
-
-double modelL_homo = chi_square(majorL.vote, minorL.vote, 1.0);
-double modelL_het  = chi_square(majorL.vote, minorL.vote, 0.5);
-double modelL_tri  = chi_square(majorL.vote, minorL.vote, 0.66);
+double modelR_homo = chi_square(majorR.count, minorR.count, 1.0);
+double modelR_het  = chi_square(majorR.count, minorR.count, 0.5);
 
-int typeL = which_model(modelL_homo, modelL_het, modelL_tri);
-int typeR = which_model(modelR_homo, modelR_het, modelR_tri);
+double modelL_homo = chi_square(majorL.count, minorL.count, 1.0);
+double modelL_het  = chi_square(majorL.count, minorL.count, 0.5);
+
+int typeL = which_model(modelL_homo, modelL_het);
+int typeR = which_model(modelR_homo, modelR_het);
 
 *stateL = typeL;
 *stateR = typeR;
 
 *callL1 = majorL.nuc;
 *callL2 = minorL.nuc;
 *callR1 = majorR.nuc;
 *callR2 = minorR.nuc;
 
 int retVal = 0;
 if (typeR == 1)
     {  // normal most likely homozygous.
     if (typeL == 1)
 	{  // tumor is also homozygous.
 	if (majorL.nuc != majorR.nuc) // homozygous SNP
 	    retVal = 1;
 	}
     else if (typeL == 2)
 	retVal = 1;
-    else if (typeL == 3)
-	retVal = 1;
     }
-else if (typeR == 2 || typeR == 3)
+else if (typeR == 2)
     {  // normal most likely heterozygous.
     if (typeL == 1)
 	retVal = 1; // tumor is homozygous... LOH!
-    else if (typeL == 2 || typeL == 3)
+    else if (typeL == 2)
 	{ // tumor also heterozygous.
 	if (majorL.nuc == majorR.nuc && minorL.nuc == minorR.nuc)
 	    retVal = 0;
 	else if (majorL.nuc == minorR.nuc && minorL.nuc == majorR.nuc)
 	    retVal = 0;
 	else
 	    retVal = 1;
 	}
     }
 
 return retVal;
 }
 
-/** usually set to static, removed to avoid compiler warning/error */
-
-int snp_variant(pileup_data_t *d, analysis_params_t *ap, 
-		       char *callL1, char *callL2, char *callR1, char *callR2, 
-		       float *hetL, float *hetR, int *sites)
-{
-float freqL = ((float) minorL.vote) / ((float) majorL.vote + (float) minorL.vote);
-float freqR = ((float) minorR.vote) / ((float) majorR.vote + (float) minorR.vote);
-
-*hetL = freqL;
-*hetR = freqR;
-
-*sites += 1;
-
-if (freqR > HET_THRESH && freqL <= HET_THRESH) 
-    { // hetero in norm, homo in normal (LOH?)
-    if (majorL.nuc != majorR.nuc)
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;  
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    else if (majorL.nuc != minorR.nuc)
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;
-	*callR1 = minorR.nuc;
-	*callR2 = majorR.nuc;
-	return 1;
-	}
-    else 
-	return 0;
-    }
-else if (freqR > HET_THRESH && freqL > HET_THRESH) 
-    { // hetero in both tumor and normal
-    if ((majorL.nuc == majorR.nuc) && (minorL.nuc == minorR.nuc))
-	return 0;  // same nucs
-    else if ((minorL.nuc == majorR.nuc) && ( majorL.nuc == minorR.nuc))
-	return 0;  // same nucs, just changed proportions.
-    else if ((majorL.nuc != majorR.nuc) && (minorL.nuc == minorR.nuc))
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    else if ((majorL.nuc == majorR.nuc) && (minorL.nuc != minorR.nuc))
-	{
-	*callL1 = minorL.nuc;
-	*callL2 = majorL.nuc;
-	*callR1 = minorR.nuc;
-	*callR2 = majorR.nuc;
-	return 1;
-	}
-    else if ((majorL.nuc == minorR.nuc) && (minorL.nuc != majorR.nuc))
-	{
-	*callL1 = minorL.nuc;
-	*callL2 = majorL.nuc;
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    else if ((minorL.nuc == majorR.nuc) && (majorL.nuc != minorR.nuc))
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;
-	*callR1 = minorR.nuc;
-	*callR2 = majorR.nuc;
-	return 1;
-	}
-    else
-	return 0;
-    }
-else if (freqR <= HET_THRESH && freqL > HET_THRESH) 
-    { // homo in right (germline), hetero in left (tumor)
-    if (majorL.nuc != majorR.nuc)
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    else if (minorL.nuc != majorR.nuc)
-	{
-	*callL1 = minorL.nuc;
-	*callL2 = majorL.nuc;
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    }
-else if (freqR <= HET_THRESH && freqL <= HET_THRESH) 
-    {  // homozygous in right and left (tumor)
-    if (majorL.nuc != majorR.nuc)
-	{
-	*callL1 = majorL.nuc;
-	*callL2 = minorL.nuc;
-	*callR1 = majorR.nuc;
-	*callR2 = minorR.nuc;
-	return 1;
-	}
-    }
-
-return 0;   // not a SNP variant.
-}
-
 #define NUM_CHROMS 100
 static int mateChromsL[NUM_CHROMS];
 static int mateChromsR[NUM_CHROMS];
 static int somaticSV[NUM_CHROMS];
 static int germlineSV[NUM_CHROMS];
 
 static int struct_variant(pileup_data_t *d, analysis_params_t *ap)
 {
 int i;
 for (i = 0; i < NUM_CHROMS; ++i)
     {
     mateChromsL[i] = 0;
     mateChromsR[i] = 0;
     somaticSV[i]   = 0;
     germlineSV[i]  = 0;
     }
 
 const bam_pileup1_t *p;
+int avgMapQ = 0;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
+    avgMapQ += p->b->core.qual;
+    }
+if (((double)avgMapQ / (double) d->nL) < (double)ap->avgMapQ)
+    return 0;
 
-    if (p->b->core.qual < minMapQ)
+avgMapQ = 0;
+for (i = 0; i < d->nR; ++i)
+    {
+    p = d->puR + i;    
+    avgMapQ += p->b->core.qual;
+    }
+if (((double)avgMapQ / (double) d->nR) < (double) ap->avgMapQ)
+    return 0;
+
+for (i = 0; i < d->nL; ++i)
+    {
+    p = d->puL + i;
+
+    if (p->b->core.qual < ap->minMapQ)
 	continue;
 
     bam1_core_t *c = &p->b->core;
     if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
 	{ // read is paired and mapped.
 	if (!((c)->flag & BAM_FMUNMAP))
 	    {  // mate is mapped
 	    if ((c)->mtid != (c)->tid) // different chroms.
 		mateChromsL[(c)->mtid]++;
 	    }
 	}
     }
 
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
 
-    if (p->b->core.qual < minMapQ)
+    if (p->b->core.qual < ap->minMapQ)
 	continue;
 
     bam1_core_t *c = &p->b->core;
     if (((c)->flag & BAM_FPAIRED) && !((c)->flag & BAM_FUNMAP))
 	{ // read is paired and mapped.
 	if (!((c)->flag & BAM_FMUNMAP))
 	    {  // mate is mapped
 	    if ((c)->mtid != (c)->tid)  // different chroms
 		mateChromsR[(c)->mtid]++;
 	    }
 	}
     }
 
 int retVal = 0;
 for (i = 0; i < NUM_CHROMS; i++)
     {
     if (mateChromsL[i] < ap->minSupp)
 	continue;
     retVal = 1;
     if (mateChromsR[i] < ap->minSupp)
 	somaticSV[i]  = mateChromsL[i];
     else
 	germlineSV[i] = mateChromsL[i] + mateChromsR[i];
     }
 
 return retVal;
 }
 
+static int indel_variant(pileup_data_t *d, analysis_params_t *ap, 
+			 int *isIns, int *isDel, int *insGerm, int *delGerm)
+{
+if (insL >= ap->minSupp)
+    {
+    *isIns = 1;
+    if (insR >= ap->minSupp)
+	*insGerm = 1;
+    else 
+	*insGerm = 0;
+    return 1;
+    }
+
+if (delL >= ap->minSupp)
+    {
+    *isDel = 1;
+    if (delR >= ap->minSupp)
+	*delGerm = 1;
+    else
+	*delGerm = 0;
+    return 1;
+    }
+
+return 0;
+}
+
+static void print_seq(uint32_t tid, uint32_t pos, int n, 
+		      const bam_pileup1_t *pu, pileup_data_t *d)
+{
+int i, j, rb;
+rb = (d->ref && (int)pos < d->len)? d->ref[pos] : 'N';
+for (i = 0; i < n; ++i)
+    {
+    const bam_pileup1_t *p = pu + i;
+    if (p->b->core.qual < minMapQ)
+	continue;
+
+    int qual = bam1_qual(p->b)[p->qpos];    
+    if (qual < minQ)
+	continue;
+
+    if (p->is_head)
+	printf("^%c", p->b->core.qual > 93 ? 126 : p->b->core.qual + 33);
+    if (!p->is_del)
+	{
+	int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
+	if (c == '=' || toupper(c) == toupper(rb))
+	    c = bam1_strand(p->b) ? ',' : '.';
+	else
+	    c = bam1_strand(p->b) ? tolower(c) : toupper(c);
+	putchar(c);
+	if (p->indel > 0)
+	    {
+	    printf("+%d", p->indel);
+	    for (j = 1; j <= p->indel; ++j)
+		{
+		c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
+		putchar(bam1_strand(p->b) ? tolower(c) : toupper(c));
+		}
+	    }
+	else if (p->indel < 0)
+	    {
+	    printf("%d", p->indel);
+	    for (j = 1; j <= -p->indel; ++j)
+		{
+		c = (d->ref && (int)pos+j < d->len)? d->ref[pos+j] : 'N';
+		putchar(bam1_strand(p->b) ? tolower(c) : toupper(c));
+		}
+	    }
+	}
+    else
+	putchar('*');
+    if (p->is_tail)
+	putchar('$');
+    }
+putchar('\t');
+
+// print base quality
+for (i = 0; i < n; ++i)
+    {
+    const bam_pileup1_t *p = pu + i;
+    if (p->b->core.qual < minMapQ)
+	continue;
+
+    int qual = bam1_qual(p->b)[p->qpos];    
+    if (qual < minQ)
+	continue;
+
+    int c = qual + 33;
+    if (c > 126)
+	c = 126;
+    putchar(c);
+    }
+putchar('\t');
+
+// print mapping quality
+for (i = 0; i < n; ++i)
+    {
+    const bam_pileup1_t *p = pu + i;
+    int qual = p->b->core.qual;
+    if (qual < minMapQ)
+	continue;
+
+    if (bam1_qual(p->b)[p->qpos] < minQ)
+	continue;
+
+    int c = qual + 33;
+    if (c > 126)
+	c = 126;
+    putchar(c);
+    }
+}
+
 static void perform_analysis(void *data)
 {
 pileup_data_t *d = (pileup_data_t*)data;
 analysis_params_t *ap = (analysis_params_t*)d->func_data;
 
 int i;
 int stateL, stateR;
 char callL1, callL2, callR1, callR2;
 float hetL, hetR;
 
 nuc_counts(d, ap);
 
-// remember, BAM data is starts at base zero, not base one like browser.
-/***
-if (snp_variant(d, ap, &callL1, &callL2, &callR1, &callR2, &hetL, &hetR, &ap->sites))
-    {
-    printf("chr%s\t%d\t%d\t", 
-	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
-
-    if (hetR > HET_THRESH && hetL > HET_THRESH) // both hetero
-	printf("SNP:%c/%c(%f)>%c/%c(%f)\t", callR1, callR2, 1.0-hetR, callL1, callL2, 1.0-hetL);
-    else if (hetR <= HET_THRESH && hetL <= HET_THRESH) // both homo
-	printf("SNP:%c(%f)>%c(%f)\t", callR1, 1.0-hetR, callL1, 1.0-hetL);	
-    else if (hetR > HET_THRESH && hetL <= HET_THRESH) // germ hetero, tumor hom
-	printf("LOH:%c/%c(%f)>%c(%f)\t", callR1, callR2, 1.0-hetR, callL1, 1.0-hetL);
-    else if (hetR <= HET_THRESH && hetL > HET_THRESH) // germ homo, tumor hetero
-	printf("GOH:%c(%f)>%c/%c(%f)\t", callR1, 1.0-hetR, callL1, callL2, 1.0-hetL);
-    else
-	printf("error\t");  // should never get here.
-
-
-    fflush(stdout);
-    }
-***/
-
 if (model_select(d, ap, &callL1, &callL2, &callR1, &callR2, 
 		 &hetL, &hetR, &stateL, &stateR, &ap->sites))
     {
-    printf("chr%s\t%d\t%d\t", 
+    printf("%s\t%d\t%d\t", 
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
     if (stateR == 1)
 	printf("Hom:%c(%f)>", callR1, 1.0-hetR);
     else if (stateR == 2)
 	printf("Het:%c/%c(%f)>", callR1, callR2, 1.0-hetR);
-    else if (stateR == 3)
-	printf("Tri:%c/%c(%f)>", callR1, callR2, 1.0-hetR);
     
     if (stateL == 1)
 	printf("Hom:%c(%f)\t", callL1, 1.0-hetL);
     else if (stateL == 2)
 	printf("Het:%c/%c(%f)\t", callL1, callL2, 1.0-hetL);
-    else if (stateL == 3)
-	printf("Tri:%c/%c(%f)\t", callL1, callL2, 1.0-hetL);
 
     double chi_sq = sig_diff(d, ap);
     printf("%f\t", chi_sq);
 
-    for (i = 0; i < 4; i++)
-	printf("%c:%d,%d ", NUCS[i], votesR[i], countsR[i]);
+    print_seq(d->tidR, d->posR, d->nR, d->puR, d);
     putchar('\t');
+    print_seq(d->tidL, d->posL, d->nL, d->puL, d);
+    putchar('\n');
+
+    fflush(stdout);
+    }
+
+int isIns = 0, isDel = 0, insGerm = 0, delGerm = 0;
+if (indel_variant(d, ap, &isIns, &isDel, &insGerm, &delGerm))
+    {
+    printf("%s\t%d\t%d\t", 
+	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
+
+    if (isIns && insGerm)
+	printf("GIns(%d,%d),", insR, insL);
+    else if (isIns && !insGerm)
+	printf("SIns(%d,%d),", insR, insL);
+    
+    if (isDel && delGerm)
+	printf("GDel(%d,%d)", delR, delL);
+    else if (isDel && !delGerm)
+	printf("SDel(%d,%d)", delR, delL);
     
-    for (i = 0; i < 4; i++)
-	printf("%c:%d,%d ", NUCS[i], votesL[i], countsL[i]);
+    putchar('\t');
+    print_seq(d->tidR, d->posR, d->nR, d->puR, d);
+    putchar('\t');
+    print_seq(d->tidL, d->posL, d->nL, d->puL, d);
     putchar('\n');
+
     fflush(stdout);
     }
 
 if (struct_variant(d, ap))
     {
-    printf("chr%s\t%d\t%d\t", 
+    printf("%s\t%d\t%d\t", 
 	   d->hL->target_name[d->tidL], d->posL, d->posL + 1);
 
     for (i = 0; i < NUM_CHROMS; ++i)
 	{
 	if (somaticSV[i] > 0)
 	    printf("SSV:%s(%d),", d->hL->target_name[i], somaticSV[i]);
 	if (germlineSV[i] > 0)
 	    printf("GSV:%s(%d),", d->hL->target_name[i], germlineSV[i]);
 	}
+    putchar('\t');
+    print_seq(d->tidR, d->posR, d->nR, d->puR, d);
+    putchar('\t');
+    print_seq(d->tidL, d->posL, d->nL, d->puL, d);
     putchar('\n');
+
     fflush(stdout);
     }
 }
 
 void bamBam(char *inbamL, char *inbamR)
 {
 analysis_params_t ap;
 ap.minQ    = minQ;
 ap.minMapQ = minMapQ;
+ap.avgMapQ = avgMapQ;
 ap.minSupp = minSupport;
+ap.minChiSq = minChiSq;
 ap.sites   = 0;
 bam_bam_file(inbamL, inbamR, perform_analysis, &ap);
 fprintf(stderr, "# qualified sites = %d\n", ap.sites);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 // Print out command line as a comment.
 int i;
 putchar('#');
 for (i = 0; i < argc; i++)
     printf(" %s", argv[i]);
 putchar('\n');
 
-
 optionInit(&argc, argv, options);
 if (argc < 3)
     usage();
 
 if (optionExists("minSupport"))
     minSupport = optionInt("minSupport", 2);
 if (optionExists("minQ"))
     minQ = optionInt("minQ", 0);
 if (optionExists("minMapQ"))
     minMapQ = optionInt("minMapQ", 0);
+if (optionExists("avgMapQ"))
+    avgMapQ = optionInt("avgMapQ", 0);
+if (optionExists("minChiSq"))
+    minChiSq = optionInt("minChiSq", 0);
 
 char *inbamL = argv[1];
 char *inbamR = argv[2];
 bamBam(inbamL, inbamR);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif