src/hg/tcga/bamBam/bamBam.c 1.5
1.5 2009/09/26 18:29:47 jsanborn
updated
Index: src/hg/tcga/bamBam/bamBam.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBam.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/hg/tcga/bamBam/bamBam.c 25 Sep 2009 21:16:13 -0000 1.4
+++ src/hg/tcga/bamBam/bamBam.c 26 Sep 2009 18:29:47 -0000 1.5
@@ -1,108 +1,228 @@
/* bamBam -- discovering variants from BAM file
* code adapted from Heng Li's Samtools library functions
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
#include "bam_dual_pileup.h"
static char const rcsid[] = "$Id";
void usage()
/* Explain usage and exit. */
{
errAbort(
"bamBam - Perform analysis on two BAM files.\n"
"usage:\n"
" bamBam [options] <left.bam> <right.bam>\n"
"options:\n"
+ " -minSupport=INT - Minimum Support (i.e. num reads) to call a variant. [2]\n"
+ " -minQ=INT - Minimum acceptable base quality. [0]\n"
+ " -minMapQ=INT - Minimum acceptable mapping quality. [0]\n"
"\n"
);
}
static struct optionSpec options[] = {
+ {"minSupport", OPTION_INT},
+ {"minQ", OPTION_INT},
+ {"minMapQ", OPTION_INT},
{NULL, 0}
};
/* Option variables */
char *position = NULL;
+int minSupport = 2;
+int minQ = 0;
+int minMapQ = 0;
typedef struct {
- int minQual;
- int minMapQual;
+ int minQ;
+ int minMapQ;
+ int minSupp;
} analysis_params;
-static void perform_analysis(void *data)
+static int32_t votesL[4];
+static int32_t countsL[4];
+static int32_t votesR[4];
+static int32_t countsR[4];
+
+static int nuc_index(char c)
{
-pileup_data_t *d = (pileup_data_t*)data;
-analysis_params *ap = (analysis_params*)d->func_data;
+switch (c)
+ {
+ case 'A':
+ return 0;
+ case 'G':
+ return 1;
+ case 'C':
+ return 2;
+ case 'T':
+ return 3;
+ default:
+ return -1;
+ }
+return -1;
+}
-printf("%d\t%d\t%d\t%d/%d\t", d->tidL, d->posL, d->nL, ap->minQual, ap->minMapQual);
+#define NUCS "AGCT"
+
+static int consensus_snp_variant(pileup_data_t *d, analysis_params *ap,
+ char *callL, char *callR)
+{
+int i, index;
+for (i = 0; i < 4; i++)
+ { // sum up the qualities of each base
+ votesL[i] = 0;
+ countsL[i] = 0;
+ votesR[i] = 0;
+ countsR[i] = 0;
+ }
-int i;
const bam_pileup1_t *p;
for (i = 0; i < d->nL; ++i)
{
p = d->puL + i;
+
+ if (p->b->core.qual < ap->minMapQ)
+ continue;
+
int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
- putchar(c);
+ int qual = bam1_qual(p->b)[p->qpos];
+ if (qual < ap->minQ)
+ continue;
+
+ index = nuc_index(c);
+ if ((index = nuc_index(c)) < 0)
+ continue;
+
+ votesL[index] += qual;
+ countsL[index]++;
}
-putchar('\t');
-printf("%d\t", d->nR);
for (i = 0; i < d->nR; ++i)
{
p = d->puR + i;
+
+ if (p->b->core.qual < ap->minMapQ)
+ continue;
+
int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]);
- putchar(c);
+ int qual = bam1_qual(p->b)[p->qpos];
+ if (qual < ap->minQ)
+ continue;
+
+ index = nuc_index(c);
+ if ((index = nuc_index(c)) < 0)
+ continue;
+
+ votesR[index] += qual;
+ countsR[index]++;
}
-putchar('\n');
+
+int iL = -1, maxL = -1, countL = -1;
+int iR = -1, maxR = -1, countR = -1;
+for (i = 0; i < 4; i++)
+ {
+ if (votesL[i] > maxL)
+ {
+ iL = i;
+ maxL = votesL[i];
+ countL = countsL[i];
+ }
+ if (votesR[i] > maxR)
+ {
+ iR = i;
+ maxR = votesR[i];
+ countR = countsR[i];
+ }
+ }
+
+if (iL == -1 || iR == -1 || (iL == iR)) // either no good data or matching consensus.
+ return 0;
+if (countL < ap->minSupp || countR < ap->minSupp)
+ return 0;
+
+*callL = NUCS[iL];
+*callR = NUCS[iR];
+
+return 1;
}
-void bamBam(char *inbamL, char *inbamR)
+static void perform_analysis(void *data)
{
+pileup_data_t *d = (pileup_data_t*)data;
+analysis_params *ap = (analysis_params*)d->func_data;
+
+char callL, callR;
+if (!consensus_snp_variant(d, ap, &callL, &callR))
+ return;
+
+// remember, BAM data is starts at base zero, not base one like browser.
+printf("chr%s\t%d\t%d\t%c,%c\n",
+ d->hL->target_name[d->tidL], d->posL, d->posL + 1, callL, callR);
+fflush(stdout);
+}
+
+void bamBam(char *inbamL, char *inbamR)
+{
analysis_params aps;
-aps.minQual = 50;
-aps.minMapQual = 25;
+aps.minQ = minQ;
+aps.minMapQ = minMapQ;
+aps.minSupp = minSupport;
bam_bam_file(inbamL, inbamR, perform_analysis, &aps);
}
int main(int argc, char *argv[])
/* Process command line. */
{
+int i;
+putchar('#');
+for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+putchar('\n');
+
+
optionInit(&argc, argv, options);
-if (argc != 3)
+if (argc < 3)
usage();
+if (optionExists("minSupport"))
+ minSupport = optionInt("minSupport", 2);
+if (optionExists("minQ"))
+ minQ = optionInt("minQ", 0);
+if (optionExists("minMapQ"))
+ minMapQ = optionInt("minMapQ", 0);
+
char *inbamL = argv[1];
char *inbamR = argv[2];
bamBam(inbamL, inbamR);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif