src/hg/tcga/bamBam/bamBamCNV.c 1.1

1.1 2009/11/06 20:27:29 jsanborn
added file
Index: src/hg/tcga/bamBam/bamBamCNV.c
===================================================================
RCS file: src/hg/tcga/bamBam/bamBamCNV.c
diff -N src/hg/tcga/bamBam/bamBamCNV.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/bamBam/bamBamCNV.c	6 Nov 2009 20:27:29 -0000	1.1
@@ -0,0 +1,271 @@
+/* bamBam -- discovering variants from BAM file
+ *   code adapted from Heng Li's Samtools library functions
+ */
+
+#ifdef USE_BAM
+
+#include "common.h"
+#include "hCommon.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+#define _IOLIB 2
+#include "bam.h"
+#include "sam.h"
+#include "bam_dual_pileup.h"
+
+static char const rcsid[] = "$Id";
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "bamBamCNV - Perform copy number analysis on two BAM files.\n"
+  "usage:\n"
+  "   bamBamCNV [options] <left.bam> <right.bam>\n"
+  "options:\n"
+  "   -position=chrX:1-100 - Position to do analysis. [None]\n" 
+  "   -minMapQ=INT - Minimum acceptable mapping quality. [0]\n"
+  "   -readsPerBin - Number of RIGHT reads to put in each cnv bin. [500]\n" 
+  "\n"
+  );
+}
+
+static struct optionSpec options[] = {
+    {"position", OPTION_STRING},
+    {"minMapQ", OPTION_INT},
+    {"readsPerBin", OPTION_INT},
+    {NULL, 0}
+};
+
+#define BAD_TID -9999
+
+/* Option variables */
+char *position  = NULL;
+int minMapQ     = 0;
+int readsPerBin = 500;
+
+typedef struct {
+    int minMapQ;
+    int readsPerBin;
+    int sites;
+    
+    int32_t tid;
+
+    int32_t lstart;
+    int32_t lstop;
+    int lcounts;
+
+    int32_t rstart;
+    int32_t rstop;
+    int rcounts;
+
+    int32_t prevTid;
+    int32_t prevStart;
+    int32_t prevStop;    
+} analysis_params_t;
+
+static int calc_cnv(pileup_data_t *d, analysis_params_t *ap)
+{
+if ((ap->tid != BAD_TID) && (ap->tid != d->tidL))  // onto next chromosome
+    return 1;
+
+int i;
+const bam_pileup1_t *p;
+if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
+    {
+    for (i = 0; i < d->nL; ++i)
+	{
+	p = d->puL + i;
+	bam1_core_t *c = &p->b->core;
+	if ((c)->pos < ap->prevStop)
+	    return 0;
+	}
+
+    for (i = 0; i < d->nR; ++i)
+	{
+	p = d->puR + i;
+	bam1_core_t *c = &p->b->core;
+	if ((c)->pos < ap->prevStop)
+	    return 0;
+	}
+    }
+
+ap->tid = d->tidL;
+int32_t start, stop, rstart = ap->rstart, rstop = ap->rstop;
+int32_t prevPos = -1;
+for (i = 0; i < d->nR; ++i)
+    {
+    p = d->puR + i;
+    bam1_core_t *c = &p->b->core;
+    if ((c)->qual < ap->minMapQ)
+	continue;
+
+    if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
+	{ // read is mapped and not an optical/PCR dupe 
+	start = (c)->pos;
+	stop  = start + (c)->l_qseq;
+
+	if (rstart == -1)
+	    {
+	    rstart = start;
+	    rstop  = stop;
+	    ap->rcounts += 1;
+	    }
+	else if ((start > prevPos) && (start > rstart) && (stop > rstop))
+	    {  // make sure there are no possible dupes by removing all by 
+	       // comparing left-most position of each read, only allowing one per pos.
+	    prevPos = start;
+	    rstop  = stop;
+	    ap->rcounts += 1;
+	    }
+	}
+    }
+
+int32_t lstart = ap->lstart, lstop = ap->lstop;
+prevPos = -1;
+for (i = 0; i < d->nL; ++i)
+    {
+    p = d->puL + i;
+    bam1_core_t *c = &p->b->core;
+    if ((c)->qual < ap->minMapQ)
+	continue;
+
+    if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
+	{ // read is mapped and not an optical/PCR dupe 
+	start = (c)->pos;
+	stop  = start + (c)->l_qseq;
+
+	if (!((start >= rstart) && (stop <= rstop)))
+	    continue;
+
+	if (lstart == -1)
+	    {
+	    lstart = start;
+	    lstop  = stop;
+	    ap->lcounts += 1;
+	    }
+	else if ((start > prevPos) && (start > lstart && stop > lstop))
+	    {  // make sure there are no possible dupes by removing all by 
+	       // comparing left-most position of each read, only allowing one per pos.
+	    prevPos = start;
+	    lstop = stop;
+	    ap->lcounts += 1;
+	    }
+	}
+    }
+
+ap->lstart = lstart;
+ap->lstop  = lstop;
+
+ap->rstart = rstart;
+ap->rstop  = rstop;
+
+if (ap->rcounts >= ap->readsPerBin)
+    return 1;
+
+return 0;
+}
+
+static void perform_analysis(void *data)
+{
+pileup_data_t *d = (pileup_data_t*)data;
+analysis_params_t *ap = (analysis_params_t*)d->func_data;
+
+if (calc_cnv(d, ap))
+    {
+    //int32_t start = min(ap->lstart, ap->rstart);
+    //int32_t stop = max(ap->lstop, ap->rstop);
+    //int32_t len  = (stop - start) + 1;
+	
+    printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\n", 
+	   d->hL->target_name[ap->tid], 
+	   ap->lstart, ap->lstop + 1, 
+	   ap->rstart, ap->rstop + 1, 
+	   ap->lcounts, ap->rcounts);
+
+    fflush(stdout);
+
+    ap->prevTid   = ap->tid;
+    ap->prevStart = ap->rstart;
+    ap->prevStop  = ap->rstop;
+
+    ap->tid   = BAD_TID;
+    ap->lstart = -1;
+    ap->lstop  = -1;
+    ap->lcounts = 0;
+    
+    ap->rstart = -1;
+    ap->rstop  = -1;
+    ap->rcounts = 0;
+    }
+
+ap->sites += 1;
+}
+
+void bamBam(char *inbamL, char *inbamR)
+{
+analysis_params_t ap;
+ap.minMapQ = minMapQ;
+ap.sites   = 0;
+ap.readsPerBin = readsPerBin;
+
+ap.tid = BAD_TID;
+ap.lstart = -1;
+ap.lstop  = -1;
+ap.lcounts = 0;
+
+ap.rstart = -1;
+ap.rstop  = -1;
+ap.rcounts = 0;
+
+ap.prevTid = BAD_TID;
+ap.prevStart = -1;
+ap.prevStop  = -1;
+
+bam_bam_file(inbamL, inbamR, position, perform_analysis, &ap);
+fprintf(stderr, "# qualified sites = %d\n", ap.sites);
+}
+
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+// Print out command line as a comment.
+int i;
+putchar('#');
+for (i = 0; i < argc; i++)
+    printf(" %s", argv[i]);
+putchar('\n');
+
+optionInit(&argc, argv, options);
+if (argc < 3)
+    usage();
+
+if (optionExists("minMapQ"))
+    minMapQ = optionInt("minMapQ", 0);
+if (optionExists("readsPerBin"))
+    readsPerBin = optionInt("readsPerBin", 0);
+if (optionExists("position"))
+    position = optionVal("position", NULL);
+
+char *inbamL = argv[1];
+char *inbamR = argv[2];
+bamBam(inbamL, inbamR);
+
+return 0;
+}
+
+#else // USE_BAM not defined
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+printf("BAM not installed");
+
+return 0;
+}
+#endif