src/hg/tcga/bamBam/bamBamCNV.c 1.2

1.2 2009/11/06 20:29:26 jsanborn
added file
Index: src/hg/tcga/bamBam/bamBamCNV.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamBam/bamBamCNV.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/tcga/bamBam/bamBamCNV.c	6 Nov 2009 20:27:29 -0000	1.1
+++ src/hg/tcga/bamBam/bamBamCNV.c	6 Nov 2009 20:29:26 -0000	1.2
@@ -1,271 +1,266 @@
 /* bamBam -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 #include "bam_dual_pileup.h"
 
 static char const rcsid[] = "$Id";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bamBamCNV - Perform copy number analysis on two BAM files.\n"
   "usage:\n"
   "   bamBamCNV [options] <left.bam> <right.bam>\n"
   "options:\n"
   "   -position=chrX:1-100 - Position to do analysis. [None]\n" 
   "   -minMapQ=INT - Minimum acceptable mapping quality. [0]\n"
   "   -readsPerBin - Number of RIGHT reads to put in each cnv bin. [500]\n" 
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {"position", OPTION_STRING},
     {"minMapQ", OPTION_INT},
     {"readsPerBin", OPTION_INT},
     {NULL, 0}
 };
 
 #define BAD_TID -9999
 
 /* Option variables */
 char *position  = NULL;
 int minMapQ     = 0;
 int readsPerBin = 500;
 
 typedef struct {
     int minMapQ;
     int readsPerBin;
     int sites;
     
     int32_t tid;
 
     int32_t lstart;
     int32_t lstop;
     int lcounts;
 
     int32_t rstart;
     int32_t rstop;
     int rcounts;
 
     int32_t prevTid;
     int32_t prevStart;
     int32_t prevStop;    
 } analysis_params_t;
 
 static int calc_cnv(pileup_data_t *d, analysis_params_t *ap)
 {
 if ((ap->tid != BAD_TID) && (ap->tid != d->tidL))  // onto next chromosome
     return 1;
 
 int i;
 const bam_pileup1_t *p;
 if ((ap->prevTid != BAD_TID) && (ap->prevTid == d->tidL))
     {
     for (i = 0; i < d->nL; ++i)
 	{
 	p = d->puL + i;
 	bam1_core_t *c = &p->b->core;
 	if ((c)->pos < ap->prevStop)
 	    return 0;
 	}
 
     for (i = 0; i < d->nR; ++i)
 	{
 	p = d->puR + i;
 	bam1_core_t *c = &p->b->core;
 	if ((c)->pos < ap->prevStop)
 	    return 0;
 	}
     }
 
 ap->tid = d->tidL;
 int32_t start, stop, rstart = ap->rstart, rstop = ap->rstop;
 int32_t prevPos = -1;
 for (i = 0; i < d->nR; ++i)
     {
     p = d->puR + i;
     bam1_core_t *c = &p->b->core;
     if ((c)->qual < ap->minMapQ)
 	continue;
 
     if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
 	{ // read is mapped and not an optical/PCR dupe 
 	start = (c)->pos;
 	stop  = start + (c)->l_qseq;
 
 	if (rstart == -1)
 	    {
 	    rstart = start;
 	    rstop  = stop;
 	    ap->rcounts += 1;
 	    }
 	else if ((start > prevPos) && (start > rstart) && (stop > rstop))
 	    {  // make sure there are no possible dupes by removing all by 
 	       // comparing left-most position of each read, only allowing one per pos.
 	    prevPos = start;
 	    rstop  = stop;
 	    ap->rcounts += 1;
 	    }
 	}
     }
 
 int32_t lstart = ap->lstart, lstop = ap->lstop;
 prevPos = -1;
 for (i = 0; i < d->nL; ++i)
     {
     p = d->puL + i;
     bam1_core_t *c = &p->b->core;
     if ((c)->qual < ap->minMapQ)
 	continue;
 
     if ( !((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FDUP) )
 	{ // read is mapped and not an optical/PCR dupe 
 	start = (c)->pos;
 	stop  = start + (c)->l_qseq;
 
 	if (!((start >= rstart) && (stop <= rstop)))
 	    continue;
 
 	if (lstart == -1)
 	    {
 	    lstart = start;
 	    lstop  = stop;
 	    ap->lcounts += 1;
 	    }
 	else if ((start > prevPos) && (start > lstart && stop > lstop))
 	    {  // make sure there are no possible dupes by removing all by 
 	       // comparing left-most position of each read, only allowing one per pos.
 	    prevPos = start;
 	    lstop = stop;
 	    ap->lcounts += 1;
 	    }
 	}
     }
 
 ap->lstart = lstart;
 ap->lstop  = lstop;
 
 ap->rstart = rstart;
 ap->rstop  = rstop;
 
 if (ap->rcounts >= ap->readsPerBin)
     return 1;
 
 return 0;
 }
 
 static void perform_analysis(void *data)
 {
 pileup_data_t *d = (pileup_data_t*)data;
 analysis_params_t *ap = (analysis_params_t*)d->func_data;
 
 if (calc_cnv(d, ap))
     {
-    //int32_t start = min(ap->lstart, ap->rstart);
-    //int32_t stop = max(ap->lstop, ap->rstop);
-    //int32_t len  = (stop - start) + 1;
-	
     printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\n", 
 	   d->hL->target_name[ap->tid], 
-	   ap->lstart, ap->lstop + 1, 
-	   ap->rstart, ap->rstop + 1, 
-	   ap->lcounts, ap->rcounts);
+	   ap->lstart, ap->lstop + 1, ap->lcounts,
+	   ap->rstart, ap->rstop + 1, ap->rcounts);
 
     fflush(stdout);
 
     ap->prevTid   = ap->tid;
     ap->prevStart = ap->rstart;
     ap->prevStop  = ap->rstop;
 
     ap->tid   = BAD_TID;
     ap->lstart = -1;
     ap->lstop  = -1;
     ap->lcounts = 0;
     
     ap->rstart = -1;
     ap->rstop  = -1;
     ap->rcounts = 0;
     }
 
 ap->sites += 1;
 }
 
 void bamBam(char *inbamL, char *inbamR)
 {
 analysis_params_t ap;
 ap.minMapQ = minMapQ;
 ap.sites   = 0;
 ap.readsPerBin = readsPerBin;
 
 ap.tid = BAD_TID;
 ap.lstart = -1;
 ap.lstop  = -1;
 ap.lcounts = 0;
 
 ap.rstart = -1;
 ap.rstop  = -1;
 ap.rcounts = 0;
 
 ap.prevTid = BAD_TID;
 ap.prevStart = -1;
 ap.prevStop  = -1;
 
 bam_bam_file(inbamL, inbamR, position, perform_analysis, &ap);
 fprintf(stderr, "# qualified sites = %d\n", ap.sites);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 // Print out command line as a comment.
 int i;
 putchar('#');
 for (i = 0; i < argc; i++)
     printf(" %s", argv[i]);
 putchar('\n');
 
 optionInit(&argc, argv, options);
 if (argc < 3)
     usage();
 
 if (optionExists("minMapQ"))
     minMapQ = optionInt("minMapQ", 0);
 if (optionExists("readsPerBin"))
     readsPerBin = optionInt("readsPerBin", 0);
 if (optionExists("position"))
     position = optionVal("position", NULL);
 
 char *inbamL = argv[1];
 char *inbamR = argv[2];
 bamBam(inbamL, inbamR);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif