src/hg/tcga/bamToFastQ/bamToFastQ.c 1.2

1.2 2010/03/10 00:21:30 jsanborn
updated
Index: src/hg/tcga/bamToFastQ/bamToFastQ.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamToFastQ/bamToFastQ.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/tcga/bamToFastQ/bamToFastQ.c	9 Mar 2010 23:25:57 -0000	1.1
+++ src/hg/tcga/bamToFastQ/bamToFastQ.c	10 Mar 2010 00:21:30 -0000	1.2
@@ -1,146 +1,177 @@
 /* bamVariants -- discovering variants from BAM file
  *   code adapted from Heng Li's Samtools library functions
  */
 
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hCommon.h"
 #include "linefile.h"
 #include "hash.h"
 #include "hdb.h"
 #include "options.h"
 #include "jksql.h"
 
 #define _IOLIB 2
 #include "bam.h"
 #include "sam.h"
 
 static char const rcsid[] = "$Id$";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bamVariants - Find variants in input BAM file, output position/type of variant.\n"
   "usage:\n"
-  "   bamVariants [options] <in.bam>\n"
+  "   bamVariants [options] <in.bam> <out_read1.fq> <out_read2.fq>\n"
   "options:\n"
   "   -position=<chrom position> - Specify a region of BAM file to perform pileup\n"
   "\n"
   );
 }
 
 static struct optionSpec options[] = {
     {"position", OPTION_STRING},
     {NULL, 0}
 };
 
 /* Option variables */
 char *position = NULL;
 
 typedef struct {
     int beg, end;
     samfile_t *in;
 } tmpstruct_t;
 
-int readNum = 2;
+
+typedef struct {
+    FILE *f1, *f2;
+} readfiles_t;
 
 // callback for bam_fetch()
 static int fetch_func(const bam1_t *b, void *data)
 {
-/* sequence name */
-putchar('@');
-if (readNum == 2)
+int readNum = 0;
+const bam1_core_t *c = &b->core;
+
+readfiles_t *files = (readfiles_t*) data;
+FILE *fp = NULL;
+if ((c)->flag & BAM_FREAD1)
+    {
+    fp = files->f1; 
     readNum = 1;
-else
+    }
+else if ((c)->flag & BAM_FREAD2)
+    {
+    fp = files->f2;
     readNum = 2;
-printf("%s/%d", bam1_qname(b), readNum);
-putchar('\n');
+    }
+if (!fp)
+    return 0;
+
+/* sequence name */
+fprintf(fp, "@%s/%d\n", bam1_qname(b), readNum);
 
 /* sequence */
 int i, n = (b)->core.l_qseq;
 for (i = 0; i < n; ++i)
     {
     int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
-    putchar(c);
+    fputc(c, fp);
     }
-putchar('\n');
+fputc('\n', fp);
 
 /* sequence base qualities */
-putchar('+');
-putchar('\n');
+fputc('+', fp);
+fputc('\n', fp);
 for (i = 0; i < n; ++i)
     {
     int c = bam1_qual(b)[i] + 33;
     if (c > 126)
 	c = 126;
-    putchar(c);
+    fputc(c, fp);
     }
-putchar('\n');
+fputc('\n', fp);
 
 return 0;
 }
 
 
-void bamToFastQ(char *inbam)
+void bamToFastQ(char *inbam, char *fileRead1, char *fileRead2)
 /* bamToFastQ - Convert BAM -> FastQ format */
 {
 tmpstruct_t tmp;  
 tmp.beg = 0; tmp.end = 0x7fffffff;
 tmp.in = samopen(inbam, "rb", NULL);
 if (tmp.in == NULL)
     errAbort("samopen(%s, \"rb\") returned NULL", inbam);
 
-if (!position) 
-    {  // if a region is not specified
-    errAbort("Need to specify position!");
-    } 
 
-int ref;
-bam_index_t *idx;
-idx = bam_index_load(inbam); // load BAM index
-if (!idx) 
+readfiles_t files;
+files.f1 = fopen(fileRead1, "w");
+files.f2 = fopen(fileRead2, "w");
+
+if (position) 
+    {
+    int ref;
+    bam_index_t *idx;
+    idx = bam_index_load(inbam); // load BAM index
+    if (!idx) 
     {
     samclose(tmp.in);
     errAbort("BAM indexing file is not available.\n");
     }
-bam_parse_region(tmp.in->header, position, &ref,
+    bam_parse_region(tmp.in->header, position, &ref,
 		 &tmp.beg, &tmp.end); // parse the region
-if (ref < 0) 
+    if (ref < 0) 
     {
     samclose(tmp.in);
     errAbort("Invalid region %s\n", position);
     }
 
-bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, NULL, fetch_func);
-bam_index_destroy(idx);
-samclose(tmp.in);
+    bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, &files, fetch_func);
+    bam_index_destroy(idx);
+    samclose(tmp.in);
+    }
+else
+    {  // region not specified 
+    int ret;
+    bam1_t *b = bam_init1();
+    while ((ret = samread(tmp.in, b)) >= 0)
+	fetch_func(b, &files);   // read through entire file
+    bam_destroy1(b);
+    }
+
+fclose(files.f1);
+fclose(files.f2);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
-if (argc < 2)
+if (argc < 4)
     usage();
 
 if (optionExists("position"))
     position = optionVal("position", NULL);
 
 char *inbam = argv[1];
-bamToFastQ(inbam);
+char *fileRead1 = argv[2];
+char *fileRead2 = argv[3];
+bamToFastQ(inbam, fileRead1, fileRead2);
 
 return 0;
 }
 
 #else // USE_BAM not defined
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 printf("BAM not installed");
 
 return 0;
 }
 #endif