src/hg/tcga/bamToFastQ/bamToFastQ.c 1.2
1.2 2010/03/10 00:21:30 jsanborn
updated
Index: src/hg/tcga/bamToFastQ/bamToFastQ.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamToFastQ/bamToFastQ.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 1000000 -r1.1 -r1.2
--- src/hg/tcga/bamToFastQ/bamToFastQ.c 9 Mar 2010 23:25:57 -0000 1.1
+++ src/hg/tcga/bamToFastQ/bamToFastQ.c 10 Mar 2010 00:21:30 -0000 1.2
@@ -1,146 +1,177 @@
/* bamVariants -- discovering variants from BAM file
* code adapted from Heng Li's Samtools library functions
*/
#ifdef USE_BAM
#include "common.h"
#include "hCommon.h"
#include "linefile.h"
#include "hash.h"
#include "hdb.h"
#include "options.h"
#include "jksql.h"
#define _IOLIB 2
#include "bam.h"
#include "sam.h"
static char const rcsid[] = "$Id$";
void usage()
/* Explain usage and exit. */
{
errAbort(
"bamVariants - Find variants in input BAM file, output position/type of variant.\n"
"usage:\n"
- " bamVariants [options] <in.bam>\n"
+ " bamVariants [options] <in.bam> <out_read1.fq> <out_read2.fq>\n"
"options:\n"
" -position=<chrom position> - Specify a region of BAM file to perform pileup\n"
"\n"
);
}
static struct optionSpec options[] = {
{"position", OPTION_STRING},
{NULL, 0}
};
/* Option variables */
char *position = NULL;
typedef struct {
int beg, end;
samfile_t *in;
} tmpstruct_t;
-int readNum = 2;
+
+typedef struct {
+ FILE *f1, *f2;
+} readfiles_t;
// callback for bam_fetch()
static int fetch_func(const bam1_t *b, void *data)
{
-/* sequence name */
-putchar('@');
-if (readNum == 2)
+int readNum = 0;
+const bam1_core_t *c = &b->core;
+
+readfiles_t *files = (readfiles_t*) data;
+FILE *fp = NULL;
+if ((c)->flag & BAM_FREAD1)
+ {
+ fp = files->f1;
readNum = 1;
-else
+ }
+else if ((c)->flag & BAM_FREAD2)
+ {
+ fp = files->f2;
readNum = 2;
-printf("%s/%d", bam1_qname(b), readNum);
-putchar('\n');
+ }
+if (!fp)
+ return 0;
+
+/* sequence name */
+fprintf(fp, "@%s/%d\n", bam1_qname(b), readNum);
/* sequence */
int i, n = (b)->core.l_qseq;
for (i = 0; i < n; ++i)
{
int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
- putchar(c);
+ fputc(c, fp);
}
-putchar('\n');
+fputc('\n', fp);
/* sequence base qualities */
-putchar('+');
-putchar('\n');
+fputc('+', fp);
+fputc('\n', fp);
for (i = 0; i < n; ++i)
{
int c = bam1_qual(b)[i] + 33;
if (c > 126)
c = 126;
- putchar(c);
+ fputc(c, fp);
}
-putchar('\n');
+fputc('\n', fp);
return 0;
}
-void bamToFastQ(char *inbam)
+void bamToFastQ(char *inbam, char *fileRead1, char *fileRead2)
/* bamToFastQ - Convert BAM -> FastQ format */
{
tmpstruct_t tmp;
tmp.beg = 0; tmp.end = 0x7fffffff;
tmp.in = samopen(inbam, "rb", NULL);
if (tmp.in == NULL)
errAbort("samopen(%s, \"rb\") returned NULL", inbam);
-if (!position)
- { // if a region is not specified
- errAbort("Need to specify position!");
- }
-int ref;
-bam_index_t *idx;
-idx = bam_index_load(inbam); // load BAM index
-if (!idx)
+readfiles_t files;
+files.f1 = fopen(fileRead1, "w");
+files.f2 = fopen(fileRead2, "w");
+
+if (position)
+ {
+ int ref;
+ bam_index_t *idx;
+ idx = bam_index_load(inbam); // load BAM index
+ if (!idx)
{
samclose(tmp.in);
errAbort("BAM indexing file is not available.\n");
}
-bam_parse_region(tmp.in->header, position, &ref,
+ bam_parse_region(tmp.in->header, position, &ref,
&tmp.beg, &tmp.end); // parse the region
-if (ref < 0)
+ if (ref < 0)
{
samclose(tmp.in);
errAbort("Invalid region %s\n", position);
}
-bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, NULL, fetch_func);
-bam_index_destroy(idx);
-samclose(tmp.in);
+ bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, &files, fetch_func);
+ bam_index_destroy(idx);
+ samclose(tmp.in);
+ }
+else
+ { // region not specified
+ int ret;
+ bam1_t *b = bam_init1();
+ while ((ret = samread(tmp.in, b)) >= 0)
+ fetch_func(b, &files); // read through entire file
+ bam_destroy1(b);
+ }
+
+fclose(files.f1);
+fclose(files.f2);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
-if (argc < 2)
+if (argc < 4)
usage();
if (optionExists("position"))
position = optionVal("position", NULL);
char *inbam = argv[1];
-bamToFastQ(inbam);
+char *fileRead1 = argv[2];
+char *fileRead2 = argv[3];
+bamToFastQ(inbam, fileRead1, fileRead2);
return 0;
}
#else // USE_BAM not defined
int main(int argc, char *argv[])
/* Process command line. */
{
printf("BAM not installed");
return 0;
}
#endif