src/hg/tcga/bamToFastQ/bamToFastQ.c 1.3
1.3 2010/03/11 04:38:52 jsanborn
updated to correctly output paired ends
Index: src/hg/tcga/bamToFastQ/bamToFastQ.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/tcga/bamToFastQ/bamToFastQ.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/tcga/bamToFastQ/bamToFastQ.c 10 Mar 2010 00:21:30 -0000 1.2
+++ src/hg/tcga/bamToFastQ/bamToFastQ.c 11 Mar 2010 04:38:52 -0000 1.3
@@ -22,8 +22,10 @@
/* Explain usage and exit. */
{
errAbort(
"bamVariants - Find variants in input BAM file, output position/type of variant.\n"
+ " BAM files MUST be sorted by name to produce FASTQ files appropriate for \n"
+ " alignment by MAQ, BWA, etc.\n"
"usage:\n"
" bamVariants [options] <in.bam> <out_read1.fq> <out_read2.fq>\n"
"options:\n"
" -position=<chrom position> - Specify a region of BAM file to perform pileup\n"
@@ -46,54 +48,111 @@
typedef struct {
FILE *f1, *f2;
+ bam1_t *b1, *b2;
} readfiles_t;
-// callback for bam_fetch()
-static int fetch_func(const bam1_t *b, void *data)
+static inline void printForward(FILE *fp, const bam1_t *b)
{
-int readNum = 0;
-const bam1_core_t *c = &b->core;
-
-readfiles_t *files = (readfiles_t*) data;
-FILE *fp = NULL;
-if ((c)->flag & BAM_FREAD1)
+int i, n = (b)->core.l_qseq;
+for (i = 0; i < n; ++i)
{
- fp = files->f1;
- readNum = 1;
+ int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
+ fputc(c, fp);
}
-else if ((c)->flag & BAM_FREAD2)
+fputc('\n', fp);
+
+/* sequence base qualities */
+fputc('+', fp);
+fputc('\n', fp);
+for (i = 0; i < n; ++i)
{
- fp = files->f2;
- readNum = 2;
+ int c = bam1_qual(b)[i] + 33;
+ if (c > 126)
+ c = 126;
+ fputc(c, fp);
}
-if (!fp)
- return 0;
+fputc('\n', fp);
+}
-/* sequence name */
-fprintf(fp, "@%s/%d\n", bam1_qname(b), readNum);
+//char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN";
-/* sequence */
+char *bam_nt16_reverse_table = "=TGMCRSVAWYHKDBN";
+static inline void printReverse(FILE *fp, const bam1_t *b)
+{
int i, n = (b)->core.l_qseq;
-for (i = 0; i < n; ++i)
+for (i = n-1; i >= 0; --i)
{
- int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
+ int c = toupper(bam_nt16_reverse_table[bam1_seqi(bam1_seq(b), i)]);
fputc(c, fp);
}
fputc('\n', fp);
/* sequence base qualities */
fputc('+', fp);
fputc('\n', fp);
-for (i = 0; i < n; ++i)
+for (i = n-1; i >= 0; --i)
{
int c = bam1_qual(b)[i] + 33;
if (c > 126)
c = 126;
fputc(c, fp);
}
fputc('\n', fp);
+}
+
+// callback for bam_fetch()
+static inline int fetch_func(const bam1_t *b, void *data)
+{
+const bam1_core_t *c = &b->core;
+
+if (((c)->flag & BAM_FUNMAP) || ((c)->flag & BAM_FDUP) )
+ return 0;
+if (!((c)->flag & BAM_FPAIRED) || ((c)->flag & BAM_FUNMAP))
+ return 0; // read is not paired or unmapped.
+if ((c)->flag & BAM_FMUNMAP)
+ return 0; // mate is unmapped.
+if ((c)->mtid != (c)->tid)
+ return 0; // mate is on same chrom. MERGE INTER CODE HERE?
+
+readfiles_t *files = (readfiles_t *) data;
+if ((c)->flag & BAM_FREAD1)
+ files->b1 = bam_dup1(b);
+else if ((c)->flag & BAM_FREAD2)
+ files->b2 = bam_dup1(b);
+
+if (!files->b1 || !files->b2)
+ return 0;
+
+char *name1 = bam1_qname(files->b1);
+char *name2 = bam1_qname(files->b2);
+
+if (!name1 || !name2)
+ return 0;
+
+if (!sameString(name1, name2)) /* different names */
+ return 0;
+
+/* print read 1 */
+c = &files->b1->core;
+fprintf(files->f1, "@%s/1\n", name1);
+
+/* sequence & quality */
+if ((c)->flag & BAM_FREVERSE)
+ printReverse(files->f1, files->b1);
+else
+ printForward(files->f1, files->b1);
+
+/* print read 2 */
+c = &files->b2->core;
+fprintf(files->f2, "@%s/2\n", name2);
+
+/* sequence & quality */
+if ((c)->flag & BAM_FREVERSE)
+ printReverse(files->f2, files->b2);
+else
+ printForward(files->f2, files->b2);
return 0;
}
@@ -111,8 +170,12 @@
readfiles_t files;
files.f1 = fopen(fileRead1, "w");
files.f2 = fopen(fileRead2, "w");
+// containers to hold read pairs.
+files.b1 = bam_init1();
+files.b2 = bam_init1();
+
if (position)
{
int ref;
bam_index_t *idx;
@@ -144,8 +207,10 @@
}
fclose(files.f1);
fclose(files.f2);
+bam_destroy1(files.b1);
+bam_destroy1(files.b2);
}
int main(int argc, char *argv[])
/* Process command line. */