src/hg/tcga/bamToFastQ/bamToFastQ.c 1.1

1.1 2010/03/09 23:25:57 jsanborn
initial commit
Index: src/hg/tcga/bamToFastQ/bamToFastQ.c
===================================================================
RCS file: src/hg/tcga/bamToFastQ/bamToFastQ.c
diff -N src/hg/tcga/bamToFastQ/bamToFastQ.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/bamToFastQ/bamToFastQ.c	9 Mar 2010 23:25:57 -0000	1.1
@@ -0,0 +1,146 @@
+/* bamVariants -- discovering variants from BAM file
+ *   code adapted from Heng Li's Samtools library functions
+ */
+
+#ifdef USE_BAM
+
+#include "common.h"
+#include "hCommon.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+#define _IOLIB 2
+#include "bam.h"
+#include "sam.h"
+
+static char const rcsid[] = "$Id$";
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "bamVariants - Find variants in input BAM file, output position/type of variant.\n"
+  "usage:\n"
+  "   bamVariants [options] <in.bam>\n"
+  "options:\n"
+  "   -position=<chrom position> - Specify a region of BAM file to perform pileup\n"
+  "\n"
+  );
+}
+
+static struct optionSpec options[] = {
+    {"position", OPTION_STRING},
+    {NULL, 0}
+};
+
+/* Option variables */
+char *position = NULL;
+
+typedef struct {
+    int beg, end;
+    samfile_t *in;
+} tmpstruct_t;
+
+int readNum = 2;
+
+// callback for bam_fetch()
+static int fetch_func(const bam1_t *b, void *data)
+{
+/* sequence name */
+putchar('@');
+if (readNum == 2)
+    readNum = 1;
+else
+    readNum = 2;
+printf("%s/%d", bam1_qname(b), readNum);
+putchar('\n');
+
+/* sequence */
+int i, n = (b)->core.l_qseq;
+for (i = 0; i < n; ++i)
+    {
+    int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
+    putchar(c);
+    }
+putchar('\n');
+
+/* sequence base qualities */
+putchar('+');
+putchar('\n');
+for (i = 0; i < n; ++i)
+    {
+    int c = bam1_qual(b)[i] + 33;
+    if (c > 126)
+	c = 126;
+    putchar(c);
+    }
+putchar('\n');
+
+return 0;
+}
+
+
+void bamToFastQ(char *inbam)
+/* bamToFastQ - Convert BAM -> FastQ format */
+{
+tmpstruct_t tmp;  
+tmp.beg = 0; tmp.end = 0x7fffffff;
+tmp.in = samopen(inbam, "rb", NULL);
+if (tmp.in == NULL)
+    errAbort("samopen(%s, \"rb\") returned NULL", inbam);
+
+if (!position) 
+    {  // if a region is not specified
+    errAbort("Need to specify position!");
+    } 
+
+int ref;
+bam_index_t *idx;
+idx = bam_index_load(inbam); // load BAM index
+if (!idx) 
+    {
+    samclose(tmp.in);
+    errAbort("BAM indexing file is not available.\n");
+    }
+bam_parse_region(tmp.in->header, position, &ref,
+		 &tmp.beg, &tmp.end); // parse the region
+if (ref < 0) 
+    {
+    samclose(tmp.in);
+    errAbort("Invalid region %s\n", position);
+    }
+
+bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, NULL, fetch_func);
+bam_index_destroy(idx);
+samclose(tmp.in);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc < 2)
+    usage();
+
+if (optionExists("position"))
+    position = optionVal("position", NULL);
+
+char *inbam = argv[1];
+bamToFastQ(inbam);
+
+return 0;
+}
+
+#else // USE_BAM not defined
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+printf("BAM not installed");
+
+return 0;
+}
+#endif