src/hg/tcga/bamToFastQ/bamToFastQ.c 1.1
1.1 2010/03/09 23:25:57 jsanborn
initial commit
Index: src/hg/tcga/bamToFastQ/bamToFastQ.c
===================================================================
RCS file: src/hg/tcga/bamToFastQ/bamToFastQ.c
diff -N src/hg/tcga/bamToFastQ/bamToFastQ.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/tcga/bamToFastQ/bamToFastQ.c 9 Mar 2010 23:25:57 -0000 1.1
@@ -0,0 +1,146 @@
+/* bamVariants -- discovering variants from BAM file
+ * code adapted from Heng Li's Samtools library functions
+ */
+
+#ifdef USE_BAM
+
+#include "common.h"
+#include "hCommon.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+#define _IOLIB 2
+#include "bam.h"
+#include "sam.h"
+
+static char const rcsid[] = "$Id$";
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+ "bamVariants - Find variants in input BAM file, output position/type of variant.\n"
+ "usage:\n"
+ " bamVariants [options] <in.bam>\n"
+ "options:\n"
+ " -position=<chrom position> - Specify a region of BAM file to perform pileup\n"
+ "\n"
+ );
+}
+
+static struct optionSpec options[] = {
+ {"position", OPTION_STRING},
+ {NULL, 0}
+};
+
+/* Option variables */
+char *position = NULL;
+
+typedef struct {
+ int beg, end;
+ samfile_t *in;
+} tmpstruct_t;
+
+int readNum = 2;
+
+// callback for bam_fetch()
+static int fetch_func(const bam1_t *b, void *data)
+{
+/* sequence name */
+putchar('@');
+if (readNum == 2)
+ readNum = 1;
+else
+ readNum = 2;
+printf("%s/%d", bam1_qname(b), readNum);
+putchar('\n');
+
+/* sequence */
+int i, n = (b)->core.l_qseq;
+for (i = 0; i < n; ++i)
+ {
+ int c = toupper(bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]);
+ putchar(c);
+ }
+putchar('\n');
+
+/* sequence base qualities */
+putchar('+');
+putchar('\n');
+for (i = 0; i < n; ++i)
+ {
+ int c = bam1_qual(b)[i] + 33;
+ if (c > 126)
+ c = 126;
+ putchar(c);
+ }
+putchar('\n');
+
+return 0;
+}
+
+
+void bamToFastQ(char *inbam)
+/* bamToFastQ - Convert BAM -> FastQ format */
+{
+tmpstruct_t tmp;
+tmp.beg = 0; tmp.end = 0x7fffffff;
+tmp.in = samopen(inbam, "rb", NULL);
+if (tmp.in == NULL)
+ errAbort("samopen(%s, \"rb\") returned NULL", inbam);
+
+if (!position)
+ { // if a region is not specified
+ errAbort("Need to specify position!");
+ }
+
+int ref;
+bam_index_t *idx;
+idx = bam_index_load(inbam); // load BAM index
+if (!idx)
+ {
+ samclose(tmp.in);
+ errAbort("BAM indexing file is not available.\n");
+ }
+bam_parse_region(tmp.in->header, position, &ref,
+ &tmp.beg, &tmp.end); // parse the region
+if (ref < 0)
+ {
+ samclose(tmp.in);
+ errAbort("Invalid region %s\n", position);
+ }
+
+bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, NULL, fetch_func);
+bam_index_destroy(idx);
+samclose(tmp.in);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc < 2)
+ usage();
+
+if (optionExists("position"))
+ position = optionVal("position", NULL);
+
+char *inbam = argv[1];
+bamToFastQ(inbam);
+
+return 0;
+}
+
+#else // USE_BAM not defined
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+printf("BAM not installed");
+
+return 0;
+}
+#endif