d2c7e8be3ceae6027a5274faafdf6239d778cc63
aamp
Sat Jul 16 08:31:16 2011 -0700
Changes related to change source filenames around: bamUdc.ch -> bamFile.ch and bamFile.ch -> hgBam.ch
diff --git src/inc/bamFile.h src/inc/bamFile.h
new file mode 100644
index 0000000..080b6a4
--- /dev/null
+++ src/inc/bamFile.h
@@ -0,0 +1,132 @@
+/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
+
+#ifndef BAMFILE_H
+#define BAMFILE_H
+
+#ifdef USE_BAM
+
+// bam.h is incomplete without _IOLIB set to 1, 2 or 3. 2 is used by Makefile.generic:
+#ifndef _IOLIB
+#define _IOLIB 2
+#endif
+#include "bam.h"
+#include "sam.h"
+
+#else // no USE_BAM
+typedef struct { } bam1_t;
+typedef struct { } samfile_t;
+typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
+
+#define COMPILE_WITH_SAMTOOLS "%s: in order to use this functionality you must " \
+ "install the samtools library (http://samtools.sourceforge.net) and recompile kent/src with " \
+ "USE_BAM=1 in your environment " \
+ "(see http://genomewiki.ucsc.edu/index.php/Build_Environment_Variables)."
+
+#endif // USE_BAM
+
+#ifndef DNASEQ_H
+#include "dnaseq.h"
+#endif
+
+#ifndef DYSTRING_H
+#include "dystring.h"
+#endif
+
+boolean bamFileExistsUdc(char *bamFileName, char *udcFuseRoot);
+/* Return TRUE if we can successfully open the bam file and its index file. */
+
+samfile_t *bamOpenUdc(char *fileOrUrl, char **retBamFileName, char *udcFuseRoot);
+/* Return an open bam file, dealing with FUSE caching if need be.
+ * Return parameter if NON-null will return the file name after FUSing */
+
+void bamFetchUdc(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+ samfile_t **pSamFile, char *udcFuseRoot);
+/* Open the .bam file, fetch items in the seq:start-end position range,
+ * and call callbackFunc on each bam item retrieved from the file plus callbackData.
+ * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
+ * The pSamFile parameter is optional. If non-NULL it will be filled in, just for
+ * the benefit of the callback function, with the open samFile. */
+
+void bamClose(samfile_t **pSamFile);
+/* Close down a samefile_t */
+
+boolean bamIsRc(const bam1_t *bam);
+/* Return TRUE if alignment is on - strand. */
+
+INLINE int bamUnpackCigarElement(unsigned int packed, char *retOp)
+/* Given an unsigned int containing a number of bases and an offset into an
+ * array of BAM-enhanced-CIGAR ASCII characters (operations), store operation
+ * char into *retOp (retOp must not be NULL) and return the number of bases. */
+{
+#ifdef USE_BAM
+// decoding lifted from samtools bam.c bam_format1(), long may it remain stable:
+#define BAM_DOT_C_OPCODE_STRING "MIDNSHP"
+int n = packed>>BAM_CIGAR_SHIFT;
+int opcode = packed & BAM_CIGAR_MASK;
+if (opcode >= strlen(BAM_DOT_C_OPCODE_STRING))
+ errAbort("bamUnpackCigarElement: unrecognized opcode %d. "
+ "(I only recognize 0..%lu [" BAM_DOT_C_OPCODE_STRING "]) "
+ "Perhaps samtools bam.c's bam_format1 encoding changed? If so, update me.",
+ opcode, (unsigned long)(strlen(BAM_DOT_C_OPCODE_STRING)-1));
+*retOp = BAM_DOT_C_OPCODE_STRING[opcode];
+return n;
+#else // no USE_BAM
+errAbort(COMPILE_WITH_SAMTOOLS, "bamUnpackCigarElement");
+return 0;
+#endif// USE_BAM
+}
+
+void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen);
+/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
+ * the beginning of the query sequence and quality; likewise for retHigh at end.
+ * For convenience, retClippedQLen is the original query length minus soft clipping
+ * (and the length of the query sequence that will be returned). */
+
+void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq);
+/* Fill in qSeq with the nucleotide sequence encoded in bam. The BAM format
+ * reverse-complements query sequence when the alignment is on the - strand,
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
+
+char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand);
+/* Return the nucleotide sequence encoded in bam. The BAM format
+ * reverse-complements query sequence when the alignment is on the - strand,
+ * so if useStrand is given we rev-comp it back to restore the original query
+ * sequence. */
+
+UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand);
+/* Return the base quality scores encoded in bam as an array of ubytes. */
+
+void bamUnpackCigar(const bam1_t *bam, struct dyString *dyCigar);
+/* Unpack CIGAR string into dynamic string */
+
+char *bamGetCigar(const bam1_t *bam);
+/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */
+
+void bamShowCigarEnglish(const bam1_t *bam);
+/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */
+
+void bamShowFlagsEnglish(const bam1_t *bam);
+/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */
+
+int bamGetTargetLength(const bam1_t *bam);
+/* Tally up the alignment's length on the reference sequence from
+ * bam's packed-int CIGAR representation. */
+
+bam1_t *bamClone(const bam1_t *bam);
+/* Return a newly allocated copy of bam. */
+
+void bamShowTags(const bam1_t *bam);
+/* Print out tags in HTML: bold key, no type indicator for brevity. */
+
+char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize);
+/* If bam's tags include the given 2-character tag, place the value into
+ * buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
+ * or NULL if tag is not present. */
+
+void bamUnpackAux(const bam1_t *bam, struct dyString *dy);
+/* Unpack the tag:type:val part of bam into dy */
+
+#endif//ndef BAMFILE_H