src/hg/inc/bamFile.h 1.2

1.2 2009/08/03 22:00:24 angie
Libified more commonly used BAM code from hgc and hgTracks.
Index: src/hg/inc/bamFile.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/inc/bamFile.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/inc/bamFile.h	27 Jul 2009 21:52:08 -0000	1.1
+++ src/hg/inc/bamFile.h	3 Aug 2009 22:00:24 -0000	1.2
@@ -13,5 +13,33 @@
  * and call callbackFunc on each bam item retrieved from the file plus callbackData. 
  * Note: if sequences in .bam file don't begin with "chr" but db's do, skip the "chr"
  * at the beginning of the position. */
 
+INLINE int bamUnpackCigarElement(unsigned int packed, char *retOp)
+/* Unpack a BAM CIGAR element: packed>>BAM_CIGAR_SHIFT is the number of bases,
+ * packed & BAM_CIGAR_MASK indexes the op chars "MIDNSHP".  Store the op char in
+ * *retOp (must not be NULL), return the number of bases; errAbort on bad opcode. */
+{
+// decoding lifted from samtools bam.c bam_format1(), long may it remain stable:
+#define BAM_DOT_C_OPCODE_STRING "MIDNSHP"
+int n = packed>>BAM_CIGAR_SHIFT;
+int opcode = packed & BAM_CIGAR_MASK;
+if (opcode >= strlen(BAM_DOT_C_OPCODE_STRING))
+    errAbort("bamUnpackCigarElement: unrecognized opcode %d. "
+	     "(I only recognize 0..%lu [" BAM_DOT_C_OPCODE_STRING "])  "
+	     "Perhaps samtools bam.c's bam_format1 encoding changed?  If so, update me.",
+	     opcode, (unsigned long)(strlen(BAM_DOT_C_OPCODE_STRING)-1)); // size_t -> match %lu
+*retOp = BAM_DOT_C_OPCODE_STRING[opcode];
+return n;
+}
+
+char *bamGetQuerySequence(const bam1_t *bam);
+/* Return the nucleotide sequence encoded in bam.  (Caller frees? -- TODO confirm.) */
+
+char *bamGetCigar(const bam1_t *bam);
+/* Return BAM-enhanced CIGAR string decoded from bam's packed encoding.  (Caller frees? -- TODO confirm.) */
+
+int bamGetTargetLength(const bam1_t *bam);
+/* Tally up the alignment's length on the reference sequence from
+ * bam's packed-int CIGAR representation. */
+
 #endif//ndef BAMFILE_H