7f5b0b5cf3ed7efc0f9e84aa5bdcc097473c7584 kent Thu Feb 3 21:18:16 2011 -0800 Some preliminary work putting proper bam support in table browser. Still quite a ways to go, but have implemented a way to get a list of alignments in 'sam' format out of a BAM. This I think contains all the info. diff --git src/hg/inc/bamFile.h src/hg/inc/bamFile.h index cb2ea49..40fc1d1 100644 --- src/hg/inc/bamFile.h +++ src/hg/inc/bamFile.h @@ -1,39 +1,59 @@ /* bamFILE -- interface to binary alignment format files using Heng Li's samtools lib. */ #ifndef BAMFILE_H #define BAMFILE_H // bam.h is incomplete without _IOLIB set to 1, 2 or 3. 2 is used by Makefile.generic: #ifndef _IOLIB #define _IOLIB 2 #endif #include "bam.h" #include "sam.h" +#ifndef SAMALIGNMENT_H +#include "samAlignment.h" +#endif + +#ifndef DNASEQ_H +#include "dnaseq.h" +#endif + +#ifndef JKSQL_H +#include "jksql.h" +#endif + char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName); /* Return file name from table. If table has a seqName column, then grab the * row associated with bamSeqName (which is not nec. in chromInfo, e.g. * bam file might have '1' not 'chr1'). */ boolean bamFileExists(char *bamFileName); /* Return TRUE if we can successfully open the bam file and its index file. */ -void bamFetch(char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData); +void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile); /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. - * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. */ + * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. + * The pSamFile parameter is optional. If non-NULL it will be filled in, just for + * the benefit of the callback function, with the open samFile. 
*/ + +struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end, + struct lm *lm); +/* Fetch region as a list of samAlignments - which is more or less an unpacked + * bam record. The result is allocated out of lm, since it tends to be large... */ boolean bamIsRc(const bam1_t *bam); /* Return TRUE if alignment is on - strand. */ INLINE int bamUnpackCigarElement(unsigned int packed, char *retOp) /* Given an unsigned int containing a number of bases and an offset into an * array of BAM-enhanced-CIGAR ASCII characters (operations), store operation * char into *retOp (retOp must not be NULL) and return the number of bases. */ { // decoding lifted from samtools bam.c bam_format1(), long may it remain stable: #define BAM_DOT_C_OPCODE_STRING "MIDNSHP" int n = packed>>BAM_CIGAR_SHIFT; int opcode = packed & BAM_CIGAR_MASK; if (opcode >= strlen(BAM_DOT_C_OPCODE_STRING)) errAbort("bamUnpackCigarElement: unrecognized opcode %d. "