11e45667d4e291b3038ccda729a1cdf5bcaf004a
braney
Mon Jul 11 15:46:54 2016 -0700
incorporate htslib in kent src, remove USE_BAM, USE_SAMTABIX, USE_TABIX
defines, modify a bunch of makefiles to include kentSrc variable
pointing to top of the tree.
diff --git src/inc/bamFile.h src/inc/bamFile.h
index 4c27828..499402c 100644
--- src/inc/bamFile.h
+++ src/inc/bamFile.h
@@ -1,98 +1,74 @@
/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
#ifndef BAMFILE_H
#define BAMFILE_H
#include "dnaseq.h"
#include "dystring.h"
-#ifdef USE_BAM
// bam.h is incomplete without _IOLIB set to 1, 2 or 3. 2 is used by Makefile.generic:
#ifndef _IOLIB
#define _IOLIB 2
#endif
-#ifdef USE_HTS
#include "htslib/sam.h"
typedef samFile samfile_t;
typedef hts_idx_t bam_index_t;
typedef bam_hdr_t bam_header_t;
typedef int (*bam_fetch_f)(const bam1_t *bam, void *data, bam_hdr_t *header) ;
#define samopen(a,b,c) sam_open(a,b) // note: the third argument is discarded
#define samclose(a) sam_close(a)
#define bam1_qname bam_get_qname
#define bam1_qual bam_get_qual
#define bam1_aux bam_get_aux
#define bam1_cigar bam_get_cigar
#define bam1_seq bam_get_seq
#define bam1_seqi bam_seqi
#define bam_nt16_rev_table seq_nt16_str
#define data_len l_data
-#else
-#include "bam.h"
-#include "sam.h"
-#endif
-
-#else // no USE_BAM
-typedef struct { } bam1_t;
-typedef struct { } bam_index_t;
-typedef struct { } samfile_t;
-typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
-#define COMPILE_WITH_SAMTOOLS "%s: in order to use this functionality you must " \
- "install the samtools library (http://samtools.sourceforge.net) and recompile kent/src with " \
- "USE_BAM=1 in your environment " \
- "(see http://genomewiki.ucsc.edu/index.php/Build_Environment_Variables)."
-
-#endif // USE_BAM
struct bamChromInfo
{
struct bamChromInfo *next; /* Next bamChromInfo in list */
char *name; /* Chromosome name */
bits32 size; /* Chromosome size in bases */
};
boolean bamFileExists(char *bamFileName);
/* Return TRUE if we can successfully open the bam file and its index file. */
void bamFileAndIndexMustExist(char *fileOrUrl);
/* Open both a bam file and its accompanying index or errAbort; this is what it
* takes for diagnostic info to propagate up through errCatches in calling code. */
samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName);
/* Return an open bam file as well as the filename of the bam. */
samfile_t *bamMustOpenLocal(char *fileName, char *mode, void *extraHeader);
/* Open up sam or bam file or die trying. The mode parameter is
* "r" - open SAM to read
* "rb" - open BAM to read
* "w" - open SAM to write
* "wb" - open BAM to write
* The extraHeader is generally NULL in the read case, and the write case
* contains a pointer to a bam_header_t with information about the header.
* The implementation is just a wrapper around samopen from the samtools library
* that aborts with error message if there's a problem with the open. */
-#ifdef USE_HTS
void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header, bam_index_t *idx, char *bamFileName,
-#else
-void bamFetchAlreadyOpen(samfile_t *samfile, bam_index_t *idx, char *bamFileName,
-#endif
char *position, bam_fetch_f callbackFunc, void *callbackData);
/* With the open bam file, return items the same way with the callbacks as with bamFetch() */
/* except in this case use an already-open bam file and index (use bam_index_load and free() for */
/* the index). It seems a little strange to pass the filename in with the open bam, but */
/* it's just used to report errors. */
void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
samfile_t **pSamFile, char *refUrl, char *cacheDir);
/* Open the .bam file, fetch items in the seq:start-end position range,
* and call callbackFunc on each bam item retrieved from the file plus callbackData.
* This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
* The pSamFile parameter is optional. If non-NULL it will be filled in, just for
* the benefit of the callback function, with the open samFile.
* refUrl points to the place to grab CRAM reference sequences (if any)
* cacheDir points to the directory in which CRAM reference sequences are cached */
@@ -104,46 +80,41 @@
* This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
* The pSamFile parameter is optional. If non-NULL it will be filled in, just for
* the benefit of the callback function, with the open samFile. */
void bamClose(samfile_t **pSamFile);
/* Close down a samfile_t */
boolean bamIsRc(const bam1_t *bam);
/* Return TRUE if alignment is on - strand. */
INLINE int bamUnpackCigarElement(unsigned int packed, char *retOp)
/* Unpack one BAM CIGAR element: the base count is packed>>BAM_CIGAR_SHIFT and the
* opcode (packed & BAM_CIGAR_MASK) indexes the op chars "MIDNSHP=X".  Store the op char
* into *retOp (retOp must not be NULL) and return the base count; errAbort on a bad opcode. */
{
-#ifdef USE_BAM
// decoding lifted from samtools bam.c bam_format1_core(), long may it remain stable:
#define BAM_DOT_C_OPCODE_STRING "MIDNSHP=X"
int n = packed>>BAM_CIGAR_SHIFT;
int opcode = packed & BAM_CIGAR_MASK;
if (opcode >= strlen(BAM_DOT_C_OPCODE_STRING))
errAbort("bamUnpackCigarElement: unrecognized opcode %d. "
"(I only recognize 0..%lu [" BAM_DOT_C_OPCODE_STRING "]) "
"Perhaps samtools bam.c's bam_format1 encoding changed? If so, update me.",
opcode, (unsigned long)(strlen(BAM_DOT_C_OPCODE_STRING)-1));
*retOp = BAM_DOT_C_OPCODE_STRING[opcode];
return n;
-#else // no USE_BAM
-errAbort(COMPILE_WITH_SAMTOOLS, "bamUnpackCigarElement");
-return 0;
-#endif// USE_BAM
}
void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen);
/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
* the beginning of the query sequence and quality; likewise for retHigh at end.
* For convenience, retClippedQLen is the original query length minus soft clipping
* (and the length of the query sequence that will be returned). */
void bamUnpackQuerySequence(const bam1_t *bam, boolean useStrand, char *qSeq);
/* Fill in qSeq with the nucleotide sequence encoded in bam. The BAM format
* reverse-complements query sequence when the alignment is on the - strand,
* so if useStrand is given we rev-comp it back to restore the original query
* sequence. */
char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand);