0c1d13ef4e3214c4029e098ed5ef708d15e6096f max Mon Nov 21 16:34:53 2016 -0800 CIRM: As per Jim: adding the trackDb tag 'bigDataIndex'. It allows specifying the URL of the .tbi or .bai file, in case you cannot put it alongside the .bam or .vcf.gz file. diff --git src/inc/bamFile.h src/inc/bamFile.h index 935b8e2..91883ba 100644 --- src/inc/bamFile.h +++ src/inc/bamFile.h @@ -26,55 +26,66 @@ #define bam1_seqi bam_seqi #define bam_nt16_rev_table seq_nt16_str #define data_len l_data struct bamChromInfo { struct bamChromInfo *next; char *name; /* Chromosome name */ bits32 size; /* Chromosome size in bases */ }; boolean bamFileExists(char *bamFileName); /* Return TRUE if we can successfully open the bam file and its index file. */ -void bamFileAndIndexMustExist(char *fileOrUrl); +void bamFileAndIndexMustExist(char *fileOrUrl, char *baiFileOrUrl); /* Open both a bam file and its accompanying index or errAbort; this is what it - * takes for diagnostic info to propagate up through errCatches in calling code. */ + * takes for diagnostic info to propagate up through errCatches in calling code. + * The parameter baiFileOrUrl can be NULL, in which case it defaults to <fileOrUrl>.bai. */ samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName); /* Return an open bam file as well as the filename of the bam. */ samfile_t *bamMustOpenLocal(char *fileName, char *mode, void *extraHeader); /* Open up sam or bam file or die trying. The mode parameter is * "r" - open SAM to read * "rb" - open BAM to read * "w" - open SAM to write * "wb" - open BAM to write * The extraHeader is generally NULL in the read case, and the write case * contains a pointer to a bam_header_t with information about the header. * The implementation is just a wrapper around samopen from the samtools library * that aborts with error message if there's a problem with the open. 
*/ void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header, bam_index_t *idx, char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData); /* With the open bam file, return items the same way with the callbacks as with bamFetch() */ /* except in this case use an already-open bam file and index (use bam_index_load and free() for */ /* the index). It seems a little strange to pass the filename in with the open bam, but */ /* it's just used to report errors. */ +void bamAndIndexFetchPlus(char *fileOrUrl, char *baiFileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile, char *refUrl, char *cacheDir); +/* Open the .bam file with the .bai index specified by baiFileOrUrl. + * baiFileOrUrl can be NULL and defaults to <fileOrUrl>.bai. + * Fetch items in the seq:start-end position range, + * and call callbackFunc on each bam item retrieved from the file plus callbackData. + * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. + * The pSamFile parameter is optional. If non-NULL it will be filled in, just for + * the benefit of the callback function, with the open samFile. */ + void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile, char *refUrl, char *cacheDir); /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. 
* refUrl points to the place to grab CRAM reference sequences (if any) * cacheDir points to the directory in which CRAM reference sequences are cached */ void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile); /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.