0c1d13ef4e3214c4029e098ed5ef708d15e6096f max Mon Nov 21 16:34:53 2016 -0800 CIRM: As per Jim: adding the trackDb tag 'bigDataIndex'. It allows to specify the URL of the .tbi or .bai file, in case you cannot put it alongside the .bam or .vcf.gz file. diff --git src/lib/bamFile.c src/lib/bamFile.c index 001705c..c8c1027 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -1,54 +1,64 @@ /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "portable.h" #include "bamFile.h" #include "htmshell.h" #include "udc.h" #include "psl.h" // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load // without worrying about the samtools lib creating local cache files in cgi-bin: -static bam_index_t *bamOpenIdx(samfile_t *sam, char *fileOrUrl) +static bam_index_t *bamOpenIdxAndBai(samfile_t *sam, char *fileOrUrl, char *baiFileOrUrl) /* If fileOrUrl has a valid accompanying .bai file, parse and return the index; - * otherwise return NULL. */ + * otherwise return NULL. baiFileOrUrl can be NULL. */ { if (sam->format.format == cram) return sam_index_load(sam, fileOrUrl); // assume that index is a .bai file char indexName[4096]; +if (baiFileOrUrl==NULL) safef(indexName, sizeof indexName, "%s.bai", fileOrUrl); +else + safef(indexName, sizeof indexName, "%s", baiFileOrUrl); return sam_index_load2(sam, fileOrUrl, indexName); } static void bamCloseIdx(bam_index_t **pIdx) /* Free unless already NULL. */ { if (pIdx != NULL && *pIdx != NULL) { free(*pIdx); // Not freeMem, freez etc -- sam just uses malloc/calloc. *pIdx = NULL; } } +static bam_index_t *bamOpenIdx(samfile_t *sam, char *fileOrUrl) +/* If fileOrUrl has a valid accompanying .bai file, parse and return the index; + * otherwise return NULL. baiFileOrUrl can be NULL. */ +{ +return bamOpenIdxAndBai(sam, fileOrUrl, NULL); +} + boolean bamFileExists(char *fileOrUrl) /* Return TRUE if we can successfully open the bam file and its index file. * NOTE: this doesn't give enough diagnostics */ { char *bamFileName = fileOrUrl; samfile_t *fh = samopen(bamFileName, "rb", NULL); if (fh != NULL) { bam_index_t *idx = bamOpenIdx(fh, bamFileName); samclose(fh); if (idx == NULL) { warn("bamFileExists: failed to read index corresponding to %s", bamFileName); return FALSE; } @@ -106,36 +116,36 @@ if (sf == NULL) errnoAbort("Couldn't open %s.\n", fileName); return sf; } void bamClose(samfile_t **pSamFile) /* Close down a samfile_t */ { if (pSamFile != NULL) { samclose(*pSamFile); *pSamFile = NULL; } } -void bamFileAndIndexMustExist(char *fileOrUrl) +void bamFileAndIndexMustExist(char *fileOrUrl, char *baiFileOrUrl) /* Open both a bam file and its accompanying index or errAbort; this is what it * takes for diagnostic info to propagate up through errCatches in calling code. */ { samfile_t *bamF = bamOpen(fileOrUrl, NULL); -bam_index_t *idx = bamOpenIdx(bamF, fileOrUrl); +bam_index_t *idx = bamOpenIdxAndBai(bamF, fileOrUrl, baiFileOrUrl); if (idx == NULL) errAbort("failed to read index file (.bai) corresponding to %s", fileOrUrl); bamCloseIdx(&idx); bamClose(&bamF); } void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header, bam_index_t *idx, char *bamFileName, char *position, bam_fetch_f callbackFunc, void *callbackData) /* With the open bam file, return items the same way with the callbacks as with bamFetch() */ /* except in this case use an already-open bam file and index (use bam_index_load and free() for */ /* the index). It seems a little strange to pass the filename in with the open bam, but */ /* it's just used to report errors. */ { bam1_t *b; AllocVar(b); @@ -176,60 +186,68 @@ mustGetLine(downFile, errorBuf, sizeof errorBuf); errAbort("cannot find reference %s. Error: %s\n", md5String,errorBuf); } else { if ((downFile = fopen(pendingFile, "w")) == NULL) errAbort("cannot find reference %s. Cannot create file %s.",md5String, pendingFile); fprintf(downFile, "%s\n", server); fclose(downFile); errAbort("Cannot find reference %s. Downloading from %s. Please refresh screen to check download status.",md5String, server); } } } } -void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, +void bamAndIndexFetchPlus(char *fileOrUrl, char *baiFileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile, char *refUrl, char *cacheDir) -/* Open the .bam file, fetch items in the seq:start-end position range, +/* Open the .bam file with the .bai index specified by baiFileOrUrl. + * baiFileOrUrl can be NULL and defaults to <fileOrUrl>.bai. + * Fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. */ { char *bamFileName = NULL; samfile_t *fh = bamOpen(fileOrUrl, &bamFileName); if (fh->format.format == cram) { if (cacheDir == NULL) errAbort("CRAM cache dir hg.conf variable (cramRef) must exist for CRAM support"); cram_set_cache_url(fh, cacheDir, refUrl); } bam_hdr_t *header = sam_hdr_read(fh); if (pSamFile != NULL) *pSamFile = fh; -bam_index_t *idx = bamOpenIdx(fh, bamFileName); +bam_index_t *idx = bamOpenIdxAndBai(fh, bamFileName, baiFileOrUrl); if (idx == NULL) warn("bam_index_load(%s) failed.", bamFileName); else { bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData); bamCloseIdx(&idx); } bamClose(&fh); } +void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile, char *refUrl, char *cacheDir) +{ +bamAndIndexFetchPlus(fileOrUrl, NULL, position, callbackFunc, callbackData, pSamFile, refUrl, cacheDir); +} + void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile) { bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile, NULL, NULL); } boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */ { const bam1_core_t *core = &bam->core; return (core->flag & BAM_FREVERSE); } void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)