fdb4b5d215ff23b0f066a9b7d7a96ae31045a347 braney Fri Jan 15 12:27:29 2016 -0800 yet more work on support CRAM under htslib diff --git src/lib/bamFile.c src/lib/bamFile.c index c050b22..9ba386b 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -208,78 +208,127 @@ /* it's just used to report errors. */ { #ifdef USE_HTS bam1_t *b; AllocVar(b); hts_itr_t *iter = sam_itr_querys(idx, header, position); if (iter == NULL && startsWith("chr", position)) iter = sam_itr_querys(idx, header, position + strlen("chr")); if (iter == NULL) return; int result; while ((result = sam_itr_next(samfile, iter, b)) >= 0) callbackFunc(b, callbackData, header); +// if we're reading a CRAM file and the MD5 string has been set +// we know there was an error finding the reference and we need +// to request that it be loaded. +if (samfile->format.format == cram) + { + char *md5String = cram_get_Md5(samfile); + if (!isEmpty(md5String)) + { + char server[4096]; + char pendingFile[4096]; + char errorFile[4096]; + + char *refPath = cram_get_ref_url(samfile); + char *cacheDir = cram_get_cache_dir(samfile); + + sprintf(server, refPath, md5String); + sprintf(pendingFile, "%s/pending/%s",cacheDir,md5String); + sprintf(errorFile, "%s/error/%s",cacheDir,md5String); + makeDirsOnPath(pendingFile); + FILE *downFile; + if ((downFile = fopen(errorFile, "r")) != NULL) + { + char errorBuf[4096]; + mustGetLine(downFile, errorBuf, sizeof errorBuf); + errAbort("cannot find reference %s. Error: %s\n", md5String,errorBuf); + } + else + { + if ((downFile = fopen(pendingFile, "w")) == NULL) + errAbort("cannot find reference %s. Cannot create file %s.",md5String, pendingFile); + fprintf(downFile, "%s\n", server); + fclose(downFile); + errAbort("cannot find reference %s. Downloading from %s. Pending in %s",md5String, server, pendingFile); + } + } + } #else int chromId, start, end; int ret = bam_parse_region(samfile->header, position, &chromId, &start, &end); if (ret != 0 && startsWith("chr", position)) ret = bam_parse_region(samfile->header, position+strlen("chr"), &chromId, &start, &end); if (ret != 0) // If the bam file does not cover the current chromosome, OK return; ret = bam_fetch(samfile->x.bam, idx, chromId, start, end, callbackData, callbackFunc); if (ret != 0) warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret); #endif } -void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, - samfile_t **pSamFile) +void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile, char *refUrl, char *cacheDir) /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. */ { char *bamFileName = NULL; samfile_t *fh = bamOpen(fileOrUrl, &bamFileName); #ifdef USE_HTS +if (fh->format.format == cram) + { + if (cacheDir == NULL) + errAbort("CRAM cache dir hg.conf variable (cramRef) must exist for CRAM support"); + cram_set_cache_url(fh, cacheDir, refUrl); + } bam_hdr_t *header = sam_hdr_read(fh); if (pSamFile != NULL) *pSamFile = fh; bam_index_t *idx = bamOpenIdx(fh, bamFileName); #else if (pSamFile != NULL) *pSamFile = fh; bam_index_t *idx = bamOpenIdx(bamFileName); #endif if (idx == NULL) warn("bam_index_load(%s) failed.", bamFileName); else { #ifdef USE_HTS bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData); #else bamFetchAlreadyOpen(fh, idx, bamFileName, position, callbackFunc, callbackData); #endif bamCloseIdx(&idx); } bamClose(&fh); } +void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile) +{ +bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile, + NULL, NULL); +} + boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */ { const bam1_core_t *core = &bam->core; return (core->flag & BAM_FREVERSE); } void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at * the beginning of the query sequence and quality; likewise for retHigh at end. * For convenience, retClippedQLen is the original query length minus soft clipping * (and the length of the query sequence that will be returned). */ { unsigned int *cigarPacked = bam1_cigar(bam); const bam1_core_t *core = &bam->core;