fe875492a7da5c0f64cd9c6d43cd6809478ec2a4 markd Sat May 9 08:32:48 2026 -0700 *.bai and *.tbi index files were by-passing UDC and being downloaded to current directory diff --git src/lib/bamFile.c src/lib/bamFile.c index 7da5e819dff..5ffc6c421f7 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -71,62 +71,60 @@ fprintf(downFile, "\n"); fclose(downFile); } char server[4096]; if (refUrl != NULL) safef(server, sizeof server, refUrl, md5String); else safef(server, sizeof server, "http://www.ebi.ac.uk:80/ena/cram/md5/%s", md5String); cram_ref_check_free(chk); errAbort("Downloading reference from %s. " "Please refresh screen to check download status.", server); } -// If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load -// without worrying about the samtools lib creating local cache files in cgi-bin: - static bam_index_t *tryIndexCandidates(samfile_t *sam, char *fileOrUrl, const char *ext) /* Try loading index by appending ext, then by replacing the suffix with ext. - * Passes explicit index name to sam_index_load2 so htslib does not attempt - * to download a remote index into the (non-writable) CWD. */ + * Uses sam_index_load3 with flags=0 so remote indexes are read through the + * UDC hfile scheme handler instead of htslib downloading a local copy into + * the (non-writable) CWD. */ { char indexName[4096]; bam_index_t *ret = NULL; safef(indexName, sizeof indexName, "%s%s", fileOrUrl, ext); -if ((ret = sam_index_load2(sam, fileOrUrl, indexName)) != NULL) +if ((ret = sam_index_load3(sam, fileOrUrl, indexName, 0)) != NULL) return ret; safef(indexName, sizeof indexName, "%s", fileOrUrl); char *lastDot = strrchr(indexName, '.'); if (lastDot && (lastDot - indexName) + strlen(ext) < sizeof indexName) { strcpy(lastDot, ext); - ret = sam_index_load2(sam, fileOrUrl, indexName); + ret = sam_index_load3(sam, fileOrUrl, indexName, 0); } return ret; } static bam_index_t *bamOpenIndexGivenUrl(samfile_t *sam, char *fileOrUrl, char *baiFileOrUrl) /* If fileOrUrl has a valid accompanying .bai/.crai file, parse and return the index; * otherwise return NULL. baiFileOrUrl can be NULL. * The difference to bamOpenIndex is that the URL/filename of the index file can be specified. */ { if (baiFileOrUrl != NULL) - return sam_index_load2(sam, fileOrUrl, baiFileOrUrl); + return sam_index_load3(sam, fileOrUrl, baiFileOrUrl, 0); const char *ext = (sam->format.format == cram) ? ".crai" : ".bai"; return tryIndexCandidates(sam, fileOrUrl, ext); } static void bamCloseIdx(bam_index_t **pIdx) /* Free unless already NULL. */ { if (pIdx != NULL && *pIdx != NULL) { free(*pIdx); // Not freeMem, freez etc -- sam just uses malloc/calloc. *pIdx = NULL; } } @@ -310,35 +308,35 @@ bam_hdr_destroy(header); hts_close(htsF); return tCount; } long long bamFileItemCount(char *fileOrUrl, char *baiFileOrUrl) /* Return the total number of mapped items across all sequences in fileOrUrl, using index file. * If baiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.bai (BAM) or fileOrUrl.crai (CRAM). * NOTE: not all bam index files include mapped item counts, so this may return 0 even for large * bam. As of May 2019, our copy of hts_idx_get_stat does not support cram indexes * (perhaps they never include counts?), so this always returns 0 for cram. */ { long long itemCount = 0; hts_idx_t *idx = NULL; if (isNotEmpty(baiFileOrUrl)) - idx = hts_idx_load2(fileOrUrl, baiFileOrUrl); + idx = hts_idx_load3(fileOrUrl, baiFileOrUrl, HTS_FMT_BAI, 0); else { int format = endsWith(fileOrUrl, ".cram") ? HTS_FMT_CRAI : HTS_FMT_BAI; - idx = hts_idx_load(fileOrUrl, format); + idx = hts_idx_load3(fileOrUrl, NULL, format, 0); } if (idx == NULL) warn("bamFileItemCount: hts_idx_load(%s) failed.", baiFileOrUrl ? baiFileOrUrl : fileOrUrl); else { int tCount = bamGetTargetCount(fileOrUrl); int tid; for (tid = 0; tid < tCount; tid++) { uint64_t mapped, unmapped; int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped); if (ret == 0) itemCount += mapped; // ret is -1 if counts are unavailable. }