26c2f650359cc1c74fb31d96109bc0f8dedbc3d7 angie Thu May 30 10:32:19 2019 -0700 Adding BAM and VCF item counts to hubApi's list/tracks function. Note: older index files may not contain counts. refs #18869, #23521 diff --git src/lib/bamFile.c src/lib/bamFile.c index a725ef8..6e9e003 100644 --- src/lib/bamFile.c +++ src/lib/bamFile.c @@ -1,24 +1,28 @@ /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "portable.h" #include "bamFile.h" #include "htmshell.h" +#include "cram/cram_samtools.h" +#include "cram/sam_header.h" +#include "cram/cram_structs.h" +#include "htslib/cram.h" #include "udc.h" #include "psl.h" // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load // without worrying about the samtools lib creating local cache files in cgi-bin: static bam_index_t *bamOpenIndexGivenUrl(samfile_t *sam, char *fileOrUrl, char *baiFileOrUrl) /* If fileOrUrl has a valid accompanying .bai file, parse and return the index; * otherwise return NULL. baiFileOrUrl can be NULL. * The difference to bamOpenIndex is that the URL/filename of the bai file can be specified. */ { if (sam->format.format == cram) return sam_index_load(sam, fileOrUrl); // assume that index is a .bai file @@ -219,30 +223,85 @@ } bam_hdr_t *header = sam_hdr_read(fh); if (pSamFile != NULL) *pSamFile = fh; bam_index_t *idx = bamOpenIndexGivenUrl(fh, bamFileName, baiFileOrUrl); if (idx == NULL) warn("bam_index_load(%s) failed.", bamFileName); else { bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData); bamCloseIdx(&idx); } bamClose(&fh); } +static int bamGetTargetCount(char *fileOrUrl) +/* Return the number of target sequences in a bam or cram file. */ +{ +int tCount = 0; +htsFile *htsF = hts_open(fileOrUrl, "r"); +if (htsF->format.format == crai) + { + SAM_hdr *cramHdr = cram_fd_get_header(htsF->fp.cram); + tCount = cramHdr->nref; + } +else + { + bam_hdr_t *bamHdr = bam_hdr_read(htsF->fp.bgzf); + tCount = bamHdr->n_targets; + } +hts_close(htsF); +return tCount; +} + +long long bamFileItemCount(char *fileOrUrl, char *baiFileOrUrl) +/* Return the total number of mapped items across all sequences in fileOrUrl, using index file. + * If baiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.bai. + * NOTE: not all bam index files include mapped item counts, so this may return 0 even for large + * bam. As of May 2019, our copy of hts_idx_get_stat does not support cram indexes + * (perhaps they never include counts?), so this always returns 0 for cram. */ +{ +long long itemCount = 0; +hts_idx_t *idx = NULL; +if (isNotEmpty(baiFileOrUrl)) + idx = hts_idx_load2(fileOrUrl, baiFileOrUrl); +else + { + int format = endsWith(fileOrUrl, ".cram") ? HTS_FMT_CRAI : HTS_FMT_BAI; + idx = hts_idx_load(fileOrUrl, format); + } +if (idx == NULL) + warn("bamFileItemCount: hts_idx_load(%s) failed.", baiFileOrUrl ? baiFileOrUrl : fileOrUrl); +else + { + int tCount = bamGetTargetCount(fileOrUrl); + int tid; + for (tid = 0; tid < tCount; tid++) + { + uint64_t mapped, unmapped; + int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped); + if (ret == 0) + itemCount += mapped; + // ret is -1 if counts are unavailable. + } + hts_idx_destroy(idx); + idx = NULL; + } +return itemCount; +} + void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile, char *refUrl, char *cacheDir) { bamAndIndexFetchPlus(fileOrUrl, NULL, position, callbackFunc, callbackData, pSamFile, refUrl, cacheDir); } void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile) { bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile, NULL, NULL); } boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */