26c2f650359cc1c74fb31d96109bc0f8dedbc3d7
angie
  Thu May 30 10:32:19 2019 -0700
Adding BAM and VCF item counts to hubApi's list/tracks function.  Note: older index files may not contain counts.  refs #18869, #23521

diff --git src/lib/bamFile.c src/lib/bamFile.c
index a725ef8..6e9e003 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -1,24 +1,28 @@
 /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "bamFile.h"
 #include "htmshell.h"
+#include "cram/cram_samtools.h"
+#include "cram/sam_header.h"
+#include "cram/cram_structs.h"
+#include "htslib/cram.h"
 #include "udc.h"
 #include "psl.h"
 
 // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load
 // without worrying about the samtools lib creating local cache files in cgi-bin:
 
 static bam_index_t *bamOpenIndexGivenUrl(samfile_t *sam, char *fileOrUrl, char *baiFileOrUrl)
 /* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
  * otherwise return NULL. baiFileOrUrl can be NULL. 
  * The difference to bamOpenIndex is that the URL/filename of the bai file can be specified. */
 {
 if (sam->format.format == cram) 
     return sam_index_load(sam, fileOrUrl);
 
 // assume that index is a .bai file 
@@ -219,30 +223,85 @@
     }
 bam_hdr_t *header = sam_hdr_read(fh);
 if (pSamFile != NULL)
     *pSamFile = fh;
 bam_index_t *idx = bamOpenIndexGivenUrl(fh, bamFileName, baiFileOrUrl);
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
     bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData);
     bamCloseIdx(&idx);
     }
 bamClose(&fh);
 }
 
+static int bamGetTargetCount(char *fileOrUrl)
+/* Return the number of target sequences in a bam or cram file. */
+{
+int tCount = 0;
+htsFile *htsF = hts_open(fileOrUrl, "r");
+if (htsF->format.format == crai)
+    {
+    SAM_hdr *cramHdr = cram_fd_get_header(htsF->fp.cram);
+    tCount = cramHdr->nref;
+    }
+else
+    {
+    bam_hdr_t *bamHdr = bam_hdr_read(htsF->fp.bgzf);
+    tCount = bamHdr->n_targets;
+    }
+hts_close(htsF);
+return tCount;
+}
+
+long long bamFileItemCount(char *fileOrUrl, char *baiFileOrUrl)
+/* Return the total number of mapped items across all sequences in fileOrUrl, using index file.
+ * If baiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.bai.
+ * NOTE: not all bam index files include mapped item counts, so this may return 0 even for large
+ * bam.  As of May 2019, our copy of hts_idx_get_stat does not support cram indexes
+ * (perhaps they never include counts?), so this always returns 0 for cram. */
+{
+long long itemCount = 0;
+hts_idx_t *idx = NULL;
+if (isNotEmpty(baiFileOrUrl))
+    idx = hts_idx_load2(fileOrUrl, baiFileOrUrl);
+else
+    {
+    int format = endsWith(fileOrUrl, ".cram") ? HTS_FMT_CRAI : HTS_FMT_BAI;
+    idx = hts_idx_load(fileOrUrl, format);
+    }
+if (idx == NULL)
+    warn("bamFileItemCount: hts_idx_load(%s) failed.", baiFileOrUrl ? baiFileOrUrl : fileOrUrl);
+else
+    {
+    int tCount = bamGetTargetCount(fileOrUrl);
+    int tid;
+    for (tid = 0;  tid < tCount;  tid++)
+        {
+        uint64_t mapped, unmapped;
+        int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped);
+        if (ret == 0)
+            itemCount += mapped;
+        // ret is -1 if counts are unavailable.
+        }
+    hts_idx_destroy(idx);
+    idx = NULL;
+    }
+return itemCount;
+}
+
 void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile, char *refUrl, char *cacheDir)
 {
 bamAndIndexFetchPlus(fileOrUrl, NULL, position, callbackFunc, callbackData, pSamFile, refUrl, cacheDir);
 }
 
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile)
 {
 bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile,
     NULL, NULL);
 }
 
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */