26c2f650359cc1c74fb31d96109bc0f8dedbc3d7
angie
  Thu May 30 10:32:19 2019 -0700
Adding BAM and VCF item counts to hubApi's list/tracks function.  Note: older index files may not contain counts.  refs #18869, #23521

diff --git src/lib/vcf.c src/lib/vcf.c
index 1fb7ad4..0213055 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -1,30 +1,31 @@
 /* VCF: Variant Call Format, version 4.0 / 4.1
  * http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-40
  * http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
  */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "dnautil.h"
 #include "errAbort.h"
 #include <limits.h>
 #include "localmem.h"
 #include "net.h"
 #include "regexHelper.h"
+#include "htslib/tbx.h"
 #include "vcf.h"
 
 /* Reserved but optional INFO keys: */
 const char *vcfInfoAncestralAllele = "AA";
 const char *vcfInfoPerAlleleGtCount = "AC";	// allele count in genotypes, for each ALT allele,
 						// in the same order as listed
 const char *vcfInfoAlleleFrequency = "AF";	// allele frequency for each ALT allele in the same
 						// order as listed: use this when estimated from
 						// primary data, not called genotypes
 const char *vcfInfoNumAlleles = "AN";		// total number of alleles in called genotypes
 const char *vcfInfoBaseQuality = "BQ";		// RMS base quality at this position
 const char *vcfInfoCigar = "CIGAR";		// cigar string describing how to align an
 						// alternate allele to the reference allele
 const char *vcfInfoIsDbSnp = "DB";		// dbSNP membership
 const char *vcfInfoDepth = "DP";		// combined depth across samples, e.g. DP=154
@@ -1088,30 +1089,66 @@
         struct vcfRecord *lastRec = vcff->records;
         if (lastRec == NULL)
             vcff->records = records;
         else
             {
             // Considered just asserting the batches were in order, but a problem may
             // result when non-overlapping location windows pick up the same long variant.
             slSortMergeUniq(&(vcff->records), records, vcfRecordCmp, NULL);
             }
         }
     }
 
 return slCount(vcff->records) - oldCount;
 }
 
+long long vcfTabixItemCount(char *fileOrUrl, char *tbiFileOrUrl)
+/* Return the total number of items across all sequences in fileOrUrl, using index file.
+ * If tbiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.tbi.
+ * NOTE: not all tabix index files include mapped item counts, so this may return 0 even for
+ * large files. */
+{
+long long itemCount = 0;
+hts_idx_t *idx = NULL;
+if (isNotEmpty(tbiFileOrUrl))
+    idx = hts_idx_load2(fileOrUrl, tbiFileOrUrl);
+else
+    {
+    idx = hts_idx_load(fileOrUrl, HTS_FMT_TBI);
+    }
+if (idx == NULL)
+    warn("vcfTabixItemCount: hts_idx_load(%s) failed.", tbiFileOrUrl ? tbiFileOrUrl : fileOrUrl);
+else
+    {
+    int tCount;
+    const char **seqNames = tbx_seqnames((tbx_t *)idx, &tCount);
+    int tid;
+    for (tid = 0;  tid < tCount;  tid++)
+        {
+        uint64_t mapped, unmapped;
+        int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped);
+        if (ret == 0)
+            itemCount += mapped;
+        // ret is -1 if counts are unavailable.
+        }
+    freeMem(seqNames);
+    hts_idx_destroy(idx);
+    idx = NULL;
+    }
+return itemCount;
+}
+
 void vcfFileFree(struct vcfFile **pVcff)
 /* Free a vcfFile object. */
 {
 if (pVcff == NULL || *pVcff == NULL)
     return;
 struct vcfFile *vcff = *pVcff;
 if (vcff->maxErr == VCF_IGNORE_ERRS && vcff->errCnt > 0)
     {
     vcff->maxErr++; // Never completely silent!  Go ahead and report how many errs were detected
     vcfFileErr(vcff,"Closing with %d errors.",vcff->errCnt);
     }
 freez(&(vcff->headerString));
 hashFree(&(vcff->pool));
 if (vcff->reusePool)
     lmCleanup(&vcff->reusePool);