26c2f650359cc1c74fb31d96109bc0f8dedbc3d7 angie Thu May 30 10:32:19 2019 -0700 Adding BAM and VCF item counts to hubApi's list/tracks function. Note: older index files may not contain counts. refs #18869, #23521 diff --git src/lib/vcf.c src/lib/vcf.c index 1fb7ad4..0213055 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -1,30 +1,31 @@ /* VCF: Variant Call Format, version 4.0 / 4.1 * http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-40 * http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41 */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "dnautil.h" #include "errAbort.h" #include #include "localmem.h" #include "net.h" #include "regexHelper.h" +#include "htslib/tbx.h" #include "vcf.h" /* Reserved but optional INFO keys: */ const char *vcfInfoAncestralAllele = "AA"; const char *vcfInfoPerAlleleGtCount = "AC"; // allele count in genotypes, for each ALT allele, // in the same order as listed const char *vcfInfoAlleleFrequency = "AF"; // allele frequency for each ALT allele in the same // order as listed: use this when estimated from // primary data, not called genotypes const char *vcfInfoNumAlleles = "AN"; // total number of alleles in called genotypes const char *vcfInfoBaseQuality = "BQ"; // RMS base quality at this position const char *vcfInfoCigar = "CIGAR"; // cigar string describing how to align an // alternate allele to the reference allele const char *vcfInfoIsDbSnp = "DB"; // dbSNP membership const char *vcfInfoDepth = "DP"; // combined depth across samples, e.g. DP=154 @@ -1088,30 +1089,66 @@ struct vcfRecord *lastRec = vcff->records; if (lastRec == NULL) vcff->records = records; else { // Considered just asserting the batches were in order, but a problem may // result when non-overlapping location windows pick up the same long variant. slSortMergeUniq(&(vcff->records), records, vcfRecordCmp, NULL); } } } return slCount(vcff->records) - oldCount; } +long long vcfTabixItemCount(char *fileOrUrl, char *tbiFileOrUrl) +/* Return the total number of items across all sequences in fileOrUrl, using index file. + * If tbiFileOrUrl is NULL, the index file is assumed to be fileOrUrl.tbi. + * NOTE: not all tabix index files include mapped item counts, so this may return 0 even for + * large files. */ +{ +long long itemCount = 0; +hts_idx_t *idx = NULL; +if (isNotEmpty(tbiFileOrUrl)) + idx = hts_idx_load2(fileOrUrl, tbiFileOrUrl); +else + { + idx = hts_idx_load(fileOrUrl, HTS_FMT_TBI); + } +if (idx == NULL) + warn("vcfTabixItemCount: hts_idx_load(%s) failed.", tbiFileOrUrl ? tbiFileOrUrl : fileOrUrl); +else + { + int tCount; + const char **seqNames = tbx_seqnames((tbx_t *)idx, &tCount); + int tid; + for (tid = 0; tid < tCount; tid++) + { + uint64_t mapped, unmapped; + int ret = hts_idx_get_stat(idx, tid, &mapped, &unmapped); + if (ret == 0) + itemCount += mapped; + // ret is -1 if counts are unavailable. + } + freeMem(seqNames); + hts_idx_destroy(idx); + idx = NULL; + } +return itemCount; +} + void vcfFileFree(struct vcfFile **pVcff) /* Free a vcfFile object. */ { if (pVcff == NULL || *pVcff == NULL) return; struct vcfFile *vcff = *pVcff; if (vcff->maxErr == VCF_IGNORE_ERRS && vcff->errCnt > 0) { vcff->maxErr++; // Never completely silent! Go ahead and report how many errs were detected vcfFileErr(vcff,"Closing with %d errors.",vcff->errCnt); } freez(&(vcff->headerString)); hashFree(&(vcff->pool)); if (vcff->reusePool) lmCleanup(&vcff->reusePool);