ea5551fd4f1ea0c73e95fb5ee1f5ecd557509854 kent Fri Jul 8 14:06:33 2022 -0700 Improving comments and diagnostic output. diff --git src/lib/cacheTwoBit.c src/lib/cacheTwoBit.c index e0e1d0d..88fcfcb 100644 --- src/lib/cacheTwoBit.c +++ src/lib/cacheTwoBit.c @@ -150,42 +150,44 @@ if (ctbSeq == NULL) { ctbSeq = cacheTwoBitSeqNew(cacheUrl, seqName, doRc, cacheAll->doUpper); hashAdd(seqHash, seqName, ctbSeq); slAddHead(&cacheUrl->seqList, ctbSeq); } struct dnaSeq *seq = NULL; if (start < end || !missOk) seq = cacheTwoBitSeqFetch(ctbSeq, start, end, retOffset); return seq; } struct dnaSeq *cacheTwoBitRangesFetch(struct cacheTwoBitRanges *cacheAll, char *url, char *seqName, int start, int end, boolean doRc, int *retOffset) -/* fetch a sequence from a 2bit. Caches open two bit files and sequence in - * both forward and reverse strand */ +/* Fetch a sequence from a twoBit cache. The result in retOffset is where the return dnaSeq + * sits within the named sequence, the whole of which is stored in the subtracted + * associated twoBit file. Do not free the returned sequence. Complains and aborts if + * url not found, or if seqName not found in URL */ { return cacheTwoBitRangesFetchOrNot(cacheAll, url, seqName, start, end, doRc, FALSE, retOffset); } struct dnaSeq *cacheTwoBitRangesMayFetch(struct cacheTwoBitRanges *cacheAll, char *url, char *seqName, int start, int end, boolean doRc, int *retOffset) /* Fetch a sequence from a twoBit cache. The result in retOffset is where the return dnaSeq * sits within the named sequence, the whole of which is stored in the subtracted * associated twoBit file. Do not free the returned sequence. Returns NULL if sequence not - * found */ + * found in any of the files we are caching without complaint. */ { return cacheTwoBitRangesFetchOrNot(cacheAll, url, seqName, start, end, doRc, TRUE, retOffset); } void cacheTwoBitRangesPrintStats(struct cacheTwoBitRanges *cache, FILE *f) /* print cache statistics - Debugging routine */ { fprintf(f, "caching %d twoBit files\n", slCount(cache->urlList)); struct cacheTwoBitUrl *cachedUrl; int totalSeq = 0; int totalRanges = 0; long basesQueried = 0; // Total bases read from cache long basesRead = 0; // Total bases read by cache int queryCount = 0; @@ -199,18 +201,19 @@ fprintf(f, " %s %c strand\n", ctbSeq->seqName, ctbSeq->doRc ? '-' : '+'); totalSeq += 1; totalRanges += ctbSeq->rangeTree->n; basesQueried += ctbSeq->basesQueried; basesRead += ctbSeq->basesRead; queryCount += ctbSeq->queryCount; struct range *range = rangeTreeList(ctbSeq->rangeTree); for ( ; range != NULL; range = range->next) { fprintf(f, " %d start %d size\n", range->start, range->end - range->start); } } } fprintf(f, "total sequences cached %d in %d ranges covering %d queries\n", totalSeq, totalRanges, queryCount); -fprintf(f, "basesRead %ld bases queried %ld\n", basesRead, basesQueried); +fprintf(f, "basesRead %ld %3.1f%% of bases queried %ld\n", basesRead, + 100.0*basesRead/basesQueried, basesQueried); }