6546198dd3ce4f12b81225c5093163f3a176b5e3 angie Tue Mar 13 14:17:15 2018 -0700 Adding twoBitSeqWindow{New,Free} for seqWindow on 2bit files. diff --git src/hg/lib/seqWindow.c src/hg/lib/seqWindow.c index 930e9d1..286abdc 100644 --- src/hg/lib/seqWindow.c +++ src/hg/lib/seqWindow.c @@ -127,15 +127,87 @@ sw->end = strlen(sw->seq); sw->fetch = memSeqFetch; return sw; } void memSeqWindowFree(struct seqWindow **pSw) /* Free a seqWindow that was created by memSeqWindowNew. */ { if (pSw && *pSw) { seqWindowFreeShared(pSw); // No extra stuff for memSeqWindow freez(pSw); } } + +struct twoBitSeqWindow +/* seqWindow for twoBit file */ + { + struct seqWindow sw; // generic interface + struct twoBitFile *tbf; // twoBitFile from which this can fetch sequence + }; + +#define TWOBITSEQ_CACHE_FUDGE 4096 + +static void twoBitSeqFetch(struct seqWindow *seqWin, char *chrom, uint start, uint end) +/* seqWindow fetch method for updating window with new location & sequence if window does not + * already cover the requested location. */ +{ +struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)seqWin; +boolean sameChrom = sameOk(seqWin->seqName, chrom); +if (!sameChrom || start < seqWin->start || end > seqWin->end) + { + // We must fetch new sequence. Expand range by CHROMSEQ_CACHE_FUDGE so if we get + // successive requests for nearby sequences, we won't have to fetch sequence as often. + int chromSize = twoBitSeqSize(tsw->tbf, chrom); + if (start > chromSize) + errAbort("twoBitSeqFetch: start (%u) is out of range for %s %s (length %d)", + start, tsw->tbf->fileName, chrom, chromSize); + if (start == 0 && end == 0) + end = chromSize; + uint bufStart = (start > CHROMSEQ_CACHE_FUDGE) ? start - CHROMSEQ_CACHE_FUDGE : 0; + uint bufEnd = end + CHROMSEQ_CACHE_FUDGE; + // Tolerate & clip ranges that extend past the end of the sequence + if (bufEnd > chromSize) + bufEnd = chromSize; + struct dnaSeq *dnaSeq = twoBitReadSeqFragLower(tsw->tbf, chrom, bufStart, bufEnd); + if (dnaSeq) + { + bufEnd = bufStart + dnaSeq->size; // should be unnecessary but just in case + seqWindowUpdateRangeAndSeq(seqWin, chrom, bufStart, bufEnd, dnaSeqCannibalize(&dnaSeq)); + } + else + { + // No sequence for chrom + errAbort("twoBitSeqFetch: unable to get sequence for %s [%d,%d)", chrom, start, end); + } + } +} + +struct seqWindow *twoBitSeqWindowNew(char *twoBitFileName, char *chrom, uint start, uint end) +/* Return a new seqWindow that can fetch uppercase sequence from twoBitFileName. + * If chrom is non-NULL and end > start then load sequence from that range; if chrom is non-NULL + * and start == end == 0 then fetch entire chrom. */ +{ +struct twoBitSeqWindow *tsw; +AllocVar(tsw); +tsw->sw.fetch = twoBitSeqFetch; +tsw->tbf = twoBitOpen(twoBitFileName); +if (start > end) + errAbort("twoBitSeqWindowNew: start (%u) should be <= end (%u)", start, end); +if (chrom != NULL) + twoBitSeqFetch((struct seqWindow *)tsw, chrom, start, end); +return (struct seqWindow *)tsw; +} + +void twoBitSeqWindowFree(struct seqWindow **pSw) +/* Free a twoBitSeqWindow. */ +{ +if (pSw && *pSw) + { + seqWindowFreeShared(pSw); + struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)*pSw; + twoBitClose(&tsw->tbf); + freez(pSw); + } +}