6546198dd3ce4f12b81225c5093163f3a176b5e3
angie
Tue Mar 13 14:17:15 2018 -0700
Adding twoBitSeqWindow{New,Free} for seqWindow on 2bit files.
diff --git src/hg/lib/seqWindow.c src/hg/lib/seqWindow.c
index 930e9d1..286abdc 100644
--- src/hg/lib/seqWindow.c
+++ src/hg/lib/seqWindow.c
@@ -127,15 +127,87 @@
sw->end = strlen(sw->seq);
sw->fetch = memSeqFetch;
return sw;
}
void memSeqWindowFree(struct seqWindow **pSw)
/* Free a seqWindow that was created by memSeqWindowNew. */
{
if (pSw && *pSw)
{
seqWindowFreeShared(pSw);
// No extra stuff for memSeqWindow
freez(pSw);
}
}
+
+struct twoBitSeqWindow
+/* seqWindow for twoBit file */
+ {
+ struct seqWindow sw; // generic interface
+ struct twoBitFile *tbf; // twoBitFile from which this can fetch sequence
+ };
+
+#define TWOBITSEQ_CACHE_FUDGE 4096
+
+static void twoBitSeqFetch(struct seqWindow *seqWin, char *chrom, uint start, uint end)
+/* seqWindow fetch method for updating window with new location & sequence if window does not
+ * already cover the requested location. */
+{
+struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)seqWin;
+boolean sameChrom = sameOk(seqWin->seqName, chrom);
+if (!sameChrom || start < seqWin->start || end > seqWin->end)
+ {
+ // We must fetch new sequence. Expand range by CHROMSEQ_CACHE_FUDGE so if we get
+ // successive requests for nearby sequences, we won't have to fetch sequence as often.
+ int chromSize = twoBitSeqSize(tsw->tbf, chrom);
+ if (start > chromSize)
+ errAbort("twoBitSeqFetch: start (%u) is out of range for %s %s (length %d)",
+ start, tsw->tbf->fileName, chrom, chromSize);
+ if (start == 0 && end == 0)
+ end = chromSize;
+ uint bufStart = (start > CHROMSEQ_CACHE_FUDGE) ? start - CHROMSEQ_CACHE_FUDGE : 0;
+ uint bufEnd = end + CHROMSEQ_CACHE_FUDGE;
+ // Tolerate & clip ranges that extend past the end of the sequence
+ if (bufEnd > chromSize)
+ bufEnd = chromSize;
+ struct dnaSeq *dnaSeq = twoBitReadSeqFragLower(tsw->tbf, chrom, bufStart, bufEnd);
+ if (dnaSeq)
+ {
+ bufEnd = bufStart + dnaSeq->size; // should be unnecessary but just in case
+ seqWindowUpdateRangeAndSeq(seqWin, chrom, bufStart, bufEnd, dnaSeqCannibalize(&dnaSeq));
+ }
+ else
+ {
+ // No sequence for chrom
+ errAbort("twoBitSeqFetch: unable to get sequence for %s [%d,%d)", chrom, start, end);
+ }
+ }
+}
+
+struct seqWindow *twoBitSeqWindowNew(char *twoBitFileName, char *chrom, uint start, uint end)
+/* Return a new seqWindow that can fetch uppercase sequence from twoBitFileName.
+ * If chrom is non-NULL and end > start then load sequence from that range; if chrom is non-NULL
+ * and start == end == 0 then fetch entire chrom. */
+{
+struct twoBitSeqWindow *tsw;
+AllocVar(tsw);
+tsw->sw.fetch = twoBitSeqFetch;
+tsw->tbf = twoBitOpen(twoBitFileName);
+if (start > end)
+ errAbort("twoBitSeqWindowNew: start (%u) should be <= end (%u)", start, end);
+if (chrom != NULL)
+ twoBitSeqFetch((struct seqWindow *)tsw, chrom, start, end);
+return (struct seqWindow *)tsw;
+}
+
+void twoBitSeqWindowFree(struct seqWindow **pSw)
+/* Free a twoBitSeqWindow. */
+{
+if (pSw && *pSw)
+ {
+ seqWindowFreeShared(pSw);
+ struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)*pSw;
+ twoBitClose(&tsw->tbf);
+ freez(pSw);
+ }
+}