6546198dd3ce4f12b81225c5093163f3a176b5e3
angie
  Tue Mar 13 14:17:15 2018 -0700
Adding twoBitSeqWindow{New,Free} for seqWindow on 2bit files.

diff --git src/hg/lib/seqWindow.c src/hg/lib/seqWindow.c
index 930e9d1..286abdc 100644
--- src/hg/lib/seqWindow.c
+++ src/hg/lib/seqWindow.c
@@ -1,141 +1,213 @@
 /* seqWindow -- generic interface & implementations for fetching subranges of a sequence */
 
 /* Copyright (C) 2017 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "hdb.h"
 #include "seqWindow.h"
 
 static void seqWindowUpdateRangeAndSeq(struct seqWindow *seqWin, char *seqName,
                                        uint start, uint end, char *seq)
 /* Update seqWin's window coordinates and sequence -- all fetch methods need to do this.
  * seq is *not* cloned, don't free it afterward! */
 {
 if (!sameOk(seqName, seqWin->seqName))
     {
     freeMem(seqWin->seqName);
     seqWin->seqName = cloneString(seqName);
     }
 seqWin->start = start;
 seqWin->end = end;
 freeMem(seqWin->seq);
 if (seq)
     touppers(seq);
 seqWin->seq = seq;
 }
 
 static void seqWindowFreeShared(struct seqWindow **pSw)
 /* Free the parts of seqWindow that are shared by all implementations.
  * This *does not* zero out *pSw like a normal Free would, so that implementations
  * can free up the implementation-specific parts of *pSw */
 {
 if (pSw && *pSw)
     {
     struct seqWindow *sw = (struct seqWindow *)*pSw;
     freeMem(sw->seqName);
     freeMem(sw->seq);
     }
 }
 
 struct chromSeqWindow
 /* seqWindow for chrom sequence from a genome database */
 {
     struct seqWindow sw;   // generic interface
     char *db;              // db from which this can fetch sequence
     };
 
 #define CHROMSEQ_CACHE_FUDGE 4096
 
 static void chromSeqFetch(struct seqWindow *seqWin, char *chrom, uint start, uint end)
 /* seqWindow fetch method for updating window with new location & sequence if window does not
  * already cover the requested location. */
 {
 struct chromSeqWindow *csw = (struct chromSeqWindow *)seqWin;
 boolean sameChrom = sameOk(seqWin->seqName, chrom);
 if (!sameChrom || start < seqWin->start || end > seqWin->end)
     {
     // We must fetch new sequence. Expand range by CHROMSEQ_CACHE_FUDGE so if we get
     // successive requests for nearby sequences, we won't have to fetch sequence as often.
     int chromSize = hChromSize(csw->db, chrom);
     if (start > chromSize)
         errAbort("chromSeqFetch: start (%u) is out of range for %s %s (length %d)",
                  start, csw->db, chrom, chromSize);
     uint bufStart = (start > CHROMSEQ_CACHE_FUDGE) ? start - CHROMSEQ_CACHE_FUDGE : 0;
     uint bufEnd = end + CHROMSEQ_CACHE_FUDGE;
     // Tolerate & clip ranges that extend past the end of the sequence
     if (bufEnd > chromSize)
         bufEnd = chromSize;
     struct dnaSeq *dnaSeq = hChromSeq(csw->db, chrom, bufStart, bufEnd);
     if (dnaSeq)
         {
         bufEnd = bufStart + dnaSeq->size;  // should be unnecessary but just in case
         seqWindowUpdateRangeAndSeq(seqWin, chrom, bufStart, bufEnd, dnaSeqCannibalize(&dnaSeq));
         }
     else
         {
         // No sequence for chrom
         errAbort("chromSeqFetch: unable to get sequence for %s [%d,%d)", chrom, start, end);
         }
     }
 }
 
 struct seqWindow *chromSeqWindowNew(char *db, char *chrom, uint start, uint end)
 /* Return a new seqWindow that can fetch uppercase sequence from the chrom sequences in db.
  * If chrom is non-NULL and end > start then load sequence from that range; if chrom is non-NULL
  * and start == end == 0 then fetch entire chrom. */
 {
 struct chromSeqWindow *csw;
 AllocVar(csw);
 csw->sw.fetch = chromSeqFetch;
 csw->db = cloneString(db);
 if (start > end)
     errAbort("chromSeqWindowNew: start (%u) should be <= end (%u)", start, end);
 if (chrom != NULL)
     chromSeqFetch((struct seqWindow *)csw, chrom, start, end);
 return (struct seqWindow *)csw;
 }
 
 void chromSeqWindowFree(struct seqWindow **pSw)
 /* Free a chromSeqWindow. */
 {
 if (pSw && *pSw)
     {
     seqWindowFreeShared(pSw);
     struct chromSeqWindow *csw = (struct chromSeqWindow *)*pSw;
     freeMem(csw->db);
     freez(pSw);
     }
 }
 
 static void memSeqFetch(struct seqWindow *seqWin, char *acc, uint start, uint end)
 /* No changes, we have what we have. */
 {
 if (!sameOk(seqWin->seqName, acc))
     errAbort("memSeqFetch: sequence name '%s' requested from window on '%s'", acc, seqWin->seqName);
 }
 
 struct seqWindow *memSeqWindowNew(char *acc, char *seq)
 /* Return a new seqWindow copying this sequence already in memory. */
 {
 struct seqWindow *sw;
 AllocVar(sw);
 sw->seqName = cloneString(acc);
 sw->seq = cloneString(seq ? seq : "");
 touppers(sw->seq);
 sw->start = 0;
 sw->end = strlen(sw->seq);
 sw->fetch = memSeqFetch;
 return sw;
 }
 
 void memSeqWindowFree(struct seqWindow **pSw)
 /* Free a seqWindow that was created by memSeqWindowNew. */
 {
 if (pSw && *pSw)
     {
     seqWindowFreeShared(pSw);
     // No extra stuff for memSeqWindow
     freez(pSw);
     }
 }
+
+struct twoBitSeqWindow
+/* seqWindow for twoBit file */
+    {
+    struct seqWindow sw;     // generic interface
+    struct twoBitFile *tbf;  // twoBitFile from which this can fetch sequence
+    };
+
+#define TWOBITSEQ_CACHE_FUDGE 4096
+
+static void twoBitSeqFetch(struct seqWindow *seqWin, char *chrom, uint start, uint end)
+/* seqWindow fetch method for updating window with new location & sequence if window does not
+ * already cover the requested location. */
+{
+struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)seqWin;
+boolean sameChrom = sameOk(seqWin->seqName, chrom);
+if (!sameChrom || start < seqWin->start || end > seqWin->end)
+    {
+    // We must fetch new sequence. Expand range by CHROMSEQ_CACHE_FUDGE so if we get
+    // successive requests for nearby sequences, we won't have to fetch sequence as often.
+    int chromSize = twoBitSeqSize(tsw->tbf, chrom);
+    if (start > chromSize)
+        errAbort("twoBitSeqFetch: start (%u) is out of range for %s %s (length %d)",
+                 start, tsw->tbf->fileName, chrom, chromSize);
+    if (start == 0 && end == 0)
+        end = chromSize;
+    uint bufStart = (start > CHROMSEQ_CACHE_FUDGE) ? start - CHROMSEQ_CACHE_FUDGE : 0;
+    uint bufEnd = end + CHROMSEQ_CACHE_FUDGE;
+    // Tolerate & clip ranges that extend past the end of the sequence
+    if (bufEnd > chromSize)
+        bufEnd = chromSize;
+    struct dnaSeq *dnaSeq = twoBitReadSeqFragLower(tsw->tbf, chrom, bufStart, bufEnd);
+    if (dnaSeq)
+        {
+        bufEnd = bufStart + dnaSeq->size;  // should be unnecessary but just in case
+        seqWindowUpdateRangeAndSeq(seqWin, chrom, bufStart, bufEnd, dnaSeqCannibalize(&dnaSeq));
+        }
+    else
+        {
+        // No sequence for chrom
+        errAbort("twoBitSeqFetch: unable to get sequence for %s [%d,%d)", chrom, start, end);
+        }
+    }
+}
+
+struct seqWindow *twoBitSeqWindowNew(char *twoBitFileName, char *chrom, uint start, uint end)
+/* Return a new seqWindow that can fetch uppercase sequence from twoBitFileName.
+ * If chrom is non-NULL and end > start then load sequence from that range; if chrom is non-NULL
+ * and start == end == 0 then fetch entire chrom. */
+{
+struct twoBitSeqWindow *tsw;
+AllocVar(tsw);
+tsw->sw.fetch = twoBitSeqFetch;
+tsw->tbf = twoBitOpen(twoBitFileName);
+if (start > end)
+    errAbort("twoBitSeqWindowNew: start (%u) should be <= end (%u)", start, end);
+if (chrom != NULL)
+    twoBitSeqFetch((struct seqWindow *)tsw, chrom, start, end);
+return (struct seqWindow *)tsw;
+}
+
+void twoBitSeqWindowFree(struct seqWindow **pSw)
+/* Free a twoBitSeqWindow. */
+{
+if (pSw && *pSw)
+    {
+    seqWindowFreeShared(pSw);
+    struct twoBitSeqWindow *tsw = (struct twoBitSeqWindow *)*pSw;
+    twoBitClose(&tsw->tbf);
+    freez(pSw);
+    }
+}