7580a364f0819e5ab6244b2b1ff685980b93c188
kent
  Fri Jul 1 18:25:00 2022 -0700
First cut at cacheTwoBit system - which will cache pieces of two bit files we've read.  System has some sense about only loading what it hasn't seen already, and only loading the part it needs to see. Could be improved, but better than storing all of each sequence.

diff --git src/inc/cacheTwoBit.h src/inc/cacheTwoBit.h
new file mode 100644
index 0000000..706b25e
--- /dev/null
+++ src/inc/cacheTwoBit.h
@@ -0,0 +1,27 @@
+#ifndef CACHETWOBIT_H
+#define CACHETWOBIT_H
+
+/* cacheTwoBit - system for caching open two bit files and ranges of sequences in them */
+
+
+struct cacheTwoBitRanges
+/* Cache of open two bit files and of the sequence ranges held for them. */
+    {
+    struct cacheTwoBitRanges *next;	/* Next in list of such caches, if you need a list. */
+    struct hash *urlHash;	/* Hash of cacheTwoBitUrl structs (presumably keyed by url - confirm) */
+    struct cacheTwoBitUrl *urlList;  /* List of the same cacheTwoBitUrl values stored in urlHash */
+    };
+
+struct cacheTwoBitRanges *cacheTwoBitRangesNew();
+/* Create and return a new cache for ranges of, or complete, sequences in two bit files. */
+
+void cacheTwoBitRangesPrintStats(struct cacheTwoBitRanges *cache);
+/* Print statistics for the given cache.  Debugging routine. */
+
+struct dnaSeq *cacheTwoBitRangesFetch(struct cacheTwoBitRanges *cacheAll, char *url, char *seqName, 
+    int start, int end, boolean doRc, int *retOffset);
+/* Fetch a sequence from a twoBit cache.  The value stored in *retOffset is where the returned
+ * dnaSeq sits within the named sequence, the whole of which is stored in the associated twoBit
+ * file (presumably the one at url - confirm in the .c file).  Do not free the returned sequence. */
+
+#endif /* CACHETWOBIT_H */