c7455312e1ba7fb58ed6b9191ee1ff147730066d
angie
  Mon Sep 30 12:02:37 2013 -0700
Make annoAssemblySeqSize cache sequence sizes, because twoBitSeqSize does a seek and read, and some annoStreams check sequence size pretty often.

diff --git src/lib/annoAssembly.c src/lib/annoAssembly.c
index 23453ec..45cee66 100644
--- src/lib/annoAssembly.c
+++ src/lib/annoAssembly.c
@@ -1,43 +1,56 @@
 /* annoAssembly -- basic metadata about an assembly for the annoGrator framework. */
 
-#include "common.h"
-#include "twoBit.h"
 #include "annoAssembly.h"
+#include "obscure.h"
+#include "twoBit.h"
 
 struct annoAssembly *annoAssemblyNew(char *name, char *twoBitPath)
 /* Return an annoAssembly with open twoBitFile. */
 {
 struct annoAssembly *aa;
 AllocVar(aa);
 aa->name = cloneString(name);
 aa->tbf = twoBitOpen(twoBitPath);
 aa->twoBitPath = cloneString(twoBitPath);
 return aa;
 }
 
 struct slName *annoAssemblySeqNames(struct annoAssembly *aa)
 /* Return a list of sequence names in this assembly. */
 {
 struct slName *seqNames = twoBitSeqNames(aa->twoBitPath);
 slSort(&seqNames, slNameCmp);
 return seqNames;
 }
 
 uint annoAssemblySeqSize(struct annoAssembly *aa, char *seqName)
 /* Return the number of bases in seq which must be in aa's twoBitFile. */
 {
-return (uint)twoBitSeqSize(aa->tbf, seqName);
+if (aa->seqSizes == NULL)
+    aa->seqSizes = hashNew(digitsBaseTwo(aa->tbf->seqCount));
+struct hashEl *hel = hashLookup(aa->seqSizes, seqName);
+uint seqSize;
+if (hel != NULL)
+    seqSize = (uint)(hel->val - NULL);
+else
+    {
+    seqSize = (uint)twoBitSeqSize(aa->tbf, seqName);
+    char *pt = NULL;
+    hashAdd(aa->seqSizes, seqName, pt + seqSize);
+    }
+return seqSize;
 }
 
 void annoAssemblyClose(struct annoAssembly **pAa)
 /* Close aa's twoBitFile and free mem. */
 {
 if (*pAa == NULL)
     return;
 struct annoAssembly *aa = *pAa;
 freeMem(aa->name);
 freeMem(aa->twoBitPath);
 twoBitClose(&(aa->tbf));
+hashFree(&(aa->seqSizes));
 freez(pAa);
 }