70495d3b08b1316f2eb2f0ca7032730b1333e6bb
kent
  Wed Sep 5 11:16:51 2012 -0700
Some refactoring.  Making it so saves entire generated chain in memory so that can go back further than just last 3.
diff --git src/kehayden/alphaChain/alphaChain.c src/kehayden/alphaChain/alphaChain.c
index 45bc5c0..f76263d 100644
--- src/kehayden/alphaChain/alphaChain.c
+++ src/kehayden/alphaChain/alphaChain.c
@@ -396,122 +396,137 @@
 struct dlNode *node;
 for (node = list; !dlEnd(node); node = node->next)
     {
     struct wordInfo *info = node->val;
     wt = wordTreeFindInList(wt->children, info);
     verbose(2, "   wordTreeFindInList(%s) = %p %s\n", info->word, wt, wordTreeString(wt));
     if (wt == NULL || wt->children == NULL)
         break;
     }
 struct wordTree *result = NULL;
 if (wt != NULL && wt->children != NULL)
     result = pickRandom(wt->children);
 return result;
 }
 
-struct wordTree *predictNext(struct wordTree *wt, struct dlList *recent)
+struct wordTree *predictNext(struct wordTree *wt, struct dlNode *recent)
 /* Predict next word given tree and recently used word list.  Will use all words in
  * recent list if can,  but if there is not data in tree, will back off, and use
  * progressively less previous words until ultimately it just picks a random
  * word. */
 {
 struct dlNode *node;
 
-for (node = recent->head; !dlEnd(node); node = node->next)
+for (node = recent; !dlEnd(node); node = node->next)
     {
     struct wordTree *result = predictNextFromAllPredecessors(wt, node);
     if (result != NULL)
         return result;
     }
 struct wordTree *topLevel = pickRandom(wt->children);
-verbose(2, "in predictNext(%s, %s) ", wordTreeString(wt), dlListFragWords(recent->head));
+verbose(2, "in predictNext(%s, %s) ", wordTreeString(wt), dlListFragWords(recent));
 verbose(2, "last resort pick of %s\n", topLevel->info->word);
 return topLevel;
 }
 
 void decrementOutputCounts(struct wordTree *wt)
 /* Decrement output count of self and parents. */
 {
 while (wt != NULL)
     {
     /* Decrement target count, but don't let it fall below sum of counts of all children. 
      * This can happen with incomplete data if we don't prevent it.  This
      * same code also prevents us from having negative outTarget. */
     int outTarget = wt->outTarget - 1;
     int kidSum = wordTreeSumOutTargets(wt->children);
     if (outTarget < kidSum)
         outTarget = kidSum;
     wt->outTarget = outTarget;
 
     /* Always bump outCount for debugging. */
     wt->outCount += 1;
     wt = wt->parent;
     }
 }
 
-static void wordTreeGenerateFaux(struct wordStore *store, int maxSize, struct wordTree *firstWord, 
-	int maxOutputWords, char *fileName)
-/* Go spew out a bunch of words according to probabilities in tree. */
+static struct dlList *wordTreeGenerateList(struct wordStore *store, int maxSize, 
+    struct wordTree *firstWord, int maxOutputWords)
+/* Make a list of words based on probabilities in tree. */
 {
 struct wordTree *wt = store->markovChains;
-FILE *f = mustOpen(fileName, "w");
 struct dlList *ll = dlListNew();
-int listSize = 0;
+int chainSize = 0;
 int outputWords = 0;
+struct dlNode *chainStartNode = NULL;
 
 for (;;)
     {
     if (++outputWords > maxOutputWords)
         break;
     struct dlNode *node;
     struct wordTree *picked;
 
     /* Get next predicted word. */
-    if (listSize == 0)
-        {
 	AllocVar(node);
-	++listSize;
+    if (chainSize == 0)
+        {
+	chainStartNode = node;
+	++chainSize;
 	picked = firstWord;
 	}
-    else if (listSize >= maxSize)
+    else if (chainSize >= maxSize)
 	{
-	node = dlPopHead(ll);
-	picked = predictNext(wt, ll);
+	chainStartNode = chainStartNode->next;
+	picked = predictNext(wt, chainStartNode);
 	}
     else
 	{
-	picked = predictNext(wt, ll);
-	AllocVar(node);
-	++listSize;
+	picked = predictNext(wt, chainStartNode);
+	++chainSize;
 	}
 
     if (picked == NULL)
          break;
 
 
-    /* Add word from whatever level we fetched back to our chain of up to maxChainSize. */
+    /* Add word from whatever level we fetched back to our output chain. */
     node->val = picked->info;
     dlAddTail(ll, node);
 
-    fprintf(f, "%s\n", picked->info->word);
-
     decrementOutputCounts(picked);
     }
-dlListFree(&ll);
-carefulClose(&f);
 verbose(2, "totUseZeroCount = %d\n", totUseZeroCount);
+return ll;
+}
+
+static void wordTreeGenerateFile(struct wordStore *store, int maxSize, struct wordTree *firstWord, 
+	int maxOutputWords, char *fileName)
+/* Create file containing words base on tree probabilities.  The wordTreeGenerateList does
+ * most of work. */
+{
+struct dlList *ll = wordTreeGenerateList(store, maxSize, firstWord, maxOutputWords);
+FILE *f = mustOpen(fileName, "w");
+struct dlNode *node;
+for (node = ll->head; !dlEnd(node); node = node->next)
+    {
+    struct wordInfo *info = node->val;
+    fprintf(f, "%s\n", info->word);
 }
+carefulClose(&f);
+dlListFree(&ll);
+}
+
 
 static void wordTreeSort(struct wordTree *wt)
 /* Sort all children lists in tree. */
 {
 slSort(&wt->children, wordTreeCmpWord);
 struct wordTree *child;
 for (child = wt->children; child != NULL; child = child->next)
     wordTreeSort(child);
 }
 
 struct wordStore *wordStoreNew()
 /* Allocate and initialize a new word store. */
 {
 struct wordStore *store;
 AllocVar(store);
@@ -652,31 +667,31 @@
 
 void alphaChain(char *readsFile, char *monomerOrderFile, char *outFile)
 /* alphaChain - Create Markov chain of words and optionally output chain in two formats. */
 {
 struct wordStore *store = wordStoreForChainsInFile(readsFile, maxChainSize);
 struct wordTree *wt = store->markovChains;
 wordStoreLoadMonomerOrder(store, readsFile, monomerOrderFile);
 wordTreeNormalize(wt, outSize, 1.0);
 
 if (optionExists("chain"))
     {
     char *fileName = optionVal("chain", NULL);
     wordTreeWrite(wt, fileName);
     }
 
-wordTreeGenerateFaux(store, maxChainSize, pickRandom(wt->children), outSize, outFile);
+wordTreeGenerateFile(store, maxChainSize, pickRandom(wt->children), outSize, outFile);
 
 if (optionExists("afterChain"))
     {
     char *fileName = optionVal("afterChain", NULL);
     wordTreeWrite(wt, fileName);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 maxChainSize = optionInt("size", maxChainSize);