957b597719175a909ec1366cd1415892d51e5400 kent Thu Apr 4 08:59:08 2013 -0700
Adding use count for chain and monomer to between tag.
diff --git src/kehayden/alphaAsm/alphaAsm.c src/kehayden/alphaAsm/alphaAsm.c
index d338108..5c1b7c1 100644
--- src/kehayden/alphaAsm/alphaAsm.c
+++ src/kehayden/alphaAsm/alphaAsm.c
@@ -251,84 +251,92 @@
 }
 
 void addChainToTree(struct wordTree *wt, struct dlList *chain)
 /* Add chain of words to tree. */
 {
 struct dlNode *node;
 wt->useCount += 1;
 for (node = chain->head; !dlEnd(node); node = node->next)
     {
     struct monomer *monomer = node->val;
     verbose(4, " adding %s\n", monomer->word);
     wt = wordTreeAddFollowing(wt, monomer);
     }
 }
 
-boolean gotMatchInTree(struct wordTree *wt, struct dlNode *nodeList, int chainSize)
-/* Return TRUE if find node list in tree*/
+int useCountInTree(struct wordTree *wt, struct dlNode *nodeList, int chainSize)
+/* Return number of times chainSize successive nodes from nodeList are found
+ * in wt, 0 if not found. */
 {
 int i;
 struct wordTree *subTree = wt;
 struct dlNode *node = nodeList;
 for (i=0; i<chainSize; ++i)
     {
     struct monomer *monomer = node->val;
     subTree = wordTreeFindInList(subTree->children, monomer);
     if (subTree == NULL)
         return FALSE;
     node = node->next;
     }
-return TRUE;
+return subTree->useCount;
 }
 
 void findLongestSupportingMarkovChain(struct wordTree *wt, struct dlNode *node,
     int *retChainSize, int *retReadCount)
 /* See if chain of words is in tree tree. */
 {
 struct dlNode *start = node;
 int chainSize = 1;
+int readCount = 0;
 for (;;)
     {
-    if (!gotMatchInTree(wt, start, chainSize))
+    int useCount = useCountInTree(wt, start, chainSize);
+    if (useCount == 0)
         break;
+    readCount = useCount;
     chainSize += 1;
     start = start->prev;
     if (dlStart(start))
         break;
     }
 *retChainSize = chainSize;
-*retReadCount = 0; // Not implemented.
+*retReadCount = readCount;
 }
 
 static void writeMonomerListAndBetweens(struct alphaStore *store,
     char *fileName, struct dlList *ll)
 /* Write out monomer list to file. */
 {
 FILE *f = mustOpen(fileName, "w");
 struct dlNode *node;
+struct wordTree *origTree = store->markovChainsNoOrphans;
 for (node = ll->head; !dlEnd(node); node = node->next)
     {
     struct monomer *monomer = node->val;
     if (betweens)
         {
         int chainSize = 0, readCount = 0;
-        findLongestSupportingMarkovChain(store->markovChainsNoOrphans, node,
+        findLongestSupportingMarkovChain(origTree, node,
             &chainSize, &readCount);
         /* The -2 is for 1 extra for the empty tree root, and 1 extra to get
          * from chain-size to markov model terminology. */
-        fprintf(f, "<%d> ", chainSize-2);
+        char between[24];
+        safef(between, sizeof(between), "<%d:%d:%d>", chainSize-2,
+            readCount, useCountInTree(origTree, node, 1));
+        fprintf(f, "%-11s\t", between);
         }
     fprintf(f, "%s\n", monomer->word);
     }
 carefulClose(&f);
 }
 
 int wordTreeAddPseudoCount(struct wordTree *wt, int pseudo)
 /* Add pseudo to all leaves of tree and propagate counts up to parents. */
 {
 if (wt->children == NULL)
     {
     wt->useCount += pseudo;
     return wt->useCount;
     }