14ac01d846c85a19424cda9594535ffbf2f6bfdb kent Wed Nov 28 13:34:06 2012 -0800
Adding some more error checking and diagnostic messages.
diff --git src/kehayden/alphaAsm/alphaAsm.c src/kehayden/alphaAsm/alphaAsm.c
index 4ff40a7..08f860f 100644
--- src/kehayden/alphaAsm/alphaAsm.c
+++ src/kehayden/alphaAsm/alphaAsm.c
@@ -574,31 +574,31 @@
 verbose(2, "predictFromPreviousTypes past all unknown types\n");
 return NULL;
 }
 
 struct wordTree *predictNext(struct alphaStore *store, struct dlList *past)
 /* Given input data store and what is known from the past, predict the next word. */
 {
 struct dlNode *recent = nodesFromTail(past, store->maxChainSize);
 struct wordTree *pick = predictFromWordTree(store->markovChains, recent);
 if (pick == NULL)
     pick = predictFromPreviousTypes(store, past);
 if (pick == NULL)
     {
     pick = pickWeightedRandomFromList(store->markovChains->children);
-    warn("in predictNext() last resort pick of %s", pick->monomer->word);
+    verbose(2, "in predictNext() last resort pick of %s\n", pick->monomer->word);
     }
 return pick;
 }
 
 void decrementOutputCountsInTree(struct wordTree *wt)
 /* Decrement output count of self and parents. */
 {
 while (wt != NULL)
     {
     /* Decrement target count, but don't let it fall below sum of counts of all children.
      * This can happen with incomplete data if we don't prevent it. This
      * same code also prevents us from having negative outTarget. */
     int outTarget = wt->outTarget - 1;
     int kidSum = wordTreeSumOutTargets(wt->children);
     if (outTarget < kidSum)
@@ -927,46 +927,48 @@
             }
         }
     }
 
 /* Ok, now have to just manufacture other side of orphan starts out of thin air. */
 for (start = orphanStarts; start != NULL; start = start->next)
     {
     if (start->paired)
         continue;
     struct monomer *startMono = start->monomer;
     struct monomerType *startType = startMono->type;
     if (startType == NULL)
         continue;
     struct monomerType *newType = typeBefore(store, startType, 1);
+    verbose(2, "Trying to find end of type %s\n", newType->name);
     struct monomer *newMono = pickRandomFromType(newType);
     addReadOfTwo(store, newMono, startMono);
     verbose(2, "Pairing new %s with start %s\n", newMono->word, startMono->word);
     }
 
 /* Ok, now have to just manufacture other side of orphan ends out of thin air. */
 for (end = orphanEnds; end != NULL; end = end->next)
     {
     if (end->paired)
         continue;
     struct monomer *endMono = end->monomer;
     struct monomerType *endType = endMono->type;
     if (endType == NULL)
         continue;
     struct monomerType *newType = typeAfter(store, endType, 1);
+    verbose(2, "Trying to find start of type %s\n", newType->name);
     struct monomer *newMono = pickRandomFromType(newType);
     addReadOfTwo(store, endMono, newMono);
     verbose(2, "Pairing end %s with new %s\n", endMono->word, newMono->word);
     }
 }
 
 void makeMarkovChains(struct alphaStore *store)
 /* Return a alphaStore containing all words, and also all chains-of-words of length
  * chainSize seen in file. */
 {
 /* We'll build up the tree starting with an empty root node. */
 struct wordTree *wt = store->markovChains = wordTreeNew(alphaStoreAddMonomer(store, ""));
 int chainSize = store->maxChainSize;
 
 /* Loop through each read. There's special cases at the beginning and end of read, and for
@@ -1055,30 +1057,33 @@
     struct monomerType *type;
     AllocVar(type);
     type->name = cloneString(name);
     slAddHead(&store->typeList, type);
     while ((word = nextWord(&line)) != NULL)
         {
         struct monomer *monomer = hashFindVal(store->monomerHash, word);
         if (monomer == NULL)
             errAbort("%s is in %s but not %s", word, lf->fileName, readsFile);
         struct monomerRef *ref;
         AllocVar(ref);
         ref->val = monomer;
         slAddHead(&type->list, ref);
         hashAddUnique(store->typeHash, word, type);
         }
+    if (type->list == NULL)
+        errAbort("Short line %d of %s. Format should be:\ntype list-of-monomers-of-type\n",
+            lf->lineIx, lf->fileName);
     }
 slReverse(&store->typeList);
 lineFileClose(&lf);
 hashFree(&uniq);
 verbose(2, "Added %d types containing %d words from %s\n",
     slCount(store->typeList), store->typeHash->elCount, fileName);
 
 /* Create type array */
 store->typeCount = slCount(store->typeList);
 struct monomerType **types = AllocArray(store->typeArray, store->typeCount);
 struct monomerType *type;
 int i;
 for (i=0, type = store->typeList; i<store->typeCount; ++i, type = type->next)
     types[i] = type;
 }