src/kehayden/alphaAsm/alphaAsm.c 14ac01d846c85a19424cda9594535ffbf2f6bfdb

14ac01d846c85a19424cda9594535ffbf2f6bfdb
kent
  Wed Nov 28 13:34:06 2012 -0800
Adding some more error checking and diagnostic messages.
diff --git src/kehayden/alphaAsm/alphaAsm.c src/kehayden/alphaAsm/alphaAsm.c
index 4ff40a7..08f860f 100644
--- src/kehayden/alphaAsm/alphaAsm.c
+++ src/kehayden/alphaAsm/alphaAsm.c
@@ -574,31 +574,31 @@
 verbose(2, "predictFromPreviousTypes past all unknown types\n");
 return NULL;
 }
 
 
 struct wordTree *predictNext(struct alphaStore *store, struct dlList *past)
 /* Given input data store and what is known from the past, predict the next word.
  * Tries predictFromWordTree() on store->markovChains first, then
  * predictFromPreviousTypes(), and finally falls back to
  * pickWeightedRandomFromList() over the children of store->markovChains.
  * Returns the picked wordTree node. */
 {
 /* Presumably the start of the last store->maxChainSize nodes of past - confirm
  * against nodesFromTail(). */
 struct dlNode *recent = nodesFromTail(past, store->maxChainSize);
 struct wordTree *pick =  predictFromWordTree(store->markovChains, recent);
 if (pick == NULL)
     pick = predictFromPreviousTypes(store, past);
 if (pick == NULL)
     {
     /* Neither predictor produced a result, so pick among the root's children. */
     pick = pickWeightedRandomFromList(store->markovChains->children);
     /* NOTE(review): pick is dereferenced below without a NULL check; confirm that
      * pickWeightedRandomFromList() cannot return NULL here (e.g. empty child list). */
-    warn("in predictNext() last resort pick of %s", pick->monomer->word);
+    verbose(2, "in predictNext() last resort pick of %s\n", pick->monomer->word);
     }
 return pick;
 }
 
 void decrementOutputCountsInTree(struct wordTree *wt)
 /* Decrement output count of self and parents. */
 {
 while (wt != NULL)
     {
     /* Decrement target count, but don't let it fall below sum of counts of all children. 
      * This can happen with incomplete data if we don't prevent it.  This
      * same code also prevents us from having negative outTarget. */
     int outTarget = wt->outTarget - 1;
     int kidSum = wordTreeSumOutTargets(wt->children);
     if (outTarget < kidSum)
@@ -927,46 +927,48 @@
 	    }
 	}
     }
 
 /* Ok, now have to just manufacture other side of orphan starts out of thin air. */
 for (start = orphanStarts; start != NULL; start = start->next)
     {
     if (start->paired)
         continue;
     struct monomer *startMono = start->monomer;
     struct monomerType *startType = startMono->type;
     if (startType == NULL)
         continue;
 
     struct monomerType *newType = typeBefore(store, startType, 1);
+    verbose(2, "Trying to find end of type %s\n", newType->name);
     struct monomer *newMono = pickRandomFromType(newType);
     addReadOfTwo(store, newMono, startMono);
     verbose(2, "Pairing new %s with start %s\n", newMono->word, startMono->word);
     }
 
 /* Ok, now have to just manufacture other side of orphan ends out of thin air. */
 for (end = orphanEnds; end != NULL; end = end->next)
     {
     if (end->paired)
         continue;
     struct monomer *endMono = end->monomer;
     struct monomerType *endType = endMono->type;
     if (endType == NULL)
         continue;
 
     struct monomerType *newType = typeAfter(store, endType, 1);
+    verbose(2, "Trying to find start of type %s\n", newType->name);
     struct monomer *newMono = pickRandomFromType(newType);
     addReadOfTwo(store, endMono, newMono);
     verbose(2, "Pairing end %s with new %s\n", endMono->word, newMono->word);
     }
 }
 
 void makeMarkovChains(struct alphaStore *store)
 /* Return an alphaStore containing all words, and also all chains-of-words of length 
  * chainSize seen in file.  */
 {
 /* We'll build up the tree starting with an empty root node. */
 struct wordTree *wt = store->markovChains = wordTreeNew(alphaStoreAddMonomer(store, ""));	
 int chainSize = store->maxChainSize;
 
 /* Loop through each read. There's special cases at the beginning and end of read, and for 
@@ -1055,30 +1057,33 @@
     struct monomerType *type;
     AllocVar(type);
     type->name = cloneString(name);
     slAddHead(&store->typeList, type);
     while ((word = nextWord(&line)) != NULL)
         {
 	struct monomer *monomer = hashFindVal(store->monomerHash, word);
 	if (monomer == NULL)
 	    errAbort("%s is in %s but not %s", word, lf->fileName, readsFile);
 	struct monomerRef *ref;
 	AllocVar(ref);
 	ref->val = monomer;
 	slAddHead(&type->list, ref);
 	hashAddUnique(store->typeHash, word, type);
 	}
+    if (type->list == NULL)
+        errAbort("Short line %d of %s.  Format should be:\ntype list-of-monomers-of-type\n",
+	    lf->lineIx, lf->fileName);
     }
 slReverse(&store->typeList);
 lineFileClose(&lf);
 hashFree(&uniq);
 verbose(2, "Added %d types containing %d words from %s\n", 
     slCount(store->typeList), store->typeHash->elCount, fileName);
 
 /* Create type array */
 store->typeCount = slCount(store->typeList);
 struct monomerType **types = AllocArray(store->typeArray, store->typeCount);
 struct monomerType *type;
 int i;
 for (i=0, type = store->typeList; i<store->typeCount; ++i, type = type->next)
     types[i] = type;
 }