e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/utils/bestThreshold/bestThreshold.c src/utils/bestThreshold/bestThreshold.c
index 9975b0d..4fce0af 100644
--- src/utils/bestThreshold/bestThreshold.c
+++ src/utils/bestThreshold/bestThreshold.c
@@ -1,141 +1,144 @@
 /* bestThreshold - Given a set of numbers for positive examples and one 
  * for negative examples, figure out threshold that best separates the 
  * two sets. */
+
+/* Copyright (C) 2011 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 
 
 /* Smallest step size used by the threshold search in bestThreshold():
  * the search loop there keeps halving its step and stops once the step
  * is no longer greater than this value. */
 double minDelta = 0.001;
 
 void usage(void)
 /* Explain usage and exit.  The message is handed to errAbort(); the
  * option table in this file declares no program-specific options, so
  * none are listed here. */
 {
 errAbort(
   "bestThreshold - Given a set of numbers for positive examples and one for\n"
   "negative examples, figure out threshold that best separates the two sets.\n"
   "usage:\n"
   "   bestThreshold good bad\n"
   "where good and bad are files containing one number per line.\n"
   );
 }
 
 /* Command line option specifications handed to optionInit() in main().
  * The only entry is {NULL, 0} (presumably the list terminator - confirm
  * against options.h), so no program-specific options are declared. */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 int calcErrsAtThreshold(struct slDouble *goodList, struct slDouble *badList, double threshold)
 /* Count how many examples the given threshold misclassifies: values in
  * goodList that fall below the threshold plus values in badList that are
  * at or above it.  Either list may be NULL (empty). */
 {
 int missedGood = 0, acceptedBad = 0;
 struct slDouble *el = goodList;
 while (el != NULL)
     {
     /* A good example is expected to score at least the threshold. */
     if (el->val < threshold)
         missedGood += 1;
     el = el->next;
     }
 el = badList;
 while (el != NULL)
     {
     /* A bad example is expected to score strictly below the threshold. */
     if (el->val >= threshold)
         acceptedBad += 1;
     el = el->next;
     }
 return missedGood + acceptedBad;
 }
 
 struct slDouble *readNumFile(char *fileName)
 /* Read a file of numbers, one per line, and return them as a list.
  * Calls errAbort() if a line chops into more than one word.  Each number
  * is added at the head of the list and the list is reversed before it is
  * returned. */
 {
 struct slDouble *numbers = NULL;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[2];	/* Two slots so that a second word on a line can be detected. */
 int wordCount;
 /* A word count of zero ends the read loop. */
 while ((wordCount = lineFileChop(lf, row)) != 0)
     {
     if (wordCount > 1)
         errAbort("Just expecting one number per line in %s", fileName);
     double val = lineFileNeedDouble(lf, row, 0);
     struct slDouble *el = slDoubleNew(val);
     slAddHead(&numbers, el);
     }
 lineFileClose(&lf);
 slReverse(&numbers);
 return numbers;
 }
 
 double calcMin(struct slDouble *list)
 /* Return the smallest value in list.  The list must not be empty: the
  * first element is read without a NULL check. */
 {
 struct slDouble *el = list;
 double smallest = el->val;
 /* Step to each following element and fold it into the running minimum. */
 while ((el = el->next) != NULL)
     smallest = min(smallest, el->val);
 return smallest;
 }
 
 double calcMax(struct slDouble *list)
 /* Return the largest value in list.  The list must not be empty: the
  * first element is read without a NULL check. */
 {
 struct slDouble *el = list;
 double largest = el->val;
 /* Step to each following element and fold it into the running maximum. */
 while ((el = el->next) != NULL)
     largest = max(largest, el->val);
 return largest;
 }
 
 void bestThreshold(char *goodFile, char *badFile)
 /* bestThreshold - Given a set of numbers for positive examples and one 
  * for negative examples, figure out threshold that best separates the 
  * two sets. 
  *
  * goodFile and badFile each hold one number per line; errAbort() is
  * called if either file yields no numbers.  A value counts as correctly
  * classified when it is >= threshold for a good example or < threshold
  * for a bad one.  The search starts at the midpoint of the combined data
  * range and repeatedly tries one step up and one step down, keeping a
  * move only if it strictly lowers the error count, and halving the step
  * each round until the step is no longer greater than minDelta.  The
  * final threshold, error count, total and accuracy are printed to
  * stdout; progress is reported through verbose() at level 2. */
 {
 struct slDouble *goodList = readNumFile(goodFile);
 struct slDouble *badList = readNumFile(badFile);
 if (goodList == NULL)
     errAbort("%s is empty", goodFile);
 if (badList == NULL)
     errAbort("%s is empty", badFile);
 int total = slCount(goodList) + slCount(badList);
 double goodMin = calcMin(goodList), badMin = calcMin(badList);
 double goodMax = calcMax(goodList), badMax = calcMax(badList);
 double minVal = min(goodMin, badMin);
 double maxVal = max(goodMax, badMax);
 
 double threshold = (minVal + maxVal)/2;
 int errors = calcErrsAtThreshold(goodList, badList, threshold);
 
 /* Initial step size.  Using the midpoint alone breaks when the data is
  * centered at or below zero (the step is then not positive and the loop
  * below never runs) or straddles zero with a small positive midpoint (the
  * halving steps then sum to less than the distance to the data extremes).
  * Taking the larger of the midpoint and half the data range fixes both
  * cases, and leaves the step unchanged for all-non-negative data, where
  * the midpoint is never smaller than half the range. */
 double halfRange = (maxVal - minVal)/2;
 double delta = max(threshold, halfRange);
 
 verbose(2, "min %g, max %g, threshold %g, errors %d, accuracy %5.3f%%\n", 
 	minVal, maxVal, threshold, errors, 100 - 100.0*errors/total);
 while (delta > minDelta)
     {
     double threshPlus = threshold + delta;
     double threshMinus = threshold - delta;
     int errPlus = calcErrsAtThreshold(goodList, badList, threshPlus);
     int errMinus = calcErrsAtThreshold(goodList, badList, threshMinus);
     /* Moving up is tried first; moving down is only considered when
      * moving up does not strictly reduce the error count. */
     if (errPlus < errors)
          {
 	 threshold = threshPlus;
 	 errors = errPlus;
 	 }
     else if (errMinus < errors)
          {
 	 threshold = threshMinus;
 	 errors = errMinus;
 	 }
     delta *= 0.5;
     verbose(2, "delta %g, threshold %g, errors %d, accuracy %5.3f%%\n", 
 	    delta, threshold, errors, 100 - 100.0*errors/total);
     }
 printf("Threshold %g, errors %d, total %d, accuracy %5.3f%%\n",
 	threshold, errors, total, 100 - 100.0*errors/total);
 }
 
 int main(int argc, char *argv[])
 /* Process command line: let optionInit() handle the options, then
  * require exactly two remaining arguments, the good and bad number
  * files. */
 {
 optionInit(&argc, argv, options);
 /* argv[0] plus the two file names should be all that is left. */
 if (argc != 3)
     usage();
 char *goodFile = argv[1];
 char *badFile = argv[2];
 bestThreshold(goodFile, badFile);
 return 0;
 }