c09c891ffe34df8d4980e13bfbdd3c3dc239a296
kate
  Fri May 15 08:11:15 2015 -0700
Add 'did you mean' suggestion to hubCheck settings check. Similarity test from JK based on axt alignment scoring. refs #10015

diff --git src/hg/lib/trackHubCheck.c src/hg/lib/trackHubCheck.c
index 6ca105a..802c20e 100644
--- src/hg/lib/trackHubCheck.c
+++ src/hg/lib/trackHubCheck.c
@@ -1,51 +1,135 @@
 #include "common.h"
 #include "dystring.h"
 #include "trackDb.h"
 #include "bigWig.h"
 #include "bigBed.h"
 #include "errCatch.h"
 #include "vcf.h"
 #include "hgBam.h"
 #include "net.h"
 #include "htmshell.h"
 #include "htmlPage.h"
 #include "trackHub.h"
+#include "axt.h"
 
 #ifdef USE_HAL
 #include "halBlockViz.h"
 #endif
 
+
+/* Mini English spell-check built on the axt sequence alignment code (from JK).
+ * It works in this context when the score threshold is set high.  */
+
+static struct axtScoreScheme *scoreSchemeEnglish()
+/* Allocate a score scheme for comparing plain words: gap open 4, gap extend 2,
+ * identical characters score 2, same letter in the other case scores 1. */
+{
+struct axtScoreScheme *scheme;
+AllocVar(scheme);
+scheme->gapExtend = 2;
+scheme->gapOpen = 4;
+
+/* Every character value scores 2 against itself */
+int c;
+for (c = 0; c < 256; ++c)
+    scheme->matrix[c][c] = 2;
+/* A letter against its own upper/lower case twin scores 1, in either order */
+int lower, upper;
+for (lower = 'a', upper = 'A'; lower <= 'z'; ++lower, ++upper)
+    {
+    scheme->matrix[lower][upper] = 1;
+    scheme->matrix[upper][lower] = 1;
+    }
+return scheme;
+}
+
+
+static int scoreWordMatch(char *a, char *b, struct axtScoreScheme *ss)
+/* Align word a against word b with axtAffine using scheme ss.  Returns the
+ * score of the resulting alignment, or 0 if axtAffine returns NULL. */
+{
+/* Wrap each word in a dnaSeq, the sequence type axtAffine is handed here */
+struct dnaSeq wordA = { .name = "a", .dna = a, .size = strlen(a)};
+struct dnaSeq wordB = { .name = "b", .dna = b, .size = strlen(b)};
+struct axt *alignment = axtAffine(&wordA, &wordB, ss);
+if (alignment == NULL)
+    return 0;
+int score = alignment->score;
+axtFree(&alignment);
+return score;
+}
+
+
+static char *suggestSetting(char *setting, struct trackHubCheckOptions *options)
+/* Suggest a similar word from the options->suggest list.  Returns the name of the
+ * single best-scoring entry if its score is above 15; returns NULL when the list is
+ * empty, the top score is tied, or the match is too weak.  The returned string
+ * belongs to the list entry, so the caller must not free it. */
+{
+char *best = NULL;
+int bestScore = 0;
+int bestCount = 0;
+struct slName *suggest;
+
+struct axtScoreScheme *ss = scoreSchemeEnglish();
+for (suggest = options->suggest; suggest != NULL; suggest = suggest->next)
+    {
+    int score = scoreWordMatch(setting, suggest->name, ss);
+    if (score > bestScore)
+        {
+        best = suggest->name;
+        bestScore = score;
+        bestCount = 1;
+        }
+    else if (score == bestScore)
+        bestCount++;    // tie: a suggestion is only made for a unique winner
+    }
+/* The scheme is built fresh on every call, so release it here rather than leak it */
+freez(&ss);
+
+/* A low threshold would offer unrelated words, so insist on a strong, unambiguous hit */
+if (bestCount != 1 || bestScore <= 15)
+    return NULL;
+verbose(3, "suggest %s score: %d\n", best, bestScore);
+return best;
+}
+
+
 static int hubCheckTrackSetting(struct trackHub *hub, struct trackDb *tdb, char *setting, 
                                 struct trackHubCheckOptions *options, struct dyString *errors)
 /* Check trackDb setting to spec (by version and level). Returns non-zero if error, msg in errors */
 {
 verbose(4, "    Check setting '%s'\n", setting);
 
 /* skip internally added/used settings */
 if (sameString(setting, "polished") || sameString(setting, "group"))
     return 0;
 
 /* check setting is in extra file of supported settings */
 if (options->extra && hashLookup(options->extra, setting))
         return 0;
 
 /* check setting is supported in this version */
 struct trackHubSetting *hubSetting = hashFindVal(options->settings, setting);
 if (hubSetting == NULL)
     {
-    dyStringPrintf(errors, "Setting '%s' is unknown/unsupported\n", setting);
+    dyStringPrintf(errors, "Setting '%s' is unknown/unsupported", setting);  // newline is added below, after any hint
+    char *suggest = suggestSetting(setting, options);  // NULL unless exactly one candidate scores well enough
+    if (suggest != NULL)
+        dyStringPrintf(errors, " (did you mean '%s' ?) ", suggest);
+    dyStringPrintf(errors, "\n");  // end the message line whether or not a hint was appended
     return 1;
     }
 
 if (!options->strict)
     return 0;
 
 // check level
 if (differentString(hubSetting->level, "core"))
     {
     dyStringPrintf(errors, 
                 "Setting '%s' is level '%s'\n", setting, hubSetting->level);
     return 1;
     }
 return 0;
 }
@@ -362,32 +446,48 @@
     else if (differentString(hub->level, "all"))
         {
         dyStringPrintf(errors, 
             "Unknown hub support level: %s (expecting 'core' or 'all'). Defaulting to 'all'.\n", hub->level);
         retVal = 1;
         }
     }
 verbose(2, "Checking hub '%s'%s\n", hub->longLabel, options->strict ? " for compliance to 'core' (use -settings to view)": "");
 
 errCatch = errCatchNew();
 if (errCatchStart(errCatch))
     {
     /* make hash of settings for this version, saving in options */
     struct trackHubSetting *setting, *settings = trackHubSettingsForVersion(options->version);
     options->settings = newHash(0);
+    options->suggest = NULL;
     for (setting = settings; setting != NULL; setting = setting->next)
+        {
         hashAdd(options->settings, setting->name, setting);
+        slNameAddHead(&options->suggest, setting->name);
+        }
+    /* TODO: ? also need to check settings not in this list (other tdb fields) */
+
+    // TODO: move extra file handling here (out of hubCheck)
+    if (options->extra != NULL)
+        {
+        struct hashCookie cookie = hashFirst(options->extra);
+        struct hashEl *hel;
+        while ((hel = hashNext(&cookie)) != NULL)
+            slNameAddHead(&options->suggest, hel->name);
+        }
+    slNameSort(&options->suggest);
+    verbose(3, "Suggest list has %d settings\n", slCount(options->suggest));
     }
 errCatchEnd(errCatch);
 if (errCatch->gotError)
     {
     retVal = 1;
     dyStringPrintf(errors, "%s", errCatch->message->string);
     }
 errCatchFree(&errCatch);
 
 struct trackHubGenome *genome;
 for (genome = hub->genomeList; genome != NULL; genome = genome->next)
     {
     retVal |= hubCheckGenome(hub, genome, options, errors);
     }
 trackHubClose(&hub);