c09c891ffe34df8d4980e13bfbdd3c3dc239a296 kate Fri May 15 08:11:15 2015 -0700 Add 'did you mean' suggestion to hubCheck settings check. Similarity test from JK based on axt alignment scoring. refs #10015 diff --git src/hg/lib/trackHubCheck.c src/hg/lib/trackHubCheck.c index 6ca105a..802c20e 100644 --- src/hg/lib/trackHubCheck.c +++ src/hg/lib/trackHubCheck.c @@ -1,51 +1,135 @@ #include "common.h" #include "dystring.h" #include "trackDb.h" #include "bigWig.h" #include "bigBed.h" #include "errCatch.h" #include "vcf.h" #include "hgBam.h" #include "net.h" #include "htmshell.h" #include "htmlPage.h" #include "trackHub.h" +#include "axt.h" #ifdef USE_HAL #include "halBlockViz.h" #endif + +/* Mini English spell-check using axt sequence alignment code! From JK + * Works in this context when thresholded high. */ + +static struct axtScoreScheme *scoreSchemeEnglish() +/* Return something that will match just English words more or less. */ +{ +struct axtScoreScheme *ss; +AllocVar(ss); +ss->gapOpen = 4; +ss->gapExtend = 2; + +/* Set up diagonal to match */ +int i; +for (i=0; i<256; ++i) + ss->matrix[i][i] = 2; + +/* Set up upper and lower case to match mostly */ +int caseDiff = 'A' - 'a'; +for (i='a'; i<='z'; ++i) + { + ss->matrix[i][i+caseDiff] = 1; + ss->matrix[i+caseDiff][i] = 1; + } +return ss; +} + + +static int scoreWordMatch(char *a, char *b, struct axtScoreScheme *ss) +/* Return alignment score of two words */ +{ +struct dnaSeq aSeq = { .name = "a", .dna = a, .size = strlen(a)}; +struct dnaSeq bSeq = { .name = "b", .dna = b, .size = strlen(b)}; +struct axt *axt = axtAffine(&aSeq, &bSeq, ss); +int result = 0; +if (axt != NULL) + { + result = axt->score; + axtFree(&axt); + } +return result; +} + + +static char *suggestSetting(char *setting, struct trackHubCheckOptions *options) +/* Suggest a similar word from settings lists. Suggest only if there is a single good match */ +{ +char *best; +int bestScore = 0; +int bestCount = 0; +struct slName *suggest; + +struct axtScoreScheme *ss = scoreSchemeEnglish(); +for (suggest = options->suggest; suggest != NULL; suggest = suggest->next) + { + int score = scoreWordMatch(setting, suggest->name, ss); + if (score < bestScore) + continue; + if (score > bestScore) + { + best = suggest->name; + bestScore = score; + bestCount = 1; + } + else + { + // same score + bestCount++; + } + } +if (bestCount == 1 && bestScore > 15) + { + verbose(3, "suggest %s score: %d\n", best, bestScore); + return best; + } +return NULL; +} + + static int hubCheckTrackSetting(struct trackHub *hub, struct trackDb *tdb, char *setting, struct trackHubCheckOptions *options, struct dyString *errors) /* Check trackDb setting to spec (by version and level). Returns non-zero if error, msg in errors */ { verbose(4, " Check setting '%s'\n", setting); /* skip internally added/used settings */ if (sameString(setting, "polished") || sameString(setting, "group")) return 0; /* check setting is in extra file of supported settings */ if (options->extra && hashLookup(options->extra, setting)) return 0; /* check setting is supported in this version */ struct trackHubSetting *hubSetting = hashFindVal(options->settings, setting); if (hubSetting == NULL) { - dyStringPrintf(errors, "Setting '%s' is unknown/unsupported\n", setting); + dyStringPrintf(errors, "Setting '%s' is unknown/unsupported", setting); + char *suggest = suggestSetting(setting, options); + if (suggest != NULL) + dyStringPrintf(errors, " (did you mean '%s' ?) ", suggest); + dyStringPrintf(errors, "\n"); return 1; } if (!options->strict) return 0; // check level if (differentString(hubSetting->level, "core")) { dyStringPrintf(errors, "Setting '%s' is level '%s'\n", setting, hubSetting->level); return 1; } return 0; } @@ -362,32 +446,48 @@ else if (differentString(hub->level, "all")) { dyStringPrintf(errors, "Unknown hub support level: %s (expecting 'core' or 'all'). Defaulting to 'all'.\n", hub->level); retVal = 1; } } verbose(2, "Checking hub '%s'%s\n", hub->longLabel, options->strict ? " for compliance to 'core' (use -settings to view)": ""); errCatch = errCatchNew(); if (errCatchStart(errCatch)) { /* make hash of settings for this version, saving in options */ struct trackHubSetting *setting, *settings = trackHubSettingsForVersion(options->version); options->settings = newHash(0); + options->suggest = NULL; for (setting = settings; setting != NULL; setting = setting->next) + { hashAdd(options->settings, setting->name, setting); + slNameAddHead(&options->suggest, setting->name); + } + /* TODO: ? also need to check settings not in this list (other tdb fields) */ + + // TODO: move extra file handling here (out of hubCheck) + if (options->extra != NULL) + { + struct hashCookie cookie = hashFirst(options->extra); + struct hashEl *hel; + while ((hel = hashNext(&cookie)) != NULL) + slNameAddHead(&options->suggest, hel->name); + } + slNameSort(&options->suggest); + verbose(3, "Suggest list has %d settings\n", slCount(options->suggest)); } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { retVal |= hubCheckGenome(hub, genome, options, errors); } trackHubClose(&hub);