2f75f45421c4854066f7572ac8a9f52e09068f29
kate
  Fri May 22 15:06:37 2015 -0700
1. Add -test option to use genome-test spec file.  2. Remove unneeded errCatch (from code review).  3. Simplify code org (delibify unshared code); bye to lib/trackHubCheck.c. refs #10015

diff --git src/hg/lib/trackHubCheck.c src/hg/lib/trackHubCheck.c
deleted file mode 100644
index 59225bb..0000000
--- src/hg/lib/trackHubCheck.c
+++ /dev/null
@@ -1,494 +0,0 @@
-#include "common.h"
-#include "dystring.h"
-#include "trackDb.h"
-#include "bigWig.h"
-#include "bigBed.h"
-#include "errCatch.h"
-#include "vcf.h"
-#include "hgBam.h"
-#include "net.h"
-#include "htmshell.h"
-#include "htmlPage.h"
-#include "trackHub.h"
-#include "axt.h"
-
-#ifdef USE_HAL
-#include "halBlockViz.h"
-#endif
-
-
-/* Mini English spell-check using axt sequence alignment code!  From JK
- * Works in this context when thresholded high.  */
-
-static struct axtScoreScheme *scoreSchemeEnglish()
-/* Return something that will match just English words more or less. */
-{
-struct axtScoreScheme *ss;
-AllocVar(ss);
-ss->gapOpen = 4;
-ss->gapExtend = 2;
-
-/* Set up diagonal to match */
-int i;
-for (i=0; i<256; ++i)
-    ss->matrix[i][i] = 2;
-
-/* Set up upper and lower case to match mostly */
-int caseDiff = 'A' - 'a';
-for (i='a'; i<='z'; ++i)
-    {
-    ss->matrix[i][i+caseDiff] = 1;
-    ss->matrix[i+caseDiff][i] = 1;
-    }
-return ss;
-}
-
-
-static int scoreWordMatch(char *a, char *b, struct axtScoreScheme *ss)
-/* Return alignment score of two words */
-{
-struct dnaSeq aSeq = { .name = "a", .dna = a, .size = strlen(a)};
-struct dnaSeq bSeq = { .name = "b", .dna = b, .size = strlen(b)};
-struct axt *axt = axtAffine(&aSeq, &bSeq, ss);
-int result = 0;
-if (axt != NULL)
-    {
-    result = axt->score;
-    axtFree(&axt);
-    }
-return result;
-}
-
-
-static char *suggestSetting(char *setting, struct trackHubCheckOptions *options)
-/* Suggest a similar word from settings lists.  Suggest only if there is a single good match */
-{
-char *best;
-int bestScore = 0;
-int bestCount = 0;
-struct slName *suggest;
-
-struct axtScoreScheme *ss = scoreSchemeEnglish();
-for (suggest = options->suggest; suggest != NULL; suggest = suggest->next)
-    {
-    int score = scoreWordMatch(setting, suggest->name, ss);
-    if (score < bestScore)
-        continue;
-    if (score > bestScore)
-        {
-        best = suggest->name;
-        bestScore = score;
-        bestCount = 1;
-        }
-    else
-        {
-        // same score
-        bestCount++;
-        }
-    }
-if (bestCount == 1 && bestScore > 15)
-    {
-    verbose(3, "suggest %s score: %d\n", best, bestScore);
-    return best;
-    }
-return NULL;
-}
-
-
-static int hubCheckTrackSetting(struct trackHub *hub, struct trackDb *tdb, char *setting, 
-                                struct trackHubCheckOptions *options, struct dyString *errors)
-/* Check trackDb setting to spec (by version and level). Returns non-zero if error, msg in errors */
-{
-verbose(4, "    Check setting '%s'\n", setting);
-
-int retVal = 0;
-/* skip internally added/used settings */
-if (sameString(setting, "polished") || sameString(setting, "group"))
-    return 0;
-
-/* check setting is in extra file of supported settings */
-if (options->extra && hashLookup(options->extra, setting))
-        return 0;
-
-struct errCatch *errCatch = errCatchNew();
-if (errCatchStart(errCatch))
-    {
-    /* check setting is supported in this version */
-    struct trackHubSetting *hubSetting = hashFindVal(options->settings, setting);
-    if (hubSetting == NULL)
-        {
-        struct dyString *ds = dyStringNew(0);
-        dyStringPrintf(ds, "Setting '%s' is unknown/unsupported", setting);
-        char *suggest = suggestSetting(setting, options);
-        if (suggest != NULL)
-            dyStringPrintf(ds, " (did you mean '%s' ?) ", suggest);
-        errAbort("%s", dyStringCannibalize(&ds));
-        }
-
-    // check level
-    if (options->strict && differentString(hubSetting->level, "core"))
-        errAbort( "Setting '%s' is level '%s'", setting, hubSetting->level);
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    {
-    dyStringPrintf(errors, "%s", errCatch->message->string);
-    retVal = 1;
-    }
-errCatchFree(&errCatch);
-return retVal;
-}
-
-
-static void hubCheckTrackFile(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb)
-/* Check remote file exists and is of correct type. Wrap this in error catcher */
-{
-char *relativeUrl = trackDbSetting(tdb, "bigDataUrl");
-if (relativeUrl != NULL)
-    {
-    char *type = trackDbRequiredSetting(tdb, "type");
-    char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl);
-    verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl);
-    if (startsWithWord("bigWig", type))
-        {
-        /* Just open and close to verify file exists and is correct type. */
-        struct bbiFile *bbi = bigWigFileOpen(bigDataUrl);
-        bbiFileClose(&bbi);
-        }
-    else if (startsWithWord("bigBed", type) || startsWithWord("bigGenePred", type))
-        {
-        /* Just open and close to verify file exists and is correct type. */
-        struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
-        char *typeString = cloneString(type);
-        nextWord(&typeString);
-        if (typeString != NULL)
-            {
-            unsigned numFields = sqlUnsigned(nextWord(&typeString));
-            if (numFields > bbi->fieldCount)
-                errAbort("fewer fields in bigBed (%d) than in type statement (%d) for track %s with bigDataUrl %s", bbi->fieldCount, numFields, trackHubSkipHubName(tdb->track), bigDataUrl);
-            }
-        bbiFileClose(&bbi);
-        }
-    else if (startsWithWord("vcfTabix", type))
-        {
-        /* Just open and close to verify file exists and is correct type. */
-        struct vcfFile *vcf = vcfTabixFileMayOpen(bigDataUrl, NULL, 0, 0, 1, 1);
-        if (vcf == NULL)
-            // Warnings already indicated whether the tabix file is missing etc.
-            errAbort("Couldn't open %s and/or its tabix index (.tbi) file.  "
-                     "See http://genome.ucsc.edu/goldenPath/help/vcf.html",
-                     bigDataUrl);
-        vcfFileFree(&vcf);
-        }
-    else if (startsWithWord("bam", type))
-        {
-        bamFileAndIndexMustExist(bigDataUrl);
-        }
-#ifdef USE_HAL
-    else if (startsWithWord("halSnake", type))
-        {
-        char *errString;
-        int handle = halOpenLOD(bigDataUrl, &errString);
-        if (handle < 0)
-            errAbort("HAL open error: %s", errString);
-        if (halClose(handle, &errString) < 0)
-            errAbort("HAL close error: %s", errString);
-        }
-#endif
-    else
-        errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track);
-    freez(&bigDataUrl);
-    }
-}
-
-
-static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb, 
-                        struct trackHubCheckOptions *options, struct dyString *errors)
-/* Check track settings and optionally, files */
-{
-int retVal = 0;
-
-if (options->checkSettings && options->settings)
-    {
-    //verbose(3, "Found %d settings to check to spec\n", slCount(settings));
-    verbose(3, "Checking track: %s\n", tdb->shortLabel);
-    verbose(3, "Found %d settings to check to spec\n", hashNumEntries(tdb->settingsHash));
-    struct hashEl *hel;
-    struct hashCookie cookie = hashFirst(tdb->settingsHash);
-    while ((hel = hashNext(&cookie)) != NULL)
-        retVal |= hubCheckTrackSetting(hub, tdb, hel->name, options, errors);
-    /* TODO: ? also need to check settings not in this list (other tdb fields) */
-    }
-
-if (!options->checkFiles)
-    return retVal;
-
-struct errCatch *errCatch = errCatchNew();
-if (errCatchStart(errCatch))
-    {
-    hubCheckTrackFile(hub, genome, tdb);
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    {
-    retVal = 1;
-    dyStringPrintf(errors, "%s", errCatch->message->string);
-    }
-errCatchFree(&errCatch);
-
-return retVal;
-}
-
-
-static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome,
-                struct trackHubCheckOptions *options, struct dyString *errors)
-/* Check out genome within hub. */
-{
-struct errCatch *errCatch = errCatchNew();
-struct trackDb *tdbList = NULL;
-int retVal = 0;
-
-if (errCatchStart(errCatch))
-    {
-    tdbList = trackHubTracksForGenome(hub, genome);
-    trackHubPolishTrackNames(hub, tdbList);
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    {
-    retVal = 1;
-    dyStringPrintf(errors, "%s", errCatch->message->string);
-    }
-errCatchFree(&errCatch);
-
-verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name);
-struct trackDb *tdb;
-for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
-    {
-    retVal |= hubCheckTrack(hub, genome, tdb, options, errors);
-    }
-
-return retVal;
-}
-
-
-char *trackHubVersionDefault()
-/* Return current version of trackDb settings spec for hubs */
-{
-// TODO: get from goldenPath/help/trackDb/trackDbHub.current.html
-    return "v0";  // minor rev to v1a, etc.
-}
-
-
-int trackHubSettingLevel(struct trackHubSetting *spec)
-/* Get integer for level  (core > full > new > deprecated) */
-{
-if (sameString(spec->level, "core"))
-    return 4;
-if (sameString(spec->level, "full"))
-    return 3;
-if (sameString(spec->level, "new"))
-    return 2;
-if (sameString(spec->level, "deprecated"))
-    return 1;
-return 0; // errAbort ?
-}
-
-
-boolean trackHubSettingLevelCmp(struct trackHubSetting *spec1, struct trackHubSetting *spec2)
-{
-/* Compare setting levels */
-return trackHubSettingLevel(spec1) - trackHubSettingLevel(spec2);
-}
-
-
-struct trackHubSetting *trackHubSettingsForVersion(char *version)
-/* Return list of settings with support level. Version can be version string or spec url */
-{
-if (version == NULL)
-    version = trackHubVersionDefault();
-char *specUrl;
-if (startsWith("http", version))
-    specUrl = version;
-else
-    {
-    char buf[256];
-    char *specHost = "genome.ucsc.edu";
-    safef(buf, sizeof buf, "http://%s/goldenPath/help/trackDb/trackDbHub.%s.html", 
-                        specHost, version);
-    specUrl = buf;
-    }
-verbose(2, "Validating to spec at %s\n", specUrl);
-struct htmlPage *page = htmlPageGet(specUrl);
-if (page == NULL)
-    errAbort("Can't open hub settings spec %s", specUrl);
-
-//TODO: apply page validator
-//htmlPageValidateOrAbort(page);  // would like to use this, but current page doesn't validate
-// Would need to replace empty table (replaced by JS) with div, and assure htmlPageValidateOrAbort
-// is run on any page change.
-
-/* TODO: validate this is a trackDbHub spec */
-/* (e.g. scan tags for the hub version, perhaps limiting to first N tags) */
-
-/* Retrieve specs from file url. 
- * Settings are the first text word within any <code> tag having class="level-" attribute.
- * The level represents the level of support for the setting (e.g. core, full, deprecated)
- * The support level ('level-*') is the class value of the * <code> tag.
- * E.g.  <code class="level-core">boxedConfig on</code> produces:
- *      setting=boxedConfig, class=core */
-
-struct htmlTag *tag;
-struct htmlAttribute *attr;
-struct trackHubSetting *spec, *savedSpec;
-struct hash *specHash = hashNew(0);
-verbose(5, "Found %d tags\n", slCount(page->tags));
-char buf[256];
-for (tag = page->tags; tag != NULL; tag = tag->next)
-    {
-    verbose(6, "    TAG: %s\n", tag->name);
-    if (differentWord(tag->name, "code"))
-        continue;
-    attr = tag->attributes;
-    if (attr == NULL || differentString(attr->name, "class") || !startsWith("level-", attr->val))
-                        continue;
-    AllocVar(spec);
-    int len = min(tag->next->start - tag->end, sizeof buf - 1);
-    memcpy(buf, tag->end, len);
-    buf[len] = 0;
-    verbose(7, "Found spec: %s\n", buf);
-    spec->name = cloneString(firstWordInLine(buf));
-    spec->level = cloneString(chopPrefixAt(attr->val, '-'));
-    verbose(6, "spec: name=%s, level=%s\n", spec->name, spec->level);
-    savedSpec = (struct trackHubSetting *)hashFindVal(specHash, spec->name);
-    if (savedSpec != NULL)
-        verbose(6, "found spec %s level %s in hash\n", savedSpec->name, savedSpec->level);
-    if (savedSpec == NULL)
-        {
-        hashAdd(specHash, spec->name, spec);
-        verbose(6, "added spec %s at level %s\n", spec->name, spec->level);
-        }
-    else if (trackHubSettingLevelCmp(spec, savedSpec) > 0)
-        {
-        hashReplace(specHash, spec->name, spec);
-        verbose(6, "replaced spec %s at level %s, was %s\n", 
-            spec->name, spec->level, savedSpec->level);
-        }
-    }
-struct hashEl *el, *list = hashElListHash(specHash);
-
-int settingsCt = slCount(list);
-verbose(5, "Found %d settings's\n", slCount(list));
-if (settingsCt == 0)
-    errAbort("Can't find hub setting info at %s."
-              " Use -version to indicate a different version number or url.", specUrl);
-
-slSort(&list, hashElCmp);
-struct trackHubSetting *specs = NULL;
-int coreCt = 0;
-for (el = list; el != NULL; el = el->next)
-    {
-    if (sameString(((struct trackHubSetting *)el->val)->level, "core"))
-        coreCt++;
-    slAddHead(&specs, el->val);
-    }
-slReverse(&specs);
-verbose(3, "Found %d supported settings for this version (%d core)\n",
-                        slCount(specs), coreCt);
-return specs;
-}
-
-
-static int hubSettingsCheckInit(struct trackHub *hub,  struct trackHubCheckOptions *options, struct dyString *errors)
-{
-int retVal = 0;
-if (hub->version != NULL && options->version == NULL)
-    options->version = hub->version;
-else if (options->version == NULL)
-    options->version = trackHubVersionDefault();
-
-if (options->strict == FALSE && hub->level != NULL)
-    {
-    if (sameString(hub->level, "core"))
-        options->strict = TRUE;
-    else if (differentString(hub->level, "all"))
-        {
-        dyStringPrintf(errors, 
-            "Unknown hub support level: %s (expecting 'core' or 'all'). Defaulting to 'all'.\n", hub->level);
-        retVal = 1;
-        }
-    }
-verbose(2, "Checking hub '%s'%s\n", hub->longLabel, options->strict ? " for compliance to 'core' (use -settings to view)": "");
-
-struct errCatch *errCatch = errCatchNew();
-if (errCatchStart(errCatch))
-    {
-    /* make hash of settings for this version, saving in options */
-    struct trackHubSetting *setting, *settings = trackHubSettingsForVersion(options->version);
-    options->settings = newHash(0);
-    options->suggest = NULL;
-    for (setting = settings; setting != NULL; setting = setting->next)
-        {
-        hashAdd(options->settings, setting->name, setting);
-        slNameAddHead(&options->suggest, setting->name);
-        }
-    /* TODO: ? also need to check settings not in this list (other tdb fields) */
-
-    // TODO: move extra file handling here (out of hubCheck)
-    if (options->extra != NULL)
-        {
-        struct hashCookie cookie = hashFirst(options->extra);
-        struct hashEl *hel;
-        while ((hel = hashNext(&cookie)) != NULL)
-            slNameAddHead(&options->suggest, hel->name);
-        }
-    slNameSort(&options->suggest);
-    verbose(3, "Suggest list has %d settings\n", slCount(options->suggest));
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    {
-    retVal = 1;
-    dyStringPrintf(errors, "%s", errCatch->message->string);
-    }
-errCatchFree(&errCatch);
-return retVal;
-}
-
-
-int trackHubCheck(char *hubUrl, struct trackHubCheckOptions *options, struct dyString *errors)
-/* hubCheck - Check a track data hub for integrity. Put errors in dyString.
- *      return 0 if hub has no errors, 1 otherwise 
- *      if options->checkTracks is TRUE, check remote files of individual tracks
- */
-{
-struct errCatch *errCatch = errCatchNew();
-struct trackHub *hub = NULL;
-int retVal = 0;
-
-if (errCatchStart(errCatch))
-    {
-    hub = trackHubOpen(hubUrl, "hub_0");
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    {
-    retVal = 1;
-    dyStringPrintf(errors, "%s", errCatch->message->string);
-    }
-errCatchFree(&errCatch);
-
-if (hub == NULL)
-    return 1;
-
-if (options->checkSettings)
-    retVal |= hubSettingsCheckInit(hub, options, errors);
-
-struct trackHubGenome *genome;
-for (genome = hub->genomeList; genome != NULL; genome = genome->next)
-    {
-    retVal |= hubCheckGenome(hub, genome, options, errors);
-    }
-trackHubClose(&hub);
-return retVal;
-}