a678ada6c54aca4486bef3da5a363ab614e8c733 kate Wed May 13 09:01:24 2015 -0700 Add version check features to hubCheck (-version, -core, -extra, -settings). Initial coding, not yet functional. Also added levels (for testing only) to settings spec. refs #10015 diff --git src/hg/lib/trackHubCheck.c src/hg/lib/trackHubCheck.c index ec5c23a..6fca442 100644 --- src/hg/lib/trackHubCheck.c +++ src/hg/lib/trackHubCheck.c @@ -3,59 +3,69 @@ #include "trackDb.h" #include "bigWig.h" #include "bigBed.h" #include "errCatch.h" #include "vcf.h" #include "hgBam.h" #include "net.h" #include "htmshell.h" #include "htmlPage.h" #include "trackHub.h" #ifdef USE_HAL #include "halBlockViz.h" #endif -static int hubCheckTrackSettings(struct trackHub *hub, struct trackHubGenome *genome, - struct trackDb *tdb, struct trackHubCheckOptions *options, - struct dyString *errors) -/* Check trackDb settings are valid to spec */ +static int hubCheckTrackSetting(struct trackHub *hub, struct trackDb *tdb, char *setting, + struct trackHubCheckOptions *options, struct dyString *errors) +/* Check trackDb setting is by spec (by version and level). Returns non-zero if error, msg in errors */ { -//char *version = hashFindVal(hub->settings, "version"); -//char *level = hashFindVal(hub->settings, "level"); -int retVal = 0; -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) +struct trackHubSetting *hubSetting = hashFindVal(options->settings, setting); +/* check setting exists in this version */ +if (hubSetting == NULL) { + if (options->extra == NULL) + { + dyStringPrintf(errors, "Unknown/unsupported trackDb setting '%s' in hub version '%s'", + setting, options->version); + return 1; } -if (errCatch->gotError) + if (hashFindVal(options->extra, setting) == NULL) { - retVal = 1; - dyStringPrintf(errors, "%s", errCatch->message->string); + dyStringPrintf(errors, + "Unknown/unsupported trackDb setting '%s' in hub version '%s' with extras file/url '%s'", + setting, options->version, options->extraFile); + return 1; } -errCatchFree(&errCatch); -return retVal; } +// check level +if (!options->strict) + return 0; -static int hubCheckTrackFile(struct trackHub *hub, struct trackHubGenome *genome, - struct trackDb *tdb, struct dyString *errors) -/* Make sure that track is ok. */ -{ -int retVal = 0; -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) +if (differentString(hubSetting->level, "core")) { + dyStringPrintf(errors, + "Setting '%s' is level '%s' in version '%s' (not 'core')", + setting, hubSetting->level, options->version); + return 1; + } +return 0; +} + + +static void hubCheckTrackFile(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb) +/* Check remote file exists and is of correct type. Wrap this in error catcher */ { char *relativeUrl = trackDbSetting(tdb, "bigDataUrl"); if (relativeUrl != NULL) { char *type = trackDbRequiredSetting(tdb, "type"); char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl); verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl); if (startsWithWord("bigWig", type)) { /* Just open and close to verify file exists and is correct type. */ struct bbiFile *bbi = bigWigFileOpen(bigDataUrl); bbiFileClose(&bbi); } else if (startsWithWord("bigBed", type) || startsWithWord("bigGenePred", type)) { @@ -90,32 +100,57 @@ else if (startsWithWord("halSnake", type)) { char *errString; int handle = halOpenLOD(bigDataUrl, &errString); if (handle < 0) errAbort("HAL open error: %s\n", errString); if (halClose(handle, &errString) < 0) errAbort("HAL close error: %s\n", errString); } #endif else errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track); freez(&bigDataUrl); } } + + +static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb, + struct trackHubCheckOptions *options, struct dyString *errors) +/* Check track settings and optionally, files */ +{ +int retVal = 0; + +if (options->settings) + { + struct slPair *settings = slPairListFromString(tdb->settings, FALSE); + struct slPair *setting; + for (setting = settings; setting != NULL; setting = setting->next) + { + retVal |= hubCheckTrackSetting(hub, tdb, setting->name, options, errors); + } + /* NOTE: also need to check settings not in this list (other tdb fields) */ + } + +if (!options->checkFiles) + return retVal; + +struct errCatch *errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { + hubCheckTrackFile(hub, genome, tdb); } -errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); return retVal; } static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome, struct trackHubCheckOptions *options, struct dyString *errors) /* Check out genome within hub. */ { @@ -127,133 +162,168 @@ { tdbList = trackHubTracksForGenome(hub, genome); trackHubPolishTrackNames(hub, tdbList); } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) { - retVal |= hubCheckTrackSettings(hub, genome, tdb, options, errors); - if (options->checkFiles) - retVal |= hubCheckTrackFile(hub, genome, tdb, errors); + retVal |= hubCheckTrack(hub, genome, tdb, options, errors); } verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name); return retVal; } + char *trackHubVersionDefault() /* Return current version of trackDb settings spec for hubs */ { -// TODO: get from goldenPath/help/trackDb - return "V1"; +// TODO: get from goldenPath/help/trackDb/trackDbHub.current.html + return "v1"; // minor rev to v1a, etc. } -struct trackHubSetting *trackHubSettingsForVersion(char *version, char *specUrl) -/* Return list of settings with support level */ + +struct trackHubSetting *trackHubSettingsForVersion(char *version) +/* Return list of settings with support level. Version can be version string or spec url */ { if (version == NULL) version = trackHubVersionDefault(); -if (specUrl == NULL) +char *specUrl; +if (startsWith("http", version)) + specUrl = version; +else { - char url[256]; - safef(url, sizeof url, "http://genome.ucsc.edu/goldenPath/help/trackDbHub%s%s.html", - version ? "." : "", version ? version : ""); - specUrl = url; + char buf[256]; + safef(buf, sizeof buf, "http://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.%s.html", + version); + specUrl = buf; } struct htmlPage *page = htmlPageGet(specUrl); if (page == NULL) errAbort("Can't open trackDb settings spec %s\n", specUrl); verbose(3, "Opened URL %s\n", specUrl); /* Retrieve specs from file url. * Settings are the first text word within a <code> element nested in * a <div> having * attribute class="format". The support level ('level-*') is the class value of the * <code> tag. * E.g. <div class="format"><code class="level-core">boxedConfig on</code></div> produces: * setting=boxedConfig, class=core */ struct htmlTag *tag, *codeTag; struct htmlAttribute *attr, *codeAttr; struct trackHubSetting *spec, *specs = NULL; verbose(3, "Found %d tags\n", slCount(page->tags)); int divCount = 0; char buf[256]; for (tag = page->tags; tag != NULL; tag = tag->next) { - if (differentWord(tag->name, "DIV")) + if (differentWord(tag->name, "div")) continue; divCount++; verbose(5, "<div>%s\n", tag->start); for (attr = tag->attributes; attr != NULL; attr = attr->next) { if (differentWord(attr->name, "class") || differentWord(attr->val, "format")) continue; // TODO: Look on code tags (there may be multiple after "format" codeTag = tag->next; verbose(5, "Found format: tag %s\n", tag->name); if (differentWord(codeTag->name, "CODE")) break; - verbose(5, "Found <code>\n"); + verbose(4, "Found <code>\n"); for (codeAttr = codeTag->attributes; codeAttr != NULL; codeAttr = codeAttr->next) { verbose(5, "attr: name=%s, val=%s\n", codeAttr->name, codeAttr->val); if (differentWord(codeAttr->name, "class") || !startsWith("level-", codeAttr->val)) break; AllocVar(spec); int len = min(codeTag->next->start - codeTag->end, sizeof buf - 1); memcpy(buf, codeTag->end, len); buf[len] = 0; spec->name = cloneString(firstWordInLine(buf)); spec->level = chopPrefixAt(cloneString(codeAttr->val), '-'); // TODO: hash to pickup dupes (retain one with level, warn if multiple differ) slAddHead(&specs, spec); - verbose(5, "spec: name=%s, level=%s\n", spec->name, spec->level); + verbose(4, "spec: name=%s, level=%s\n", spec->name, spec->level); } } } verbose(5, "Found %d <div>'s\n", divCount); return specs; } + int trackHubCheck(char *hubUrl, struct trackHubCheckOptions *options, struct dyString *errors) /* hubCheck - Check a track data hub for integrity. Put errors in dyString. * return 0 if hub has no errors, 1 otherwise - * if checkTracks is TRUE, individual tracks are checked + * if options->checkTracks is TRUE, check remote files of individual tracks */ - { struct errCatch *errCatch = errCatchNew(); struct trackHub *hub = NULL; int retVal = 0; if (errCatchStart(errCatch)) + { hub = trackHubOpen(hubUrl, "hub_0"); + } errCatchEnd(errCatch); - if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); if (hub == NULL) return 1; verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel); verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList)); +if (hub->version != NULL) + options->version = hub->version; +else if (options->version == NULL) + options->version = trackHubVersionDefault(); + +options->strict = FALSE; +if (hub->level != NULL) + { + if (sameString(hub->level, "core") || sameString(hub->level, "strict")) + options->strict = TRUE; + else if (differentString(hub->level, "all")) + { + dyStringPrintf(errors, + "Unknown hub support level: %s (expecting 'core' or 'all'). Defaulting to 'all'.", hub->level); + retVal = 1; + } + } +struct trackHubSetting *settings = NULL; +errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { + settings = trackHubSettingsForVersion(options->version); + } +errCatchEnd(errCatch); +if (errCatch->gotError) + { + retVal = 1; + dyStringPrintf(errors, "%s", errCatch->message->string); + } +errCatchFree(&errCatch); + struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { retVal |= hubCheckGenome(hub, genome, options, errors); } trackHubClose(&hub); return retVal; }