93d7e16befec162aaa7fefbce24137788b407b87 kate Mon May 11 12:10:34 2015 -0700 Cleanup hub check code and add new data structs to prep for hub versioning. refs #10015 diff --git src/hg/lib/trackHubCheck.c src/hg/lib/trackHubCheck.c index 07a8110..ec5c23a 100644 --- src/hg/lib/trackHubCheck.c +++ src/hg/lib/trackHubCheck.c @@ -1,66 +1,63 @@ #include "common.h" #include "dystring.h" #include "trackDb.h" #include "bigWig.h" #include "bigBed.h" #include "errCatch.h" #include "vcf.h" #include "hgBam.h" #include "net.h" #include "htmshell.h" +#include "htmlPage.h" #include "trackHub.h" #ifdef USE_HAL #include "halBlockViz.h" #endif -static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, - struct trackDb *tdb, struct dyString *errors, FILE *searchFp) -/* Make sure that track is ok. */ +static int hubCheckTrackSettings(struct trackHub *hub, struct trackHubGenome *genome, + struct trackDb *tdb, struct trackHubCheckOptions *options, + struct dyString *errors) +/* Check trackDb settings are valid to spec */ { +//char *version = hashFindVal(hub->settings, "version"); +//char *level = hashFindVal(hub->settings, "level"); int retVal = 0; struct errCatch *errCatch = errCatchNew(); - if (errCatchStart(errCatch)) { -#ifdef NOTNOW // at the moment we're not getting text from the HTML - if (searchFp != NULL) - { - addOneDescription(genome->trackDbFile, tdb); - if (tdb->html != NULL) + } +if (errCatch->gotError) { - char *stripHtml =htmlTextStripTags(tdb->html); - strSwapChar(stripHtml, '\n', ' '); - strSwapChar(stripHtml, '\t', ' '); - strSwapChar(stripHtml, '\r', ' '); - strSwapChar(stripHtml, ')', ' '); - strSwapChar(stripHtml, '(', ' '); - strSwapChar(stripHtml, '[', ' '); - strSwapChar(stripHtml, ']', ' '); - fprintf(searchFp, "%s.%s\t%s\t%s\t%s\n",hub->url, tdb->track, - tdb->shortLabel, tdb->longLabel, stripHtml); + retVal = 1; + dyStringPrintf(errors, "%s", errCatch->message->string); } - else - fprintf(searchFp, "%s.%s\t%s\t%s\n",hub->url, tdb->track, - tdb->shortLabel, tdb->longLabel); +errCatchFree(&errCatch); +return retVal; } - else -#endif + +static int hubCheckTrackFile(struct trackHub *hub, struct trackHubGenome *genome, + struct trackDb *tdb, struct dyString *errors) +/* Make sure that track is ok. */ +{ +int retVal = 0; +struct errCatch *errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { { char *relativeUrl = trackDbSetting(tdb, "bigDataUrl"); - if (relativeUrl != NULL) { char *type = trackDbRequiredSetting(tdb, "type"); char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl); verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl); if (startsWithWord("bigWig", type)) { /* Just open and close to verify file exists and is correct type. */ struct bbiFile *bbi = bigWigFileOpen(bigDataUrl); bbiFileClose(&bbi); } else if (startsWithWord("bigBed", type) || startsWithWord("bigGenePred", type)) { /* Just open and close to verify file exists and is correct type. */ struct bbiFile *bbi = bigBedFileOpen(bigDataUrl); @@ -105,111 +102,158 @@ freez(&bigDataUrl); } } } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); return retVal; } + static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome, - struct dyString *errors, boolean checkTracks, FILE *searchFp) + struct trackHubCheckOptions *options, struct dyString *errors) /* Check out genome within hub. */ { struct errCatch *errCatch = errCatchNew(); struct trackDb *tdbList = NULL; int retVal = 0; if (errCatchStart(errCatch)) { tdbList = trackHubTracksForGenome(hub, genome); trackHubPolishTrackNames(hub, tdbList); } errCatchEnd(errCatch); - if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); -if (!checkTracks) - return retVal; - struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) - retVal |= hubCheckTrack(hub, genome, tdb, errors, searchFp); + { + retVal |= hubCheckTrackSettings(hub, genome, tdb, options, errors); + if (options->checkFiles) + retVal |= hubCheckTrackFile(hub, genome, tdb, errors); + } verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name); return retVal; } -int trackHubCheck(char *hubUrl, struct dyString *errors, - boolean checkTracks, FILE *searchFp) +char *trackHubVersionDefault() +/* Return current version of trackDb settings spec for hubs */ +{ +// TODO: get from goldenPath/help/trackDb + return "V1"; +} + +struct trackHubSetting *trackHubSettingsForVersion(char *version, char *specUrl) +/* Return list of settings with support level */ +{ +if (version == NULL) + version = trackHubVersionDefault(); +if (specUrl == NULL) + { + char url[256]; + safef(url, sizeof url, "http://genome.ucsc.edu/goldenPath/help/trackDbHub%s%s.html", + version ? "." : "", version ? version : ""); + specUrl = url; + } +struct htmlPage *page = htmlPageGet(specUrl); +if (page == NULL) + errAbort("Can't open trackDb settings spec %s\n", specUrl); +verbose(3, "Opened URL %s\n", specUrl); + +/* Retrieve specs from file url. + * Settings are the first text word within a <code> element nested in * a <div> having + * attribute class="format". The support level ('level-*') is the class value of the * <code> tag. + * E.g. <div class="format"><code class="level-core">boxedConfig on</code></div> produces: + * setting=boxedConfig, class=core */ + +struct htmlTag *tag, *codeTag; +struct htmlAttribute *attr, *codeAttr; +struct trackHubSetting *spec, *specs = NULL; +verbose(3, "Found %d tags\n", slCount(page->tags)); +int divCount = 0; +char buf[256]; +for (tag = page->tags; tag != NULL; tag = tag->next) + { + if (differentWord(tag->name, "DIV")) + continue; + divCount++; + verbose(5, "<div>%s\n", tag->start); + for (attr = tag->attributes; attr != NULL; attr = attr->next) + { + if (differentWord(attr->name, "class") || differentWord(attr->val, "format")) + continue; + // TODO: Look on code tags (there may be multiple after "format" + codeTag = tag->next; + verbose(5, "Found format: tag %s\n", tag->name); + if (differentWord(codeTag->name, "CODE")) + break; + verbose(5, "Found <code>\n"); + for (codeAttr = codeTag->attributes; codeAttr != NULL; codeAttr = codeAttr->next) + { + verbose(5, "attr: name=%s, val=%s\n", codeAttr->name, codeAttr->val); + if (differentWord(codeAttr->name, "class") || !startsWith("level-", codeAttr->val)) + break; + AllocVar(spec); + int len = min(codeTag->next->start - codeTag->end, sizeof buf - 1); + memcpy(buf, codeTag->end, len); + buf[len] = 0; + spec->name = cloneString(firstWordInLine(buf)); + spec->level = chopPrefixAt(cloneString(codeAttr->val), '-'); + // TODO: hash to pickup dupes (retain one with level, warn if multiple differ) + slAddHead(&specs, spec); + verbose(5, "spec: name=%s, level=%s\n", spec->name, spec->level); + } + } + } +verbose(5, "Found %d <div>'s\n", divCount); +return specs; +} + +int trackHubCheck(char *hubUrl, struct trackHubCheckOptions *options, struct dyString *errors) /* hubCheck - Check a track data hub for integrity. Put errors in dyString. * return 0 if hub has no errors, 1 otherwise * if checkTracks is TRUE, individual tracks are checked */ { struct errCatch *errCatch = errCatchNew(); struct trackHub *hub = NULL; int retVal = 0; if (errCatchStart(errCatch)) hub = trackHubOpen(hubUrl, "hub_0"); errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); if (hub == NULL) return 1; verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel); verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList)); -if (searchFp != NULL) - { - struct trackHubGenome *genomeList = hub->genomeList; - - for(; genomeList ; genomeList = genomeList->next) - fprintf(searchFp, "%s\t%s\n",hub->url, trackHubSkipHubName(genomeList->name)); - fprintf(searchFp, "%s\t%s\t%s\n",hub->url, hub->shortLabel, hub->longLabel); - - if (hub->descriptionUrl != NULL) - { - char *html = netReadTextFileIfExists(hub->descriptionUrl); - char *stripHtml =htmlTextStripTags(html); - strSwapChar(stripHtml, '\n', ' '); - strSwapChar(stripHtml, '\t', ' '); - strSwapChar(stripHtml, '\015', ' '); - strSwapChar(stripHtml, ')', ' '); - strSwapChar(stripHtml, '(', ' '); - strSwapChar(stripHtml, '[', ' '); - strSwapChar(stripHtml, ']', ' '); - fprintf(searchFp, "%s\t%s\n",hub->url, stripHtml); - } - - return 0; - } - struct trackHubGenome *genome; for (genome = hub->genomeList; genome != NULL; genome = genome->next) { - retVal |= hubCheckGenome(hub, genome, errors, checkTracks, NULL); + retVal |= hubCheckGenome(hub, genome, options, errors); } trackHubClose(&hub); - return retVal; }