e112c27b7a39c6fd9def5b24ed5015087d057778 galt Mon Nov 3 16:27:05 2025 -0800 oops, I accidentally checked in these files, must have missed a -u with git add command so it added everything in the directory. no redmine. diff --git src/hg/utils/hubCheck/hubCheck.c.debugBadFEFFCharsInHubtxtDropBox src/hg/utils/hubCheck/hubCheck.c.debugBadFEFFCharsInHubtxtDropBox deleted file mode 100644 index 8fd10d08a72..00000000000 --- src/hg/utils/hubCheck/hubCheck.c.debugBadFEFFCharsInHubtxtDropBox +++ /dev/null @@ -1,1326 +0,0 @@ -/* Copyright (C) 2014 The Regents of the University of California - * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ - -#include "axt.h" -#include "common.h" -#include "bigWig.h" -#include "bigBed.h" -#include "dystring.h" -#include "errCatch.h" -#include "hgBam.h" -#include "htmshell.h" -#include "htmlPage.h" -#include "hui.h" -#include "net.h" -#include "options.h" -#include "trackDb.h" -#include "trackHub.h" -#include "udc.h" -#include "vcf.h" -#include "bedTabix.h" -#include "knetUdc.h" - -#ifdef USE_HAL -#include "halBlockViz.h" -#endif - -static int cacheTime = 1; - -void usage() -/* Explain usage and exit. */ -{ -errAbort( - "hubCheck - Check a track data hub for integrity.\n" - "usage:\n" - " hubCheck http://yourHost/yourDir/hub.txt\n" - "options:\n" - " -noTracks - don't check remote files for tracks, just trackDb (faster)\n" - " -checkSettings - check trackDb settings to spec\n" - " -version=[v?|url] - version to validate settings against\n" - " (defaults to version in hub.txt, or current standard)\n" - " -extra=[file|url] - accept settings in this file (or url)\n" - " -level=base|required - reject settings below this support level\n" - " -settings - just list settings with support level\n" - " -genome=genome - only check this genome\n" - " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs.\n" - " Will create this directory if not existing\n" - " -httpsCertCheck=[abort,warn,log,none] - set the ssl certificate verification mode.\n" - " -httpsCertCheckDomainExceptions= - space separated list of domains to whitelist.\n" - " -printMeta - print the metadata for each track\n" - " -cacheTime=N - set cache refresh time in seconds, default %d\n" - " -verbose=2 - output verbosely\n" - , cacheTime - ); -} - -static struct optionSpec options[] = { - {"version", OPTION_STRING}, - {"level", OPTION_STRING}, - {"extra", OPTION_STRING}, - {"noTracks", OPTION_BOOLEAN}, - {"settings", OPTION_BOOLEAN}, - {"checkSettings", OPTION_BOOLEAN}, - {"genome", OPTION_STRING}, - {"test", OPTION_BOOLEAN}, - {"printMeta", OPTION_BOOLEAN}, - {"udcDir", OPTION_STRING}, - {"httpsCertCheck", OPTION_STRING}, - {"httpsCertCheckDomainExceptions", OPTION_STRING}, - {"specHost", OPTION_STRING}, - {"cacheTime", OPTION_INT}, - // intentionally undocumented option for hgHubConnect - {"htmlOut", OPTION_BOOLEAN}, - {NULL, 0}, -}; - -struct trackHubCheckOptions -/* How to check track hub */ - { - boolean checkFiles; /* check remote files exist and are correct type */ - boolean checkSettings; /* check trackDb settings to spec */ - boolean printMeta; /* print out the metadata for each track */ - char *version; /* hub spec version to check */ - char *specHost; /* server hosting hub spec */ - char *level; /* check hub is valid to this support level */ - char *extraFile; /* name of extra file/url with additional settings to accept */ - char *genome; /* only check this genome */ - /* intermediate data */ - struct hash *settings; /* supported settings for this version */ - struct hash *extra; /* additional trackDb settings to accept */ - struct slName *suggest; /* list of supported settings for suggesting */ - /* hgHubConnect only */ - boolean htmlOut; /* put special formatted text into the errors dyString */ - }; - -struct trackHubSettingSpec -/* Setting name and support level, from trackDbHub.html (the spec) */ - { - struct trackHubSettingSpec *next; - char *name; /* setting name */ - char *level; /* support level (required, base, full, new, deprecated) */ - }; - - -/* Mini English spell-check using axt sequence alignment code! From JK - * Works in this context when thresholded high. */ - -static struct axtScoreScheme *scoreSchemeEnglish() -/* Return something that will match just English words more or less. */ -{ -struct axtScoreScheme *ss; -AllocVar(ss); -ss->gapOpen = 4; -ss->gapExtend = 2; - -/* Set up diagonal to match */ -int i; -for (i=0; i<256; ++i) - ss->matrix[i][i] = 2; - -/* Set up upper and lower case to match mostly */ -int caseDiff = 'A' - 'a'; -for (i='a'; i<='z'; ++i) - { - ss->matrix[i][i+caseDiff] = 1; - ss->matrix[i+caseDiff][i] = 1; - } -return ss; -} - - -static int scoreWordMatch(char *a, char *b, struct axtScoreScheme *ss) -/* Return alignment score of two words */ -{ -struct dnaSeq aSeq = { .name = "a", .dna = a, .size = strlen(a)}; -struct dnaSeq bSeq = { .name = "b", .dna = b, .size = strlen(b)}; -struct axt *axt = axtAffine(&aSeq, &bSeq, ss); -int result = 0; -if (axt != NULL) - { - result = axt->score; - axtFree(&axt); - } -return result; -} - - -static char *suggestSetting(char *setting, struct trackHubCheckOptions *options) -/* Suggest a similar word from settings lists. Suggest only if there is a single good match */ -{ -char *best = NULL; -int bestScore = 0; -int bestCount = 0; -struct slName *suggest; - -struct axtScoreScheme *ss = scoreSchemeEnglish(); -for (suggest = options->suggest; suggest != NULL; suggest = suggest->next) - { - int score = scoreWordMatch(setting, suggest->name, ss); - if (score < bestScore) - continue; - if (score > bestScore) - { - best = suggest->name; - bestScore = score; - bestCount = 1; - } - else - { - // same score - bestCount++; - } - } -if (bestCount == 1 && bestScore > 15) - { - verbose(3, "suggest %s score: %d\n", best, bestScore); - return best; - } -return NULL; -} - -struct htmlPage *trackHubVersionSpecMustGet(char *specHost, char *version) -/* Open the trackDbHub.html file and attach html reader. Use default version if NULL */ -{ -char *specUrl; -char buf[256]; -if (version != NULL && startsWith("http", version)) - specUrl = version; -else - { - safef(buf, sizeof buf, "http://%s/goldenPath/help/trackDb/trackDbHub%s%s.html", - specHost, version ? "." : "", version ? version: ""); - specUrl = buf; - } -verbose(2, "Using spec at %s\n", specUrl); -struct htmlPage *page = htmlPageGet(specUrl); -if (page == NULL) - errAbort("Can't open hub settings spec %s", specUrl); - -//TODO: apply page validator -//htmlPageValidateOrAbort(page); // would like to use this, but current page doesn't validate -// Would need to replace empty table (replaced by JS) with div, and assure htmlPageValidateOrAbort -// is run on any page change. - -/* TODO: validate this is a trackDbHub spec */ -/* (e.g. scan tags for the hub version, perhaps limiting to first N tags) */ -return page; -} - - -char *trackHubVersionDefault(char *specHost, struct htmlPage **pageRet) -/* Return default version of hub spec by parsing for version id in the page */ -{ -struct htmlPage *page = trackHubVersionSpecMustGet(specHost, NULL); -struct htmlTag *tag; - -/* Find version tag (span id=) */ -char buf[256]; -verbose(6, "Found %d tags\n", slCount(page->tags)); -for (tag = page->tags; tag != NULL; tag = tag->next) - { - if (sameString(strLower(tag->name), "span") && - tag->attributes != NULL && - sameString(strLower(tag->attributes->name), "id") && - sameString(tag->attributes->val, "trackDbHub_version")) - { - int len = min(tag->next->start - tag->end, sizeof buf - 1); - memcpy(buf, tag->end, len); - buf[len] = 0; - verbose(6, "Found version: %s\n", buf); - return cloneString(buf); - } - } -return NULL; -} - - -int trackHubSettingLevel(struct trackHubSettingSpec *spec) -/* Get integer for level (required > base > full > new > deprecated). -1 for unknown */ -{ -if (sameString(spec->level, "required")) - return 5; -if (sameString(spec->level, "base")) - return 4; -if (sameString(spec->level, "full")) - return 3; -if (sameString(spec->level, "new")) - return 2; -if (sameString(spec->level, "deprecated")) - return 1; -if (sameString(spec->level, "all")) //used only as check option - return 0; -return -1; // unknown -} - - -boolean trackHubSettingLevelCmp(struct trackHubSettingSpec *spec1, struct trackHubSettingSpec *spec2) -{ -/* Compare setting levels */ -return trackHubSettingLevel(spec1) - trackHubSettingLevel(spec2); -} - - -struct trackHubSettingSpec *trackHubSettingsForVersion(char *specHost, char *version) -/* Return list of settings with support level. Version can be version string or spec url */ -{ -struct htmlPage *page = NULL; -if (version == NULL) - { - version = trackHubVersionDefault(specHost, &page); - if (version == NULL) - errAbort("Can't get default spec from host %s", specHost); - } - -/* Retrieve specs from file url. - * Settings are the first text word within any <code> tag having class="level-" attribute. - * The level represents the level of support for the setting (e.g. base, full, deprecated) - * The support level ('level-*') is the class value of the * <code> tag. - * E.g. <code class="level-full">boxedConfig on</code> produces: - * setting=boxedConfig, class=full */ - -if (page == NULL) - page = trackHubVersionSpecMustGet(specHost, version); -if (page == NULL) - errAbort("Can't get settings spec for version %s from host %s", version, specHost); -verbose(5, "Found %d tags\n", slCount(page->tags)); - -struct trackHubSettingSpec *spec, *savedSpec; -struct hash *specHash = hashNew(0); -struct htmlTag *tag; -struct htmlAttribute *attr; -char buf[256]; -for (tag = page->tags; tag != NULL; tag = tag->next) - { - if (differentWord(tag->name, "code")) - continue; - attr = tag->attributes; - if (attr == NULL || differentString(attr->name, "class") || !startsWith("level-", attr->val)) - continue; - AllocVar(spec); - int len = min(tag->next->start - tag->end, sizeof buf - 1); - memcpy(buf, tag->end, len); - buf[len] = 0; - verbose(6, "Found spec: %s\n", buf); - spec->name = cloneString(firstWordInLine(buf)); - if (spec->name == NULL || strlen(spec->name) == 0) - { - warn("ERROR: format problem with trackDbHub.html -- contact UCSC."); - continue; - } - spec->level = cloneString(chopPrefixAt(attr->val, '-')); - verbose(6, "spec: name=%s, level=%s\n", spec->name, spec->level); - savedSpec = (struct trackHubSettingSpec *)hashFindVal(specHash, spec->name); - if (savedSpec != NULL) - verbose(6, "found spec %s level %s in hash\n", savedSpec->name, savedSpec->level); - if (savedSpec == NULL) - { - hashAdd(specHash, spec->name, spec); - verbose(6, "added spec %s at level %s\n", spec->name, spec->level); - } - else if (trackHubSettingLevelCmp(spec, savedSpec) > 0) - { - hashReplace(specHash, spec->name, spec); - verbose(6, "replaced spec %s at level %s, was %s\n", - spec->name, spec->level, savedSpec->level); - } - } -struct hashEl *el, *list = hashElListHash(specHash); - -int settingsCt = slCount(list); -verbose(5, "Found %d settings's\n", slCount(list)); -if (settingsCt == 0) - errAbort("Can't find hub setting info for version %s (host %s)." - " Use -version to indicate a different version number or url.", version, specHost); - -slSort(&list, hashElCmp); -struct trackHubSettingSpec *specs = NULL; -int baseCt = 0; -int requiredCt = 0; -int deprecatedCt = 0; -for (el = list; el != NULL; el = el->next) - { - if (sameString(((struct trackHubSettingSpec *)el->val)->level, "base")) - baseCt++; - else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "required")) - requiredCt++; - else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "deprecated")) - deprecatedCt++; - slAddHead(&specs, el->val); - } -slReverse(&specs); -verbose(3, - "Found %d supported settings for this version (%d required, %d base, %d deprecated)\n", - slCount(specs), requiredCt, baseCt, deprecatedCt); -return specs; -} - - -int hubSettingsCheckInit(struct trackHub *hub, struct trackHubCheckOptions *options, - struct dyString *errors) -{ -int retVal = 0; - -if (hub->version != NULL && options->version == NULL) - options->version = hub->version; - -struct trackHubSettingSpec *hubLevel = NULL; -int level = 0; -if (hub->version != NULL) - { - AllocVar(hubLevel); - if ((level = trackHubSettingLevel(hubLevel)) < 0) - { - dyStringPrintf(errors, "Unknown hub support level: %s. Defaulting to 'all'.\n", - hub->level); - retVal = 1; - } - else - options->level = hub->level; - } -verbose(2, "Checking hub '%s'", hub->longLabel); -if (options->level) - verbose(2, " for compliance to level '%s' (use -settings to view)", options->level); -verbose(2, "\n"); - -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) - { - /* make hash of settings for this version, saving in options */ - struct trackHubSettingSpec *setting, *settings = - trackHubSettingsForVersion(options->specHost, options->version); - options->settings = newHash(0); - options->suggest = NULL; - for (setting = settings; setting != NULL; setting = setting->next) - { - hashAdd(options->settings, setting->name, setting); - slNameAddHead(&options->suggest, setting->name); - } - /* TODO: ? also need to check settings not in this list (other tdb fields) */ - - // TODO: move extra file handling here (out of hubCheck) - if (options->extra != NULL) - { - struct hashCookie cookie = hashFirst(options->extra); - struct hashEl *hel; - while ((hel = hashNext(&cookie)) != NULL) - slNameAddHead(&options->suggest, hel->name); - } - slNameSort(&options->suggest); - verbose(3, "Suggest list has %d settings\n", slCount(options->suggest)); - } -errCatchEnd(errCatch); -if (errCatch->gotError) - { - retVal = 1; - dyStringPrintf(errors, "%s", errCatch->message->string); - } -errCatchFree(&errCatch); -return retVal; -} - - -int hubCheckTrackSetting(struct trackHub *hub, struct trackDb *tdb, char *setting, - struct trackHubCheckOptions *options, struct dyString *errors) -/* Check trackDb setting to spec (by version and level). Returns non-zero if error, msg in errors */ -{ -int retVal = 0; - -verbose(4, " Check setting '%s'\n", setting); -/* skip internally added/used settings */ -if (sameString(setting, "polished") || sameString(setting, "group")) - return 0; - -/* check setting is in extra file of supported settings */ -if (options->extra && hashLookup(options->extra, setting)) - return 0; - -/* check setting is supported in this version */ -struct trackHubSettingSpec *hubSetting = hashFindVal(options->settings, setting); -if (hubSetting == NULL) - { - dyStringPrintf(errors, "Setting '%s' is unknown/unsupported", setting); - char *suggest = suggestSetting(setting, options); - if (suggest != NULL) - dyStringPrintf(errors, " (did you mean '%s' ?)", suggest); - dyStringPrintf(errors, "\n"); - retVal = 1; - } -else if (sameString(hubSetting->level, "deprecated")) - { - dyStringPrintf(errors, "Setting '%s' is deprecated\n", setting); - retVal = 1; - } -else - { - /* check level */ - struct trackHubSettingSpec *checkLevel = NULL; - AllocVar(checkLevel); - checkLevel->level = options->level; - if (trackHubSettingLevel(hubSetting) < trackHubSettingLevel(checkLevel)) - { - dyStringPrintf(errors, "Setting '%s' is level '%s'\n", setting, hubSetting->level); - retVal = 1; - } - freez(&checkLevel); - } -return retVal; -} - -boolean extFileExists(char *path) -/* Check that a remote URL actually exists, path may be a URL or a local path relative to hub.txt */ -{ -// if the file is local check that it exists: -if (!hasProtocol(path) && udcExists(path)) - return TRUE; -else - { - // netLineFileSilentOpen will handle 301 redirects and the like - if (netLineFileSilentOpen(path) != NULL) - return TRUE; - } -return FALSE; -} - -char *makeFolderObjectString(char *id, char *text, char *parent, char *title, boolean children, boolean openFolder) -/* Construct a folder item for one of the jstree arrays */ -{ -struct dyString *folderString = dyStringNew(0); -dyStringPrintf(folderString, "{icon: '../../images/folderC.png', id: '%s', " - "text:'%s', parent:'%s'," - "li_attr:{title:'%s'}, children:%s, state: {opened: %s}}", - htmlEncode(id), htmlEncode(text), htmlEncode(parent), title, children ? "true" : "false", openFolder ? "true" : "false"); -return dyStringCannibalize(&folderString); -} - -char *makeChildObjectString(char *id, char *title, char *shortLabel, char *longLabel, - char *color, char *name, char *text, char *parent) -/* Construct a single child item for one of the jstree arrays */ -{ -struct dyString *item = dyStringNew(0); -dyStringPrintf(item, "{icon: 'fa fa-plus', id:'%s', li_attr:{class: 'hubError', title: '%s', " - "shortLabel: '%s', longLabel: '%s', color: '%s', name:'%s'}, " - "text:'%s', parent: '%s', state: {opened: true}}", - htmlEncode(id), title, htmlEncode(shortLabel), htmlEncode(longLabel), color, name, htmlEncode(text), htmlEncode(parent)); -return dyStringCannibalize(&item); -} - -void hubErr(struct dyString *errors, char *message, struct trackHub *hub, boolean doHtml) -/* Construct the right javascript for the jstree for a top level hub.txt error. */ -{ -if (!doHtml) - dyStringPrintf(errors, "%s", message); -else - { - char *sl; - char *strippedMessage = NULL; - static int count = 0; // force a unique id for the jstree object - char id[512]; - //TODO: Choose better default labels - if (hub && hub->shortLabel != NULL) - { - sl = hub->shortLabel; - } - else - sl = "Hub Error"; - if (message) - strippedMessage = cloneString(message); - stripChar(strippedMessage, '\n'); - safef(id, sizeof(id), "%s%d", sl, count); - - // make the error message - dyStringPrintf(errors, "trackData['%s'] = [%s];\n", htmlEncode(sl), - makeChildObjectString(id, "Hub Error", htmlEncode(sl), htmlEncode(sl), "#550073", htmlEncode(sl), strippedMessage, sl)); - - count++; - } -} - -void genomeErr(struct dyString *errors, char *message, struct trackHub *hub, - struct trackHubGenome *genome, boolean doHtml) -/* Construct the right javascript for the jstree for a top-level genomes.txt error or - * error opening a trackDb.txt file */ -{ -if (!doHtml) - dyStringPrintf(errors, "%s", message); -else - { - static int count = 0; // forces unique ID's which the jstree object needs - char id[512]; - char *errorMessages[16]; - char *strippedMessage = NULL; - char *genomeName = trackHubSkipHubName(genome->name); - if (message) - strippedMessage = cloneString(message); - // multiple errors may be in a single message, chop by newline and make a node in the tree for each message - int numMessages = chopByChar(strippedMessage, '\n', errorMessages, sizeof(errorMessages)); - int i = 0; - dyStringPrintf(errors, "trackData['%s'] = [", genomeName); - for (; i < numMessages && isNotEmpty(errorMessages[i]); i++) - { - safef(id, sizeof(id), "%s%d", genomeName, count); - dyStringPrintf(errors, "%s,", makeChildObjectString(id, "Genome Error", genomeName, genomeName, "#550073", genomeName, errorMessages[i], genomeName)); - count++; - } - } -} - -void trackDbErr(struct dyString *errors, char *message, struct trackHubGenome *genome, struct trackDb *tdb, boolean doHtml) -/* Adds the right object for a jstree object of trackDb configuration errors. */ -{ -if (!doHtml) - { - dyStringPrintf(errors, "%s", message); - } -else - { - char *strippedMessage = NULL; - char *splitMessages[16]; // SUBGROUP_MAX=9 but add a few extra just in case - char parentOrTrackString[512]; - char id[512]; - static int count = 0; // forces unique ID's which the jstree object needs - int numMessages = 0; - int i = 0; - - // if a subtrack is missing multiple subgroups, then message will contain - // at least two newline separated errors, both should be printed separately: - if (message) - { - strippedMessage = cloneString(message); - while (lastChar(strippedMessage) == '\n') - trimLastChar(strippedMessage); - numMessages = chopByChar(strippedMessage, '\n', splitMessages, sizeof(splitMessages)); - } - - for (; i < numMessages; i++) - { - if (isNotEmpty(splitMessages[i])) - { - safef(id, sizeof(id), "%s%d", trackHubSkipHubName(tdb->track), count); - safef(parentOrTrackString, sizeof(parentOrTrackString), "%s_%s", trackHubSkipHubName(genome->name), trackHubSkipHubName(tdb->track)); - dyStringPrintf(errors, "%s,", - makeChildObjectString(id, "TrackDb Error", tdb->shortLabel, tdb->longLabel, - "#550073", trackHubSkipHubName(tdb->track), splitMessages[i], parentOrTrackString)); - count++; - } - } - } -} - -boolean checkEmptyMembersForAll(membersForAll_t *membersForAll, struct trackDb *parentTdb) -/* membersForAll may be allocated and exist but not have any actual members defined. */ -{ -int i; -for (i = 0; i < ArraySize(membersForAll->members); i++) - { - if (membersForAll->members[i] != NULL) - return TRUE; - } -return FALSE; -} - -int hubCheckSubtrackSettings(struct trackHubGenome *genome, struct trackDb *tdb, struct dyString *errors, struct trackHubCheckOptions *options) -/* Check that 'subgroups' are consistent with what is defined at the parent level */ -{ -int retVal = 0; -if (!tdbIsSubtrack(tdb)) - return retVal; - -int i; -char *subtrackName = trackHubSkipHubName(tdb->track); -sortOrder_t *sortOrder = NULL; -membership_t *membership = NULL; -membersForAll_t *membersForAll = NULL; -struct errCatch *errCatch = errCatchNew(); - -if (errCatchStart(errCatch)) - { - membersForAll = membersForAllSubGroupsGet(tdb->parent, NULL); - - // membersForAllSubGroupsGet() warns about the parent stanza, turn it into an errAbort - if (errCatch->gotWarning) - { - errAbort("%s", errCatch->message->string); - } - - if (membersForAll && checkEmptyMembersForAll(membersForAll, tdb->parent)) - { - membership = subgroupMembershipGet(tdb); - sortOrder = sortOrderGet(NULL, tdb->parent); - - if (membership == NULL) - { - errAbort("missing 'subgroups' setting for subtrack %s", subtrackName); - } - - // if a sortOrder is defined, make sure every subtrack has that membership - if (sortOrder) - { - for (i = 0; i < sortOrder->count; i++) - { - char *col = sortOrder->column[i]; - if ( (!sameString(col, SUBTRACK_COLOR_SUBGROUP)) && (membership == NULL || stringArrayIx(col, membership->subgroups, membership->count) == -1)) - warn("%s not a member of sortOrder subgroup %s", subtrackName, col); - } - } - - // now check that this subtrack is a member of every defined subgroup - for (i = 0; i < ArraySize(membersForAll->members); i++) - { - if (membersForAll->members[i] != NULL) - { - char *subgroupName = membersForAll->members[i]->groupTag; - if (stringArrayIx(subgroupName, membership->subgroups, membership->count) == -1) - { - warn("subtrack %s not a member of subgroup %s", subtrackName, subgroupName); - } - } - } - - // check that the subtrack does not have any bogus subgroups that don't exist in the parent - for (i = 0; i < membership->count; i++) - { - char *subgroupName = membership->subgroups[i]; - if (!subgroupingExists(tdb->parent, subgroupName)) - { - warn("subtrack \"%s\" has a subgroup \"%s\" not defined at parent level", subtrackName, subgroupName); - } - } - } - } -errCatchEnd(errCatch); -if (errCatch->gotError || errCatch->gotWarning) - { - retVal = 1; - trackDbErr(errors, errCatch->message->string, genome, tdb, options->htmlOut); - } -errCatchFree(&errCatch); - -return retVal; -} - -int hubCheckCompositeSettings(struct trackHubGenome *genome, struct trackDb *tdb, struct dyString *errors, struct trackHubCheckOptions *options) -/* Check composite level settings like subgroups, dimensions, sortOrder, etc. - * Note that this function returns an int because we want to warn about all errors in a single - * composite stanza rather than errAbort on the first error */ -{ -int retVal = 0; - -// for now all the combination style stanzas can get checked here, but -// in the future they might need their own routines -if (! (tdbIsComposite(tdb) || tdbIsCompositeView(tdb) || tdbIsContainer(tdb)) ) - return retVal; - -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) - { - // check that subgroup lines are syntactically correct: - // "subGroup name Title tag1=value1 ..." - (void)membersForAllSubGroupsGet(tdb, NULL); - - // check that multiWigs have > 1 track - char *multiWigSetting = trackDbLocalSetting(tdb, "container"); - if (multiWigSetting && slCount(tdb->subtracks) < 2) - { - errAbort("container multiWig %s has only one subtrack. multiWigs must have more than one subtrack", tdb->track); - } - } -errCatchEnd(errCatch); - -if (errCatch->gotError || errCatch->gotWarning) - { - // don't add a new line because one will already be inserted by the errCatch->message - trackDbErr(errors, errCatch->message->string, genome, tdb, options->htmlOut); - retVal = 1; - } - -return retVal; -} - -void hubCheckParentsAndChildren(struct trackDb *tdb) -/* Check that a single trackDb stanza has the correct parent and subtrack pointers */ -{ -if (tdbIsSuper(tdb) || tdbIsComposite(tdb) || tdbIsCompositeView(tdb) || tdbIsContainer(tdb)) - { - if (tdb->subtracks == NULL) - { - errAbort("Track \"%s\" is declared superTrack, compositeTrack, view or " - "container, but has no subtracks", tdb->track); - } - - // Containers should not have a bigDataUrl setting - if (trackDbLocalSetting(tdb, "bigDataUrl")) - { - errAbort("Track \"%s\" is declared superTrack, compositeTrack, view or " - "container, and also has a bigDataUrl", tdb->track); - } - - // multiWigs cannot be the child of a composite - if (tdbIsContainer(tdb) && - (tdb->parent != NULL && - (tdbIsComposite(tdb->parent) || tdbIsCompositeView(tdb->parent)))) - { - errAbort("Track \"%s\" is declared container multiWig and has parent \"%s\"." - " Container multiWig tracks cannot be children of composites or views", - tdb->track, tdb->parent->track); - } - } -else if (tdb->subtracks != NULL) - { - errAbort("Track \"%s\" has children tracks (e.g: \"%s\"), but is not a " - "compositeTrack, container, view or superTrack", tdb->track, tdb->subtracks->track); - } -} - -boolean checkTypeLine(struct trackHubGenome *genome, struct trackDb *tdb, struct dyString *errors, struct trackHubCheckOptions *options) -{ -boolean retVal = FALSE; -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) - { - char *type = trackDbRequiredSetting(tdb, "type"); - char *splitType[4]; - int numWords = chopByWhite(cloneString(type), splitType, sizeof(splitType)); - char *trackType = splitType[0]; - boolean isParentTrack = (tdbIsComposite(tdb) || tdbIsCompositeView(tdb) || tdbIsContainer(tdb)); - if (!isParentTrack && - // these are the valid trackDb types for hub data tracks: - !(sameString("bigNarrowPeak", trackType) || sameString("bigBed", trackType) || - sameString("bigGenePred", trackType) || sameString("bigPsl", trackType)|| - sameString("bigChain", trackType)|| sameString("bigMaf", trackType) || - sameString("bigBarChart", trackType) || sameString("bigInteract", trackType) || - sameString("bigLolly", trackType) || sameString("bigRmsk", trackType) || - sameString("bigWig", trackType) || sameString("longTabix", trackType) || - sameString("vcfTabix", trackType) || sameString("vcfPhasedTrio", trackType) || - sameString("bam", trackType) || sameString("hic", trackType) - #ifdef USE_HAL - || sameString("halSnake", trackType) - #endif - )) - { - errAbort("error in type line \"%s\" for track \"%s\". The only valid types for tracks that are not composites, views or supertracks are: bigWig, bigBed and bigBed variants like bigGenePred/bigChain/bigBarChart/etc, longTabix, vcfTabix, vcfPhasedTrio, bam, hic and halSnake.", trackType, tdb->track); - } - - if (sameString(splitType[0], "bigBed")) - { - if (numWords > 1 && (strchr(splitType[1], '+') || strchr(splitType[1], '.'))) - { - errAbort("error in type line \"%s\" for track \"%s\". " - "A space is needed after the \"+\" or \".\" character.", type, tdb->track); - } - if (numWords > 2 && (!sameString(splitType[2], "+") && !sameString(splitType[2], "."))) - { - errAbort("error in type line \"%s\" for track \"%s\". " - "Only \"+\" or \".\" is allowed after bigBed numFields setting.", type, tdb->track); - } - } - } -errCatchEnd(errCatch); -if (errCatch->gotError) - { - trackDbErr(errors, errCatch->message->string, genome, tdb, options->htmlOut); - retVal = TRUE; - } -return retVal; -} - -int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, struct trackDb *tdb, - struct trackHubCheckOptions *options, struct dyString *errors) -/* Check track settings and optionally, files */ -{ -int retVal = 0; -int trackDbErrorCount = 0; - -if (options->checkSettings && options->settings) - { - //verbose(3, "Found %d settings to check to spec\n", slCount(settings)); - verbose(3, "Checking track: %s\n", tdb->shortLabel); - verbose(3, "Found %d settings to check to spec\n", hashNumEntries(tdb->settingsHash)); - struct hashEl *hel; - struct hashCookie cookie = hashFirst(tdb->settingsHash); - while ((hel = hashNext(&cookie)) != NULL) - retVal |= hubCheckTrackSetting(hub, tdb, hel->name, options, errors); - /* TODO: ? also need to check settings not in this list (other tdb fields) */ - } - -if (options->printMeta) - { - struct slPair *metaPairs = trackDbMetaPairs(tdb); - - if (metaPairs != NULL) - { - printf("%s\n", trackHubSkipHubName(tdb->track)); - struct slPair *pair; - for(pair = metaPairs; pair; pair = pair->next) - { - printf("\t%s : %s\n", pair->name, (char *)pair->val); - } - printf("\n"); - } - slPairFreeValsAndList(&metaPairs); - } - -struct trackDb *tempTdb = NULL; -char *textName = NULL; -char idName[512]; -struct errCatch *errCatch = errCatchNew(); -boolean trackIsContainer = (tdbIsComposite(tdb) || tdbIsCompositeView(tdb) || tdbIsContainer(tdb)); - -// first get down into the subtracks -if (tdb->subtracks != NULL) - { - for (tempTdb = tdb->subtracks; tempTdb != NULL; tempTdb = tempTdb->next) - retVal |= hubCheckTrack(hub, genome, tempTdb, options, errors); - } - -// for when assembly hubs have tracks with the same name, prepend assembly name to id -safef(idName, sizeof(idName), "%s_%s", trackHubSkipHubName(genome->name), trackHubSkipHubName(tdb->track)); - -if (options->htmlOut) - { - dyStringPrintf(errors, "trackData['%s'] = [", idName); - } - -if (errCatchStart(errCatch)) - { - if (tdb->errMessage) // if we found any errors when first reading in the trackDb - errAbort("%s",tdb->errMessage); - - hubCheckParentsAndChildren(tdb); - if (trackIsContainer) - retVal |= hubCheckCompositeSettings(genome, tdb, errors, options); - - if (tdbIsSubtrack(tdb)) - retVal |= hubCheckSubtrackSettings(genome, tdb, errors, options); - - // check that type line is syntactically correct regardless of - // if we actually want to check the data file itself - boolean foundTypeError = checkTypeLine(genome, tdb, errors, options); - - // No point in checking the data file if the type setting is incorrect, - // since hubCheckBigDataUrl will error out early with a less clear message - // if the type line is messed up. This has the added benefit of providing - // consistent messaging on command line interface vs web interface - if (!foundTypeError && options->checkFiles) - hubCheckBigDataUrl(hub, genome, tdb); - - if (!sameString(tdb->track, "cytoBandIdeo")) - { - trackHubAddDescription(genome->trackDbFile, tdb); - if (!tdb->html) - warn("warning: missing description page for track: '%s'", tdb->track); - } - - if (!trackIsContainer && sameString(trackDbRequiredSetting(tdb, "type"), "bigWig")) - { - char *autoScaleSetting = trackDbLocalSetting(tdb, "autoScale"); - if (autoScaleSetting && !sameString(autoScaleSetting, "off") && !sameString(autoScaleSetting, "on")) - { - errAbort("track \"%s\" has value \"%s\" for autoScale setting, " - "valid autoScale values for individual bigWig tracks are \"off\" or \"on\" only. " - "If \"%s\" is part of a bigWig composite track and you want to use the " - "\"%s\" setting, only declare \"autoScale group\" in the parent stanza", - trackHubSkipHubName(tdb->track), autoScaleSetting, trackHubSkipHubName(tdb->track), - autoScaleSetting); - } - } - } -errCatchEnd(errCatch); -if (errCatch->gotError || errCatch->gotWarning) - { - retVal = 1; - trackDbErr(errors, errCatch->message->string, genome, tdb, options->htmlOut); - if (errCatch->gotError) - trackDbErrorCount += 1; - } -errCatchFree(&errCatch); - -if (options->htmlOut) - { - if (trackIsContainer) - { - for (tempTdb = tdb->subtracks; tempTdb != NULL; tempTdb = tempTdb->next) - { - char subtrackName[512]; - safef(subtrackName, sizeof(subtrackName), "%s_%s", trackHubSkipHubName(genome->name), trackHubSkipHubName(tempTdb->track)); - textName = trackHubSkipHubName(tempTdb->longLabel); - dyStringPrintf(errors, "%s,", makeFolderObjectString(subtrackName, textName, idName, "TRACK", TRUE, retVal)); - } - } - else if (!retVal) - { - // add "Error" to the trackname to force uniqueness for the jstree - dyStringPrintf(errors, "{icon: 'fa fa-plus', " - "id:'%sError', text:'No trackDb configuration errors', parent:'%s'}", idName, idName); - } - dyStringPrintf(errors, "];\n"); - } - -return retVal; -} - - -int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome, - struct trackHubCheckOptions *options, struct dyString *errors) -/* Check out genome within hub. */ -{ -struct errCatch *errCatch = errCatchNew(); -struct trackDb *tdbList = NULL; -int genomeErrorCount = 0; -boolean openedGenome = FALSE; -verbose(3, "checking genome %s\n", trackHubSkipHubName(genome->name)); - -if (errCatchStart(errCatch)) - { - if (genome->twoBitPath != NULL) - { - // check that twoBitPath is a valid file, warn instead of errAbort so we can continue checking - // the genome stanza - char *twoBit = genome->twoBitPath; - if (!extFileExists(twoBit)) - warn("Error: '%s' twoBitPath does not exist or is not accessible: '%s'", genome->name, twoBit); - - // groups and htmlPath are optional settings, again only warn if they are malformed - char *groupsFile = genome->groups; - if (groupsFile != NULL && !extFileExists(groupsFile)) - warn("warning: '%s' groups file does not exist or is not accessible: '%s'", genome->name, groupsFile); - - char *htmlPath = hashFindVal(genome->settingsHash, "htmlPath"); - if (htmlPath == NULL) - warn("warning: missing htmlPath setting for assembly hub '%s'", genome->name); - else if (!extFileExists(htmlPath)) - warn("warning: '%s' htmlPath file does not exist or is not accessible: '%s'", genome->name, htmlPath); - } - boolean foundFirstGenome = FALSE; - tdbList = trackHubTracksForGenome(hub, genome, NULL, &foundFirstGenome); - tdbList = trackDbLinkUpGenerations(tdbList); - tdbList = trackDbPolishAfterLinkup(tdbList, genome->name); - trackHubPolishTrackNames(hub, tdbList); - } -errCatchEnd(errCatch); -if (errCatch->gotError || errCatch->gotWarning) - { - openedGenome = TRUE; - genomeErr(errors, errCatch->message->string, hub, genome, options->htmlOut); - if (errCatch->gotError || errCatch->gotWarning) - genomeErrorCount += 1; - } -errCatchFree(&errCatch); - -verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name); -struct trackDb *tdb; -int tdbCheckVal; -static struct dyString *tdbDyString = NULL; -if (!tdbDyString) - tdbDyString = dyStringNew(0); - -// build up the track results list, keep track of number of errors, then -// open up genomes folder -char *genomeName = trackHubSkipHubName(genome->name); -for (tdb = tdbList; tdb != NULL; tdb = tdb->next) - { - if (options->htmlOut) - { - // if we haven't already found an error then open up the array - if (!openedGenome) - { - dyStringPrintf(errors, "trackData['%s'] = [", genomeName); - openedGenome = TRUE; - } - } - // use different dyString for the actual errors generated by each track - tdbCheckVal = hubCheckTrack(hub, genome, tdb, options, tdbDyString); - genomeErrorCount += tdbCheckVal; - if (options->htmlOut) - { - // when assembly hubs have tracks with the same name, prepend assembly name to id - char name[512]; - safef(name, sizeof(name), "%s_%s", trackHubSkipHubName(genome->name), trackHubSkipHubName(tdb->track)); - dyStringPrintf(errors, "%s", makeFolderObjectString(name, tdb->longLabel, genomeName, "TRACK", TRUE, tdbCheckVal ? TRUE : FALSE)); - if (tdb->next != NULL) - dyStringPrintf(errors, ","); - } - } -if (options->htmlOut) - dyStringPrintf(errors, "];\n"); - -dyStringPrintf(errors, "%s", tdbDyString->string); -dyStringClear(tdbDyString); - -return genomeErrorCount; -} - -void checkGenomeRestriction(struct trackHubCheckOptions *options, struct trackHub *hub) -/* check the a genome restriction from the command line is a valid genome */ -{ -if (options->genome == NULL) - return; // OK -for (struct trackHubGenome *genome = hub->genomeList; genome != NULL; genome = genome->next) - { - if (sameString(trackHubSkipHubName(genome->name), options->genome)) - return; // OK - } -errAbort("Genome %s not found in hub", options->genome); -} - -boolean shouldCheckGenomes(struct trackHubCheckOptions *options, struct trackHubGenome *genome) -/* should this genome be check based on command line restrictions */ -{ -return (options->genome == NULL) || - sameString(trackHubSkipHubName(genome->name), options->genome); -} - - -int trackHubCheck(char *hubUrl, struct trackHubCheckOptions *options, struct dyString *errors) -/* Check a track data hub for integrity. Put errors in dyString. - * return 0 if hub has no errors, 1 otherwise - * if options->checkTracks is TRUE, check remote files of individual tracks - */ -{ -struct errCatch *errCatch = errCatchNew(); -struct trackHub *hub = NULL; -struct dyString *hubErrors = dyStringNew(0); -int retVal = 0; - -if (errCatchStart(errCatch)) - { - hub = trackHubOpen(hubUrl, ""); - verbose(3, "trackHubOpen just after trackHubOpen()\n"); // DEBUG GALT REMOVE - char *descUrl = hub->descriptionUrl; - verbose(3, "descUrl = [%s]\n", descUrl); // DEBUG GALT REMOVE - if (descUrl == NULL) - warn("warning: missing hub overview description page (descriptionUrl setting)"); - else if (!extFileExists(descUrl)) - warn("warning: %s descriptionUrl setting does not exist", hub->descriptionUrl); - } -errCatchEnd(errCatch); -if (errCatch->gotError || errCatch->gotWarning) - { - verbose(3, "trackHubOpen had errCatch->gotError or gotWarning\n"); // DEBUG GALT REMOVE - - retVal = 1; - hubErr(hubErrors, errCatch->message->string, hub, options->htmlOut); - - if (options->htmlOut) - { - if (hub && hub->shortLabel) - { - dyStringPrintf(errors, "trackData['#'] = [%s,", - makeFolderObjectString(hub->shortLabel, "Hub Errors", "#", - "Click to open node", TRUE, TRUE)); - } - else - { - dyStringPrintf(errors, "trackData['#'] = [%s,", - makeFolderObjectString("Hub Error", "Hub Errors", "#", - "Click to open node", TRUE, TRUE)); - } - } - } -errCatchFree(&errCatch); - -if (hub == NULL) - { - verbose(3, "trackHubCheck hub==NULL aftger trackHubOpen on hubUrl=[%s]\n", hubUrl); // DEBUG GALT REMOVE - - // the reason we couldn't close the array in the previous block is because - // there may be non-fatal errors and we still want to keep trying to check - // the genomes settings, which need to be children of the root '#' node. - // Here we are at a fatal error so we can close the array and return - if (options->htmlOut) - dyStringPrintf(errors, "];\n%s", dyStringCannibalize(&hubErrors)); - else - dyStringPrintf(errors, "%s", dyStringCannibalize(&hubErrors)); - return 1; - } -if (options->htmlOut && retVal != 1) - dyStringPrintf(errors, "trackData['#'] = ["); - -if (options->checkSettings) - retVal |= hubSettingsCheckInit(hub, options, errors); - -struct trackHubGenome *genome; -checkGenomeRestriction(options, hub); -char genomeTitleString[128]; -struct dyString *genomeErrors = dyStringNew(0); -verbose(3, "hub->genomeList = [%s]\n", (char *)hub->genomeList); // DEBUG GALT REMOVE -for (genome = hub->genomeList; genome != NULL; genome = genome->next) - { - verbose(3, "genome = [%s]\n", genome->name); // DEBUG GALT REMOVE - - if (shouldCheckGenomes(options, genome)) - { - verbose(3, "shouldCheckGenomes(options ... \n"); // DEBUG GALT REMOVE - - int numGenomeErrors = hubCheckGenome(hub, genome, options, genomeErrors); - - verbose(3, "numGenomeError=%d after hubCheckGenome()\n", numGenomeErrors); // DEBUG GALT REMOVE - - if (options->htmlOut) - { - char *genomeName = trackHubSkipHubName(genome->name); - safef(genomeTitleString, sizeof(genomeTitleString), - "%s (%d configuration error%s)", genomeName, numGenomeErrors, - numGenomeErrors == 1 ? "" : "s"); - dyStringPrintf(errors, "%s,", makeFolderObjectString(genomeName, genomeTitleString, "#", - "Click to open node", TRUE, numGenomeErrors > 0 ? TRUE : FALSE)); - } - retVal |= numGenomeErrors; - } - } -if (options->htmlOut) - { - dyStringPrintf(errors, "];\n"); - } -dyStringPrintf(errors, "%s", dyStringCannibalize(&hubErrors)); -dyStringPrintf(errors, "%s", dyStringCannibalize(&genomeErrors)); -trackHubClose(&hub); -return retVal; -} - - -static void addExtras(char *extraFile, struct trackHubCheckOptions *checkOptions) -/* Add settings from extra file (e.g. for specific hub display site) */ -{ -verbose(2, "Accepting extra settings in '%s'\n", extraFile); -checkOptions->extraFile = extraFile; -checkOptions->extra = hashNew(0); -struct lineFile *lf = NULL; -if (startsWith("http", extraFile)) - { - struct dyString *ds = netSlurpUrl(extraFile); - char *s = dyStringCannibalize(&ds); - lf = lineFileOnString(extraFile, TRUE, s); - } -else - { - lf = lineFileOpen(extraFile, TRUE); - } -char *line; -while (lineFileNextReal(lf, &line)) - { - hashAdd(checkOptions->extra, line, NULL); - } -lineFileClose(&lf); -verbose(3, "Found %d extra settings\n", hashNumEntries(checkOptions->extra)); -} - - -static void showSettings(struct trackHubCheckOptions *checkOptions) -/* Print settings and levels for the indicated version */ -{ -struct trackHubSettingSpec *settings = - trackHubSettingsForVersion(checkOptions->specHost, checkOptions->version); -struct trackHubSettingSpec *setting = NULL; -AllocVar(setting); -setting->level = checkOptions->level; -int checkLevel = trackHubSettingLevel(setting); -verbose(3, "Showing level %d (%s) and higher\n", checkLevel, setting->level); -freez(&setting); -for (setting = settings; setting != NULL; setting = setting->next) - { - if (trackHubSettingLevel(setting) >= checkLevel) - printf("%s\t%s\n", setting->name, setting->level); - } -} - - -int main(int argc, char *argv[]) -/* Process command line. */ -{ -optionInit(&argc, argv, options); - -if (argc != 2 && !optionExists("settings")) - usage(); - -struct trackHubCheckOptions *checkOptions = NULL; -AllocVar(checkOptions); - -checkOptions->specHost = (optionExists("test") ? "genome-test.soe.ucsc.edu" : "genome.ucsc.edu"); -checkOptions->specHost = optionVal("specHost", checkOptions->specHost); - -checkOptions->printMeta = optionExists("printMeta"); -checkOptions->checkFiles = !optionExists("noTracks"); -checkOptions->checkSettings = optionExists("checkSettings"); -checkOptions->genome = optionVal("genome", NULL); - -struct trackHubSettingSpec *setting = NULL; -AllocVar(setting); -setting->level = optionVal("level", "all"); -if (trackHubSettingLevel(setting) < 0) - { - fprintf(stderr, "ERROR: Unrecognized support level %s\n\n", setting->level); - usage(); - } -checkOptions->level = setting->level; - -char *version = NULL; -if (optionExists("version")) - version = optionVal("version", NULL); -checkOptions->version = version; - -char *extraFile = optionVal("extra", NULL); -if (extraFile != NULL) - addExtras(extraFile, checkOptions); - -cacheTime = optionInt("cacheTime", cacheTime); -udcSetCacheTimeout(cacheTime); -// UDC cache dir: first check for hg.conf setting, then override with command line option if given. -setUdcCacheDir(); -udcSetDefaultDir(optionVal("udcDir", udcDefaultDir())); - -char *httpsCertCheck = optionVal("httpsCertCheck", NULL); -if (httpsCertCheck) - { - // level log for testing, but you only see something if SCRIPT_NAME env variable is set like CGIs have. - if (sameString(httpsCertCheck, "abort") || sameString(httpsCertCheck, "warn") || sameString(httpsCertCheck, "log") || sameString(httpsCertCheck, "none")) - { - setenv("https_cert_check", httpsCertCheck, 1); - } - else - { - // log level is not very useful, but included it for completeness. - verbose(1, "The value of -httpsCertCheck should be either abort to avoid Man-in-middle attack,\n" - "warn to warn about failed certs,\n" - "none indicating the verify is skipped entirely."); - usage(); - } - } - -// should be space separated list, if that lists contains "noHardwiredExceptions" then the built-in hardwired whitelist in https.c is skipped. -char *httpsCertCheckDomainExceptions = optionVal("httpsCertCheckDomainExceptions", NULL); -if (httpsCertCheckDomainExceptions) - { - setenv("https_cert_check_domain_exceptions", httpsCertCheckDomainExceptions, 1); - } - -knetUdcInstall(); // make the htslib library use udc - -if (optionExists("settings")) - { - showSettings(checkOptions); - return 0; - } - -// hgHubConnect specific option for generating a jstree of the hub errors -checkOptions->htmlOut = optionExists("htmlOut"); -struct dyString *errors = dyStringNew(1024); -if (trackHubCheck(argv[1], checkOptions, errors) || checkOptions->htmlOut) - { - if (checkOptions->htmlOut) // just dump errors string to stdout - { - printf("%s", errors->string); - return 1; - } - else - { - // uniquify and count errors - struct slName *errs = slNameListFromString(errors->string, '\n'); - slUniqify(&errs, slNameCmp, slNameFree); - int errCount = slCount(errs); - printf("Found %d problem%s:\n", errCount, errCount == 1 ? "" : "s"); - printf("%s\n", slNameListToString(errs, '\n')); - return 1; - } - } -return 0; -}