1a48366ee8ad179a1e490e6f9b065cf4e4d53db4
braney Thu Jul 14 17:06:33 2011 -0700
move hubCheck utility to library, fix up some error messages, facilitate using hubCheck as a hub crawler called from cron to validate public hubs.

diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index e349e27..c0a67da 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -13,30 +13,34 @@
  *     trackHubClose(&hub);
  * Note that the tdbList returned does not have the parent/subtrack pointers set.
  * It is just a simple list of tracks, not a tree.
  */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "udc.h"
 #include "ra.h"
 #include "filePath.h"
 #include "htmlPage.h"
 #include "trackDb.h"
 #include "trackHub.h"
+#include "errCatch.h"
+#include "bamFile.h"
+#include "bigWig.h"
+#include "bigBed.h"
 
 static boolean hasProtocol(char *urlOrPath)
 /* Return TRUE if it looks like it has http://, ftp:// etc. */
 {
 return stringIn("://", urlOrPath) != NULL;
 }
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Return full path (in URL form if it's a remote hub) given
  * path possibly relative to hubUrl. Do a freeMem of result
  * when done. */
 {
 /* If path itself is a URL then just return a copy of it. */
 if (hasProtocol(path))
     return cloneString(path);
@@ -91,31 +95,31 @@
 slReverse(&list);
 return list;
 }
 
 char *trackHubSetting(struct trackHub *hub, char *name)
 /* Return setting if it exists, otherwise NULL. */
 {
 return hashFindVal(hub->settings, name);
 }
 
 char *trackHubRequiredSetting(struct trackHub *hub, char *name)
 /* Return named setting. Abort with error message if not found. */
 {
 char *val = trackHubSetting(hub, name);
 if (val == NULL)
-    errAbort("Missing required setting %s from %s", name, hub->url);
+    errAbort("Missing required setting '%s' from %s", name, hub->url);
 return val;
 }
 
 struct trackHub *trackHubOpen(char *url, char *hubName)
 /* Open up a track hub from url. Reads and parses hub.txt and the genomesFile.
  * The hubName is generally just the asciified ID number. */
 {
 struct lineFile *lf = udcWrapShortLineFile(url, NULL, 256*1024);
 struct hash *hubRa = raNextRecord(lf);
 if (hubRa == NULL)
     errAbort("empty %s in trackHubOpen", url);
 if (raNextRecord(lf) != NULL)
     errAbort("multiple records in %s", url);
 
 /* Allocate hub and fill in settings field and url. */
@@ -174,31 +178,31 @@
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     trackHubGenomeFree(&el);
     }
 *pList = NULL;
 }
 
 static char *requiredSetting(struct trackHub *hub, struct trackHubGenome *genome,
     struct trackDb *tdb, char *setting)
 /* Fetch setting or give an error message, a little more specific than the
  * error message from trackDbRequiredSetting(). */
 {
 char *val = trackDbSetting(tdb, setting);
 if (val == NULL)
-    errAbort("Missing required %s setting in hub %s genome %s track %s", setting,
+    errAbort("Missing required '%s' setting in hub %s genome %s track %s", setting,
         hub->url, genome->name, tdb->track);
 return val;
 }
 
 static void expandBigDataUrl(struct trackHub *hub, struct trackHubGenome *genome,
     struct trackDb *tdb)
 /* Expand bigDataUrls so that no longer relative to genome->trackDbFile */
 {
 struct hashEl *hel = hashLookup(tdb->settingsHash, "bigDataUrl");
 if (hel != NULL)
     {
     char *oldVal = hel->val;
     hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal);
     freeMem(oldVal);
     }
@@ -231,31 +235,31 @@
           isSuper))
         {
         errAbort("Parent track %s is not compositeTrack, container, or superTrack in hub %s genome %s",
             tdb->track, hub->url, genome->name);
         }
     }
 else
     {
     /* Check type field. */
     char *type = requiredSetting(hub, genome, tdb, "type");
     if (!(startsWithWord("bigWig", type) ||
           startsWithWord("bigBed", type) ||
           startsWithWord("vcfTabix", type) ||
           startsWithWord("bam", type)))
         {
-        errAbort("Unsupported type %s in hub %s genome %s track %s", type,
+        errAbort("Unsupported type '%s' in hub %s genome %s track %s", type,
             hub->url, genome->name, tdb->track);
         }
 
     requiredSetting(hub, genome, tdb, "bigDataUrl");
     }
 }
 
 static void markContainers( struct trackHub *hub,
     struct trackHubGenome *genome, struct trackDb *tdbList)
 /* mark containers that are parents, or have them */
 {
 struct hash *hash = hashNew(0);
 struct trackDb *tdb;
 
 // add all the track names to a hash
@@ -379,15 +383,146 @@
 char namePrefix[PATH_LEN];
 safef(namePrefix, sizeof(namePrefix), "%s_", hubName);
 trackDbListAddNamePrefix(tdbList, namePrefix);
 }
 
 void trackHubAddGroupName(char *hubName, struct trackDb *tdbList)
 /* Add group tag that references the hubs symbolic name. */
 {
 struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     tdb->grp = cloneString(hubName);
     hashReplace(tdb->settingsHash, "group", tdb->grp);
     }
 }
+
+static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome,
+    struct trackDb *tdb, struct dyString *errors)
+/* Make sure that track is ok: resolve its bigDataUrl against the genome's
+ * trackDbFile and open the file with the opener that matches its type.
+ * Tracks without a bigDataUrl setting are not checked.  Any error message is
+ * appended to errors.  Returns 0 if the track is ok, 1 otherwise. */
+{
+char *relativeUrl = trackDbSetting(tdb, "bigDataUrl");
+if (relativeUrl == NULL)
+    return 0;   /* Nothing to check; leave before an errCatch is allocated. */
+
+struct errCatch *errCatch = errCatchNew();
+/* Declared outside the guarded block so it can be freed on the error path as
+ * well.  volatile because it is assigned inside the block and read after a
+ * possible abort (assumes errCatch is setjmp/longjmp based - confirm). */
+char *volatile bigDataUrl = NULL;
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    {
+    bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl);
+    char *type = trackDbRequiredSetting(tdb, "type");
+    verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl);
+
+    if (startsWithWord("bigWig", type))
+        {
+        /* Just open and close to verify file exists and is correct type. */
+        struct bbiFile *bbi = bigWigFileOpen(bigDataUrl);
+        bbiFileClose(&bbi);
+        }
+    else if (startsWithWord("bigBed", type))
+        {
+        /* Just open and close to verify file exists and is correct type. */
+        struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
+        bbiFileClose(&bbi);
+        }
+    else if (startsWithWord("bam", type))
+        {
+        /* For bam files, the following call checks both main file and index.
+         * NOTE(review): its result is not examined here - confirm that it
+         * reports a missing file by aborting rather than by return value. */
+        bamFileExists(bigDataUrl);
+        }
+    else if (startsWithWord("vcfTabix", type))
+        {
+        /* vcfTabix is accepted when the hub's tracks are validated, so it is
+         * not an error here; no opener for it is included in this file, so
+         * the data file itself is not verified. */
+        verbose(2, "not verifying vcfTabix file %s\n", bigDataUrl);
+        }
+    else
+        errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track);
+    }
+errCatchEnd(errCatch);
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+freeMem(bigDataUrl);
+
+return retVal;
+}
+
+static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome,
+    struct dyString *errors)
+/* Check out genome within hub: load its track list, then check each track.
+ * Error messages are appended to errors.  Returns 0 if no errors were found,
+ * 1 otherwise. */
+{
+struct errCatch *errCatch = errCatchNew();
+struct trackDb *tdbList = NULL;
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    tdbList = trackHubTracksForGenome(hub, genome);
+errCatchEnd(errCatch);
+
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+
+/* tdbList is still NULL if loading failed, so the loop is then a no-op. */
+struct trackDb *tdb;
+for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
+    retVal |= hubCheckTrack(hub, genome, tdb, errors);
+verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name);
+
+return retVal;
+}
+
+int trackHubCheck(char *hubUrl, struct dyString *errors)
+/* hubCheck - Check a track data hub for integrity. Put errors in dyString.
+ *      return 0 if hub has no errors, 1 otherwise */
+{
+struct errCatch *errCatch = errCatchNew();
+struct trackHub *hub = NULL;
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    hub = trackHubOpen(hubUrl, "");
+errCatchEnd(errCatch);
+
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+
+/* hub stays NULL when trackHubOpen aborted; the message is already in
+ * errors, and there is nothing more that can be checked. */
+if (hub == NULL)
+    return 1;
+
+verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel);
+verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList));
+struct trackHubGenome *genome;
+for (genome = hub->genomeList; genome != NULL; genome = genome->next)
+    {
+    retVal |= hubCheckGenome(hub, genome, errors);
+    }
+trackHubClose(&hub);
+
+return retVal;
+}