1a48366ee8ad179a1e490e6f9b065cf4e4d53db4
braney
  Thu Jul 14 17:06:33 2011 -0700
move hubCheck utility to library, fix up some error messages, facilitate using hubCheck as a hub crawler called from cron to validate public hubs.
diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index e349e27..c0a67da 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -13,30 +13,34 @@
  *     trackHubClose(&hub);
  * Note that the tdbList returned does not have the parent/subtrack pointers set.
  * It is just a simple list of tracks, not a tree.  
  */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "udc.h"
 #include "ra.h"
 #include "filePath.h"
 #include "htmlPage.h"
 #include "trackDb.h"
 #include "trackHub.h"
+#include "errCatch.h"
+#include "bamFile.h"
+#include "bigWig.h"
+#include "bigBed.h"
 
 static boolean hasProtocol(char *urlOrPath)
 /* Return TRUE if it looks like it has http://, ftp:// etc. */
 {
 return stringIn("://", urlOrPath) != NULL;
 }
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Return full path (in URL form if it's a remote hub) given
  * path possibly relative to hubUrl. Do a freeMem of result
  * when done. */
 {
 /* If path itself is a URL then just return a copy of it. */
 if (hasProtocol(path))
     return cloneString(path);
@@ -91,31 +95,31 @@
 slReverse(&list);
 return list;
 }
 
 char *trackHubSetting(struct trackHub *hub, char *name)
 /* Return setting if it exists, otherwise NULL. */
 {
 return hashFindVal(hub->settings, name);
 }
 
 char *trackHubRequiredSetting(struct trackHub *hub, char *name)
 /* Return named setting.  Abort with error message if not found. */
 {
 char *val = trackHubSetting(hub, name);
 if (val == NULL)
-    errAbort("Missing required setting %s from %s", name, hub->url);
+    errAbort("Missing required setting '%s' from %s", name, hub->url);
 return val;
 }
 
 struct trackHub *trackHubOpen(char *url, char *hubName)
 /* Open up a track hub from url.  Reads and parses hub.txt and the genomesFile. 
  * The hubName is generally just the asciified ID number. */
 {
 struct lineFile *lf = udcWrapShortLineFile(url, NULL, 256*1024);
 struct hash *hubRa = raNextRecord(lf);
 if (hubRa == NULL)
     errAbort("empty %s in trackHubOpen", url);
 if (raNextRecord(lf) != NULL)
     errAbort("multiple records in %s", url);
 
 /* Allocate hub and fill in settings field and url. */
@@ -174,31 +178,31 @@
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     trackHubGenomeFree(&el);
     }
 *pList = NULL;
 }
 
 static char *requiredSetting(struct trackHub *hub, struct trackHubGenome *genome,
 	struct trackDb *tdb, char *setting)
 /* Fetch setting or give an error message, a little more specific than the
  * error message from trackDbRequiredSetting(). */
 {
 char *val = trackDbSetting(tdb, setting);
 if (val == NULL)
-    errAbort("Missing required %s setting in hub %s genome %s track %s", setting,
+    errAbort("Missing required '%s' setting in hub %s genome %s track %s", setting,
     	hub->url, genome->name, tdb->track);
 return val;
 }
 
 static void expandBigDataUrl(struct trackHub *hub, struct trackHubGenome *genome,
 	struct trackDb *tdb)
 /* Expand bigDataUrls so that no longer relative to genome->trackDbFile */
 {
 struct hashEl *hel = hashLookup(tdb->settingsHash, "bigDataUrl");
 if (hel != NULL)
     {
     char *oldVal = hel->val;
     hel->val = trackHubRelativeUrl(genome->trackDbFile, oldVal);
     freeMem(oldVal);
     }
@@ -231,31 +235,31 @@
 	  isSuper))
         {
 	errAbort("Parent track %s is not compositeTrack, container, or superTrack in hub %s genome %s", 
 		tdb->track, hub->url, genome->name);
 	}
     }
 else
     {
     /* Check type field. */
     char *type = requiredSetting(hub, genome, tdb, "type");
     if (!(startsWithWord("bigWig", type) ||
           startsWithWord("bigBed", type) ||
           startsWithWord("vcfTabix", type) ||
           startsWithWord("bam", type)))
 	{
-	errAbort("Unsupported type %s in hub %s genome %s track %s", type,
+	errAbort("Unsupported type '%s' in hub %s genome %s track %s", type,
 	    hub->url, genome->name, tdb->track);
 	}
 
     requiredSetting(hub, genome, tdb, "bigDataUrl");
     }
 }
 
 static void markContainers( struct trackHub *hub, 
     struct trackHubGenome *genome, struct trackDb *tdbList)
 /* mark containers that are parents, or have them */
 {
 struct hash *hash = hashNew(0);
 struct trackDb *tdb;
 
 // add all the track names to a hash
@@ -379,15 +383,145 @@
 char namePrefix[PATH_LEN];
 safef(namePrefix, sizeof(namePrefix), "%s_", hubName);
 trackDbListAddNamePrefix(tdbList, namePrefix);
 }
 
 void trackHubAddGroupName(char *hubName, struct trackDb *tdbList)
 /* Add group tag that references the hubs symbolic name. */
 {
 struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     tdb->grp = cloneString(hubName);
     hashReplace(tdb->settingsHash, "group", tdb->grp);
     }
 }
+
+static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome,
+    struct trackDb *tdb, struct dyString *errors)
+/* Check that the data file behind a single track can be opened.  Only tracks
+ * with a bigDataUrl setting are checked; the url is taken relative to
+ * genome->trackDbFile.  The message of any error caught is appended to errors.
+ * Returns 0 if the track is ok or has no bigDataUrl, 1 if an error was caught.
+ * The hub parameter is not used here. */
+{
+char *relativeUrl = trackDbSetting(tdb, "bigDataUrl");
+
+/* Nothing to check without a data file.  Return before the errCatch is
+ * allocated so that it is not leaked for such tracks. */
+if (relativeUrl == NULL)
+    return 0;
+
+struct errCatch *errCatch = errCatchNew();
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    {
+    char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl);
+    char *type = trackDbRequiredSetting(tdb, "type");
+    verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl);
+
+    if (startsWithWord("bigWig", type))
+	{
+	/* Just open and close to verify file exists and is correct type. */
+	struct bbiFile *bbi = bigWigFileOpen(bigDataUrl);
+	bbiFileClose(&bbi);
+	}
+    else if (startsWithWord("bigBed", type))
+	{
+	/* Just open and close to verify file exists and is correct type. */
+	struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
+	bbiFileClose(&bbi);
+	}
+    else if (startsWithWord("bam", type))
+	{
+	/* For bam files, the following call checks both main file and index.
+	 * NOTE(review): assumes bamFileExists() signals failure by returning
+	 * FALSE rather than aborting - confirm against bamFile.h. */
+	if (!bamFileExists(bigDataUrl))
+	    errAbort("can't open bam file or its index at %s in genome %s track %s",
+		bigDataUrl, genome->name, tdb->track);
+	}
+    else
+	{
+	/* NOTE(review): the type check applied to tracks earlier in this file
+	 * also accepts vcfTabix, but vcfTabix tracks end up here. */
+	errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track);
+	}
+    freez(&bigDataUrl);
+    }
+errCatchEnd(errCatch);
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+
+return retVal;
+}
+
+static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome,
+    struct dyString *errors)
+/* Check one genome of a hub: load its track list with trackHubTracksForGenome()
+ * and then check every track with hubCheckTrack().  Error messages are appended
+ * to errors.  Returns 0 if no error was found, 1 otherwise. */
+{
+struct errCatch *errCatch = errCatchNew();
+struct trackDb *tdbList = NULL;
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    tdbList = trackHubTracksForGenome(hub, genome);
+errCatchEnd(errCatch);
+
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+
+/* tdbList keeps its initial NULL unless trackHubTracksForGenome() returned. */
+struct trackDb *tdb;
+for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
+    retVal |= hubCheckTrack(hub, genome, tdb, errors);
+verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name);
+
+return retVal;
+}
+
+int trackHubCheck(char *hubUrl, struct dyString *errors)
+/* hubCheck - Check a track data hub for integrity: open the hub at hubUrl and
+ * check every genome in it.  Put errors in dyString.
+ *      return 0 if hub has no errors, 1 otherwise (also when the hub could
+ *      not be opened) */
+{
+struct errCatch *errCatch = errCatchNew();
+struct trackHub *hub = NULL;
+int retVal = 0;
+
+if (errCatchStart(errCatch))
+    hub = trackHubOpen(hubUrl, "");
+errCatchEnd(errCatch);
+
+if (errCatch->gotError)
+    {
+    retVal = 1;
+    dyStringPrintf(errors, "%s", errCatch->message->string);
+    }
+errCatchFree(&errCatch);
+
+/* hub keeps its initial NULL unless trackHubOpen() returned.  Without a hub
+ * there is nothing more to check, and the code below dereferences hub. */
+if (hub == NULL)
+    return 1;
+
+verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel);
+verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList));
+struct trackHubGenome *genome;
+for (genome = hub->genomeList; genome != NULL; genome = genome->next)
+    retVal |= hubCheckGenome(hub, genome, errors);
+trackHubClose(&hub);
+
+return retVal;
+}