946812f7ebe9bd45e3e3fbaaa3db102e9696a973
braney
  Thu Mar 24 14:21:20 2011 -0700
add support for supertracks in hubs  #3381
diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index 09a845b..e65aaec 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -1,361 +1,392 @@
 /* trackHub - supports collections of tracks hosted on a remote site.
  * The basic layout of a data hub is:
  *        hub.ra - contains information about the hub itself
  *        genomes.ra - says which genomes are supported by hub
  *                 Contains file name of trackDb.ra for each genome
  *        trackDb.ra - contains a stanza for each track.  Stanzas
  *                 are in a subset of the usual trackDb format. 
  * The routines here are most commonly used like so:
  *     struct trackHub *hub = trackHubOpen(hubRaUrl, hubName);
  *     struct trackHubGenome *hubGenome = trackHubFindGenome(hub, "hg19");
  *     struct trackDb *tdbList = trackHubTracksForGenome(hub, hubGenome);
  *          // do something with tdbList
  *     trackHubClose(&hub);
  * Note that the tdbList returned does not have the parent/subtrack pointers set.
  * It is just a simple list of tracks, not a tree.  
  */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "udc.h"
 #include "ra.h"
 #include "filePath.h"
 #include "htmlPage.h"
 #include "trackDb.h"
 #include "trackHub.h"
 
 static boolean hasProtocol(char *urlOrPath)
 /* Report whether urlOrPath contains a "://" protocol separator,
  * as in http://, ftp:// and so on. */
 {
 char *separator = stringIn("://", urlOrPath);
 return (separator != NULL);
 }
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Resolve path, which may be relative to hubUrl, into a full path
  * (a URL when the hub is remote).  The result is newly allocated;
  * freeMem it when done. */
 {
 char *resolved;
 
 if (hasProtocol(path))
     {
     /* path is already a URL in its own right, so just copy it. */
     resolved = cloneString(path);
     }
 else if (hasProtocol(hubUrl))
     {
     /* Remote hub: the html url expander knows how to combine the two. */
     resolved = htmlExpandUrl(hubUrl, path);
     }
 else
     {
     /* Both hub and path are local, so ordinary file path rules apply. */
     resolved = pathRelativeToFile(hubUrl, path);
     }
 return resolved;
 }
 
 static void badGenomeStanza(struct lineFile *lf)
 /* Abort with a message explaining the expected shape of a genome stanza,
  * citing the current line and file name of lf. */
 {
 errAbort(
     "Genome stanza should have exactly two lines, one with 'genome' and one with 'trackDb'\n"
     "Bad stanza format ending line %d of %s",
     lf->lineIx,
     lf->fileName);
 }
 
 static struct trackHubGenome *trackHubGenomeReadRa(char *url, struct hash *hash)
 /* Parse the genomes file at url, one stanza per genome, and return the
  * genomes as a list in file order.  Each genome is also stored in hash
  * under its name.  Aborts on a malformed stanza or a repeated genome. */
 {
 struct lineFile *lf = udcWrapShortLineFile(url, NULL, 16*1024*1024);
 struct trackHubGenome *genomeList = NULL;
 struct hash *stanza;
 
 for (stanza = raNextRecord(lf); stanza != NULL; stanza = raNextRecord(lf))
     {
     /* A stanza holds a 'genome' line and a 'trackDb' line and nothing else. */
     if (stanza->elCount != 2)
         badGenomeStanza(lf);
 
     char *genomeName = hashFindVal(stanza, "genome");
     if (genomeName == NULL)
         badGenomeStanza(lf);
     if (hashLookup(hash, genomeName) != NULL)
         errAbort("Duplicate genome %s in stanza ending line %d of %s",
                 genomeName, lf->lineIx, lf->fileName);
 
     char *trackDbPath = hashFindVal(stanza, "trackDb");
     if (trackDbPath == NULL)
         badGenomeStanza(lf);
 
     /* Copy what we need out of the stanza, since it is freed below. */
     struct trackHubGenome *genome;
     AllocVar(genome);
     genome->name = cloneString(genomeName);
     genome->trackDbFile = trackHubRelativeUrl(url, trackDbPath);
     hashAdd(hash, genome->name, genome);
     slAddHead(&genomeList, genome);
 
     hashFree(&stanza);
     }
 
 lineFileClose(&lf);
 
 /* Genomes were pushed on the head, so flip the list back to file order. */
 slReverse(&genomeList);
 return genomeList;
 }
 
 char *trackHubSetting(struct trackHub *hub, char *name)
 /* Look up name in the hub's settings.  Returns the value, or NULL
  * when the hub has no such setting. */
 {
 char *val = hashFindVal(hub->settings, name);
 return val;
 }
 
 char *trackHubRequiredSetting(struct trackHub *hub, char *name)
 /* Return the hub setting called name.  If the hub does not have it,
  * abort with a message naming the setting and the hub url. */
 {
 char *val = trackHubSetting(hub, name);
 if (val != NULL)
     return val;
 
 errAbort("Missing required setting %s from %s", name, hub->url);
 return NULL;
 }
 
 struct trackHub *trackHubOpen(char *url, char *hubName)
-/* Open up a track hub from url.  Reads and parses hub.ra and the genomesFile. 
+/* Open up a track hub from url.  Reads and parses hub.txt and the genomesFile. 
  * The hubName is generally just the asciified ID number.
  * Aborts if the hub file is empty, holds more than one record, or lacks
  * the shortLabel, longLabel or genomesFile setting.  Release the result
  * with trackHubClose. */
 {
 struct lineFile *lf = udcWrapShortLineFile(url, NULL, 256*1024);
 struct hash *hubRa = raNextRecord(lf);
 /* The hub file must contain exactly one record. */
 if (hubRa == NULL)
     errAbort("empty %s in trackHubOpen", url);
 if (raNextRecord(lf) != NULL)
     errAbort("multiple records in %s", url);
 
 /* Allocate hub and fill in settings field and url. */
 struct trackHub *hub;
 AllocVar(hub);
 hub->url = cloneString(url);
 hub->name = cloneString(hubName);
 /* The hub keeps the parsed record; trackHubClose frees it through hub->settings. */
 hub->settings = hubRa;
 
 /* Fill in required fields from settings.  These are the values found in
  * hub->settings by hashFindVal, not separate copies. */
 hub->shortLabel = trackHubRequiredSetting(hub, "shortLabel");
 hub->longLabel = trackHubRequiredSetting(hub, "longLabel");
 hub->genomesFile = trackHubRequiredSetting(hub, "genomesFile");
 
 lineFileClose(&lf);
 /* genomesFile may be given relative to the hub url, so resolve it first. */
 char *genomesUrl = trackHubRelativeUrl(hub->url, hub->genomesFile);
 
 hub->genomeHash = hashNew(8);
 hub->genomeList = trackHubGenomeReadRa(genomesUrl, hub->genomeHash);
 /* trackHubRelativeUrl hands back a string the caller must free. */
 freez(&genomesUrl);
 
 return hub;
 }
 
 void trackHubClose(struct trackHub **pHub)
 /* Close up and free resources from hub, then set *pHub to NULL.
  * Does nothing when *pHub is already NULL. */
 {
 struct trackHub *hub = *pHub;
 if (hub != NULL)
     {
     trackHubGenomeFreeList(&hub->genomeList);
     freeMem(hub->url);
     /* name is cloned in trackHubOpen just like url, so it has to be
      * released here as well or it leaks on every close. */
     freeMem(hub->name);
     hashFree(&hub->settings);
     hashFree(&hub->genomeHash);
     freez(pHub);
     }
 }
 
 void trackHubGenomeFree(struct trackHubGenome **pGenome)
 /* Release a single trackHubGenome along with its name and trackDbFile
  * strings.  A NULL *pGenome is left alone. */
 {
 struct trackHubGenome *genome = *pGenome;
 if (genome == NULL)
     return;
 
 freeMem(genome->name);
 freeMem(genome->trackDbFile);
 freez(pGenome);
 }
 
 void trackHubGenomeFreeList(struct trackHubGenome **pList)
 /* Release every trackHubGenome on the list and set *pList to NULL. */
 {
 struct trackHubGenome *cur = *pList;
 
 while (cur != NULL)
     {
     /* Grab the link first; freeing cur takes its next pointer with it. */
     struct trackHubGenome *following = cur->next;
     trackHubGenomeFree(&cur);
     cur = following;
     }
 *pList = NULL;
 }
 
 static char *requiredSetting(struct trackHub *hub, struct trackHubGenome *genome,
 	struct trackDb *tdb, char *setting)
 /* Return the value of setting for tdb.  When it is absent, abort with a
  * message that names the hub, genome and track as well as the setting. */
 {
 char *val = trackDbSetting(tdb, setting);
 if (val != NULL)
     return val;
 
 errAbort("Missing required %s setting in hub %s genome %s track %s", setting,
     	hub->url, genome->name, tdb->track);
 return NULL;
 }
 
 static void expandBigDataUrl(struct trackHub *hub, struct trackHubGenome *genome,
 	struct trackDb *tdb)
 /* If tdb has a bigDataUrl setting, replace it with the same url resolved
  * against genome->trackDbFile.  The previous value is freed. */
 {
 struct hashEl *hel = hashLookup(tdb->settingsHash, "bigDataUrl");
 if (hel == NULL)
     return;
 
 char *relativeUrl = hel->val;
 hel->val = trackHubRelativeUrl(genome->trackDbFile, relativeUrl);
 freeMem(relativeUrl);
 }
 
-static void checkTagsLegal(struct trackHub *hub, struct trackHubGenome *genome,
-	struct trackDb *tdb)
-/* Make sure that tdb has all the required tags and is of a supported type. */
+struct trackHubGenome *trackHubFindGenome(struct trackHub *hub, char *genomeName)
+/* Return trackHubGenome of given name associated with hub.  Return NULL if no
+ * such genome. */
+{
+return hashFindVal(hub->genomeHash, genomeName);
+}
+
+static void validateOneTrack( struct trackHub *hub, 
+    struct trackHubGenome *genome, struct trackDb *tdb)
 {
 /* Check for existence of fields required in all tracks */
 requiredSetting(hub, genome, tdb, "shortLabel");
 requiredSetting(hub, genome, tdb, "longLabel");
 
-/* Further checks depend whether it is a container. */
+// subtracks is not NULL if a track said we were its parent
 if (tdb->subtracks != NULL)
     {
-    if (trackDbSetting(tdb, "compositeTrack"))
-        {
-	}
-    else if (trackDbSetting(tdb, "container"))
-        {
-	}
-    else
+    boolean isSuper = FALSE;
+    char *superTrack = trackDbSetting(tdb, "superTrack");
+    if ((superTrack != NULL) && sameString(superTrack, "on"))
+	isSuper = TRUE;
+
+    if (!(trackDbSetting(tdb, "compositeTrack") ||
+          trackDbSetting(tdb, "container") || 
+	  isSuper))
         {
-	errAbort("Parent track %s is not compositeTrack or container in hub %s genome %s", 
+	errAbort("Parent track %s is not compositeTrack, container, or superTrack in hub %s genome %s", 
 		tdb->track, hub->url, genome->name);
 	}
     }
 else
     {
     /* Check type field. */
     char *type = requiredSetting(hub, genome, tdb, "type");
-    if (startsWithWord("bigWig", type))
-	;
-    else if (startsWithWord("bigBed", type))
-	;
-    else if (startsWithWord("bam", type))
-	;
-    else
+    if (!(startsWithWord("bigWig", type) ||
+          startsWithWord("bigBed", type) ||
+          startsWithWord("bam", type)))
+	{
 	errAbort("Unsupported type %s in hub %s genome %s track %s", type,
 	    hub->url, genome->name, tdb->track);
-
-    requiredSetting(hub, genome, tdb, "bigDataUrl");
     }
 
+    requiredSetting(hub, genome, tdb, "bigDataUrl");
 }
-
-struct trackHubGenome *trackHubFindGenome(struct trackHub *hub, char *genomeName)
-/* Return trackHubGenome of given name associated with hub.  Return NULL if no
- * such genome. */
-{
-return hashFindVal(hub->genomeHash, genomeName);
 }
 
-struct trackDb *trackHubTracksForGenome(struct trackHub *hub, struct trackHubGenome *genome)
-/* Get list of tracks associated with genome.  Check that it only is composed of legal
- * types.  Do a few other quick checks to catch errors early. */
+static void markContainers( struct trackHub *hub, 
+    struct trackHubGenome *genome, struct trackDb *tdbList)
+/* mark containers that are parents, or have them */
 {
-struct lineFile *lf = udcWrapShortLineFile(genome->trackDbFile, NULL, 16*1024*1024);
-struct trackDb *tdbList = trackDbFromOpenRa(lf, NULL);
-lineFileClose(&lf);
-
-/* Make bigDataUrls more absolute rather than relative to genome.ra dir */
+struct hash *hash = hashNew(0);
 struct trackDb *tdb;
-for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
-    expandBigDataUrl(hub, genome, tdb);
 
-/* Connect up subtracks and parents.  Note this loop does not actually move tracks
- * from list to parent subtracks, it just uses the field as a marker. Just do this
- * so when doing error checking can distinguish between container tracks and others.
- * This does have the pleasant side effect of making good error messages for
- * non-existant parents. */
-struct hash *hash = hashNew(0);
+// add all the track names to a hash
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     hashAdd(hash, tdb->track, tdb);
+
+// go through and find the container tracks
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     char *parentLine = trackDbLocalSetting(tdb, "parent");
+
+    // maybe it's a child of a supertrack?
+    if (parentLine == NULL)
+	{
+	parentLine = trackDbLocalSetting(tdb, "superTrack");
+	if ((parentLine != NULL) && sameString(parentLine, "on"))
+	    parentLine = NULL;
+	}
+
     if (parentLine != NULL)
          {
 	 char *parentName = cloneFirstWord(parentLine);
 	 struct trackDb *parent = hashFindVal(hash, parentName);
 	 if (parent == NULL)
 	    errAbort("Parent %s of track %s doesn't exist in hub %s genome %s", parentName,
 		tdb->track, hub->url, genome->name);
-	 tdb->parent = parent;
+	 // mark the parent as a container
 	 parent->subtracks = tdb;
+
+	 // ugh...do this so requiredSetting looks at parent
+	 // in the case of views.  We clear this after 
+	 // validating anyway
+	 tdb->parent = parent;
+
 	 freeMem(parentName);
 	 }
     }
 hashFree(&hash);
+}
+
+static void validateTracks( struct trackHub *hub, struct trackHubGenome *genome,
+    struct trackDb *tdbList)
+/* make sure a hub track list has the right settings and its parents exist */
+{
+// mark the containers by setting their subtracks pointer
+markContainers(hub, genome, tdbList);
 
-/* Loop through list checking tags and removing ad-hoc use of parent and subtracks tags. */
+/* Loop through list checking tags */
+struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
-    checkTagsLegal(hub, genome, tdb);
-    tdb->parent = tdb->subtracks = NULL;
+    validateOneTrack(hub, genome, tdb);
+
+    // clear these two pointers which we set in markContainers
+    tdb->subtracks = NULL;
+    tdb->parent = NULL;
+    }
     }
 
+struct trackDb *trackHubTracksForGenome(struct trackHub *hub, struct trackHubGenome *genome)
+/* Get list of tracks associated with genome.  Check that it only is composed of legal
+ * types.  Do a few other quick checks to catch errors early. */
+{
+struct lineFile *lf = udcWrapShortLineFile(genome->trackDbFile, NULL, 16*1024*1024);
+struct trackDb *tdbList = trackDbFromOpenRa(lf, NULL);
+lineFileClose(&lf);
+
+/* Make bigDataUrls more absolute rather than relative to genome.ra dir */
+struct trackDb *tdb;
+for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
+    expandBigDataUrl(hub, genome, tdb);
+
+validateTracks(hub, genome, tdbList);
+
 trackDbAddTableField(tdbList);
 trackHubAddNamePrefix(hub->name, tdbList);
 trackHubAddGroupName(hub->name, tdbList);
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     trackDbFieldsFromSettings(tdb);
     trackDbPolish(tdb);
     }
 return tdbList;
 }
 
 static void reprefixString(char **pString, char *prefix)
 /* Swap *pString for a new string made of prefix followed by the old
  * contents, and free the old string. */
 {
 char *unprefixed = *pString;
 char *prefixed = catTwoStrings(prefix, unprefixed);
 *pString = prefixed;
 freeMem(unprefixed);
 }
 
 static void addPrefixToSetting(struct hash *settings, char *key, char *prefix)
 /* If the string-valued settings hash has an entry for key, put prefix in
  * front of its value.  The old value is freed.  Missing keys are ignored. */
 {
 struct hashEl *hel = hashLookup(settings, key);
 if (hel == NULL)
     return;
 
 reprefixString((char **)&hel->val, prefix);
 }
 
 static void trackDbListAddNamePrefix(struct trackDb *tdbList, char *prefix)
 /* Surgically alter tdbList so that it works as if every track had been
  * renamed with prefix in front of its name.  Touches the "track" and
  * "parent" settings, the track field, and the table field when set. */
 {
 struct trackDb *tdb = tdbList;
 
 while (tdb != NULL)
     {
     struct hash *settings = tdb->settingsHash;
     addPrefixToSetting(settings, "track", prefix);
     addPrefixToSetting(settings, "parent", prefix);
     reprefixString(&tdb->track, prefix);
     if (tdb->table != NULL)
         reprefixString(&tdb->table, prefix);
     tdb = tdb->next;
     }
 }
 
 void trackHubAddNamePrefix(char *hubName, struct trackDb *tdbList)
 /* Put hubName plus an underscore in front of each track and parent field
  * in tdbList, so a hub named "hub_1" gives names starting with "hub_1_". */
 {
 char prefix[PATH_LEN];
 
 safef(prefix, sizeof(prefix), "%s_", hubName);
 trackDbListAddNamePrefix(tdbList, prefix);
 }
 
 void trackHubAddGroupName(char *hubName, struct trackDb *tdbList)
 /* Give every track in tdbList its own copy of hubName as its grp, and
  * record the same string under the "group" setting. */
 {
 struct trackDb *tdb = tdbList;
 
 while (tdb != NULL)
     {
     char *group = cloneString(hubName);
     tdb->grp = group;
     hashReplace(tdb->settingsHash, "group", group);
     tdb = tdb->next;
     }
 }