9440548e67d816a02d14939e7114ab57a12f6965
braney
  Sat Apr 12 15:35:48 2014 -0700
first cut at writing out search terms from public hubs #11633
diff --git src/hg/lib/trackHub.c src/hg/lib/trackHub.c
index 6e476ee..05d16cd 100644
--- src/hg/lib/trackHub.c
+++ src/hg/lib/trackHub.c
@@ -29,30 +29,32 @@
 #include "hgBam.h"
 #include "bigWig.h"
 #include "bigBed.h"
 #include "hdb.h"
 #include "chromInfo.h"
 #include "grp.h"
 #include "twoBit.h"
 #include "dbDb.h"
 #include "net.h"
 #include "bbiFile.h"
 #include "bPlusTree.h"
 #include "hgFind.h"
 #include "hubConnect.h"
 #include "trix.h"
 #include "vcf.h"
+#include "htmshell.h"
+#include "hubConnect.h"
 
 static struct hash *hubCladeHash;  // mapping of clade name to hub pointer
 static struct hash *hubAssemblyHash; // mapping of assembly name to genome struct
 static struct hash *hubOrgHash;   // mapping from organism name to hub pointer
 struct trackHub *globalAssemblyHubList; // list of trackHubs in the user's cart
 
 char *trackHubRelativeUrl(char *hubUrl, char *path)
 /* Return full path (in URL form if it's a remote hub) given
  * path possibly relative to hubUrl. Do a freeMem of result
  * when done. */
 {
 /* If path itself is a URL then just return a copy of it. */
 if (hasProtocol(path))
     return cloneString(path);
 
@@ -724,30 +726,31 @@
 /* Get list of tracks associated with genome.  Check that it only is composed of legal
  * types.  Do a few other quick checks to catch errors early. */
 {
 struct lineFile *lf = udcWrapShortLineFile(genome->trackDbFile, NULL, 64*1024*1024);
 struct trackDb *tdbList = trackDbFromOpenRa(lf, NULL);
 lineFileClose(&lf);
 
 /* Make bigDataUrls more absolute rather than relative to genome.ra dir */
 struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     expandBigDataUrl(hub, genome, tdb);
 
 validateTracks(hub, genome, tdbList);
 
 trackDbAddTableField(tdbList);
+if (!isEmpty(hub->name))
     trackHubAddNamePrefix(hub->name, tdbList);
 if (genome->twoBitPath == NULL)
     trackHubAddGroupName(hub->name, tdbList);
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     trackDbFieldsFromSettings(tdb);
     trackDbPolish(tdb);
     }
 return tdbList;
 }
 
 static void reprefixString(char **pString, char *prefix)
 /* Replace *pString with prefix + *pString, freeing
  * whatever was in *pString before. */
 {
@@ -794,46 +797,96 @@
     return name;
 return strchr(&name[4], '_') + 1;
 }
 
 void trackHubAddGroupName(char *hubName, struct trackDb *tdbList)
 /* Add group tag that references the hubs symbolic name. */
 {
 struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
     {
     tdb->grp = cloneString(hubName);
     hashReplace(tdb->settingsHash, "group", tdb->grp);
     }
 }
 
+static void addOneDescription(char *trackDbFile, struct trackDb *tdb)
+/* Load "<html setting>.html", resolved against trackDbFile, into tdb->html; no-op if the setting is absent. */
+{
+/* html setting should always be set because we set it at load time */
+char *htmlName = trackDbSetting(tdb, "html");
+if (htmlName == NULL)
+    return;
+
+char *simpleName = hubConnectSkipHubPrefix(htmlName);  /* presumably drops the hub name prefix -- TODO confirm */
+char *url = trackHubRelativeUrl(trackDbFile, simpleName);  /* allocated result; released by freez() below */
+char buffer[10*1024];
+safef(buffer, sizeof buffer, "%s.html", url);
+tdb->html = netReadTextFileIfExists(buffer);  /* NOTE: any previous tdb->html is overwritten, not freed */
+freez(&url);
+}
+
+void trackHubAddDescription(char *trackDbFile, struct trackDb *tdb)
+/* Fetch tdb->track's html description (or nearest ancestor's non-empty description)
+ * and store in tdb->html.  Ancestors visited on the way get their own ->html loaded too. */
+{
+addOneDescription(trackDbFile, tdb);
+if (isEmpty(tdb->html))
+    {
+    struct trackDb *parent;
+    for (parent = tdb->parent;  isEmpty(tdb->html) && parent != NULL;  parent = parent->parent)
+	{
+	addOneDescription(trackDbFile, parent);
+	if (isNotEmpty(parent->html))
+	    tdb->html = cloneString(parent->html);  /* separate copy; ends the walk via the loop condition */
+	}
+    }
+}
+
 static int hubCheckTrack(struct trackHub *hub, struct trackHubGenome *genome, 
-    struct trackDb *tdb, struct dyString *errors)
+    struct trackDb *tdb, struct dyString *errors, FILE *searchFp)
 /* Make sure that track is ok. */
 {
+int retVal = 0;
 struct errCatch *errCatch = errCatchNew();
+
+if (errCatchStart(errCatch))
+    {
+    if (searchFp != NULL)
+	{
+	addOneDescription(genome->trackDbFile, tdb);
+	if (tdb->html != NULL)
+	    {
+	    char *stripHtml =htmlTextStripTags(tdb->html);
+	    strSwapChar(stripHtml, '\n', ' ');
+	    strSwapChar(stripHtml, '\t', ' ');
+	    fprintf(searchFp, "%s.%s\t%s\t%s\t%s\n",hub->url, tdb->track, 
+		tdb->shortLabel, tdb->longLabel, stripHtml);
+	    }
+	else
+	    fprintf(searchFp, "%s.%s\t%s\t%s\n",hub->url, tdb->track, 
+		tdb->shortLabel, tdb->longLabel);
+	}
+    else 
+	{
 	char *relativeUrl = trackDbSetting(tdb, "bigDataUrl");
-int retVal = 0;
+	char *type = trackDbRequiredSetting(tdb, "type");
 
 	if (relativeUrl != NULL)
 	    {
-    if (errCatchStart(errCatch))
-	{
 	    char *bigDataUrl = trackHubRelativeUrl(genome->trackDbFile, relativeUrl);
-	char *type = trackDbRequiredSetting(tdb, "type");
 	    verbose(2, "checking %s.%s type %s at %s\n", genome->name, tdb->track, type, bigDataUrl);
-
 	    if (startsWithWord("bigWig", type))
 		{
 		/* Just open and close to verify file exists and is correct type. */
 		struct bbiFile *bbi = bigWigFileOpen(bigDataUrl);
 		bbiFileClose(&bbi);
 		}
 	    else if (startsWithWord("bigBed", type))
 		{
 		/* Just open and close to verify file exists and is correct type. */
 		struct bbiFile *bbi = bigBedFileOpen(bigDataUrl);
 		bbiFileClose(&bbi);
 		}
 	    else if (startsWithWord("vcfTabix", type))
 		{
 		/* Just open and close to verify file exists and is correct type. */
@@ -841,38 +894,39 @@
 		if (vcf == NULL)
 		    // Warnings already indicated whether the tabix file is missing etc.
 		    errAbort("Couldn't open %s and/or its tabix index (.tbi) file.  "
 			     "See http://genome.ucsc.edu/goldenPath/help/vcf.html",
 			     bigDataUrl);
 		vcfFileFree(&vcf);
 		}
 	    else if (startsWithWord("bam", type))
 		{
 		bamFileAndIndexMustExist(bigDataUrl);
 		}
 	    else
 		errAbort("unrecognized type %s in genome %s track %s", type, genome->name, tdb->track);
 	    freez(&bigDataUrl);
 	    }
+	}
+    }
 errCatchEnd(errCatch);
 if (errCatch->gotError)
     {
     retVal = 1;
     dyStringPrintf(errors, "%s", errCatch->message->string);
     }
 errCatchFree(&errCatch);
-    }
 
 return retVal;
 }
 
 void trackHubFixName(char *name)
 /* Change all characters other than alphanumeric, dash, and underbar
  * to underbar. */
 {
 if (name == NULL)
     return;
 
 char *in = name;
 char c;
 
 for(; (c = *in) != 0; in++)
@@ -918,93 +972,94 @@
 
 for (tdb = tdbList; tdb != NULL; tdb = next)
     {
     if (tdb->parent != NULL)
 	polishOneTrack(hub, tdb->parent, nameHash);
     next = tdb->next;
     polishOneTrack(hub, tdb, nameHash);
     if (tdb->subtracks != NULL)
 	{
 	trackHubPolishTrackNames(hub, tdb->subtracks);
 	}
     }
 }
 
 static int hubCheckGenome(struct trackHub *hub, struct trackHubGenome *genome,
-    struct dyString *errors, boolean checkTracks)
+    struct dyString *errors, boolean checkTracks, FILE *searchFp)
 /* Check out genome within hub. */
 {
 struct errCatch *errCatch = errCatchNew();
 struct trackDb *tdbList = NULL;
 int retVal = 0;
 
 if (errCatchStart(errCatch))
     {
     tdbList = trackHubTracksForGenome(hub, genome);
     trackHubPolishTrackNames(hub, tdbList);
     }
 errCatchEnd(errCatch);
 
 if (errCatch->gotError)
     {
     retVal = 1;
     dyStringPrintf(errors, "%s", errCatch->message->string);
     }
 errCatchFree(&errCatch);
 
 if (!checkTracks)
     return retVal;
 
 struct trackDb *tdb;
 for (tdb = tdbList; tdb != NULL; tdb = tdb->next)
-    retVal |= hubCheckTrack(hub, genome, tdb, errors);
+    retVal |= hubCheckTrack(hub, genome, tdb, errors, searchFp);  /* reached only when checkTracks is TRUE; searchFp may be NULL */
 verbose(2, "%d tracks in %s\n", slCount(tdbList), genome->name);
 
 return retVal;
 }
 
-int trackHubCheck(char *hubUrl, struct dyString *errors, boolean checkTracks)
+int trackHubCheck(char *hubUrl, struct dyString *errors, 
+    boolean checkTracks, FILE *searchFp)  /* searchFp: NULL, or a file that receives per-track search text when checkTracks is TRUE */
 /* hubCheck - Check a track data hub for integrity. Put errors in dyString.
  *      return 0 if hub has no errors, 1 otherwise 
  *      if checkTracks is TRUE, individual tracks are checked
  */
 
 {
 struct errCatch *errCatch = errCatchNew();
 struct trackHub *hub = NULL;
 int retVal = 0;
 
 if (errCatchStart(errCatch))
     hub = trackHubOpen(hubUrl, "");
 errCatchEnd(errCatch);
 
 if (errCatch->gotError)
     {
     retVal = 1;
     dyStringPrintf(errors, "%s", errCatch->message->string);
     }
 errCatchFree(&errCatch);
 
 if (hub == NULL)
     return 1;
 
 verbose(2, "hub %s\nshortLabel %s\nlongLabel %s\n", hubUrl, hub->shortLabel, hub->longLabel);
 verbose(2, "%s has %d elements\n", hub->genomesFile, slCount(hub->genomeList));
 struct trackHubGenome *genome;
 for (genome = hub->genomeList; genome != NULL; genome = genome->next)
     {
-    retVal |= hubCheckGenome(hub, genome, errors, checkTracks);
+    retVal |= hubCheckGenome(hub, genome, errors, checkTracks, searchFp);
     }
 trackHubClose(&hub);
 
 return retVal;
 }
 
 
 static struct hgPos *bigBedIntervalListToHgPositions(struct bbiFile *bbi, char *term, struct bigBedInterval *intervalList, char *description)
 /* Given an open bigBed file, and an interval list, return a pointer to a list of hgPos structures. */
 {
 struct hgPos *posList = NULL;
 char chromName[bbi->chromBpt->keySize+1];
 int lastChromId = -1;
 struct bigBedInterval *interval;
 
@@ -1126,15 +1181,16 @@
 
 void trackHubFindPos(char *db, char *term, struct hgPositions *hgp)
 /* Look for term in track hubs.  Update hgp if found */
 {
 struct trackDb *tdbList = NULL;
 if (trackHubDatabase(db))
     {
     struct trackHubGenome *genome = trackHubGetGenome(db);
     tdbList = trackHubTracksForGenome(genome->trackHub, genome);
     }
 else
     tdbList = hubCollectTracks(db, NULL);
 
 findPosInTdbList(tdbList, term, hgp);
 }
+