744a3b2e1774459ae0348822c983a24746776f96
jcasper
  Fri Feb 16 12:26:31 2018 -0800
Hub text search now includes metadata tags, behaves better when encountering missing tracks/assemblies, and includes a MySQL index on the track field of hubSearchText; refs #20761, #18865, #20694

diff --git src/hg/hgHubConnect/hgHubConnect.c src/hg/hgHubConnect/hgHubConnect.c
index 0cea06c..81455f0 100644
--- src/hg/hgHubConnect/hgHubConnect.c
+++ src/hg/hgHubConnect/hgHubConnect.c
@@ -27,54 +27,57 @@
 #include "hgConfig.h"
 #include "trix.h"
 #include "net.h"
 #include "hubSearchText.h"
 
 struct cart *cart;	/* The user's ui state. */
 struct hash *oldVars = NULL;
 
 static char *pageTitle = "Track Data Hubs";
 char *database = NULL;
 char *organism = NULL;
 
 struct hubOutputStructure
     {
     struct hubOutputStructure *next;
+    struct dyString *metaTags;
     struct dyString *descriptionMatch;
     struct genomeOutputStructure *genomes;
     int genomeCount;
     struct hash *genomeOutHash;
     };
 
 struct genomeOutputStructure
     {
     struct genomeOutputStructure *next;
     struct dyString *shortLabel;
+    struct dyString *metaTags;
     struct dyString *descriptionMatch;
     struct tdbOutputStructure *tracks;
     struct dyString *assemblyLink;
     char *genomeName;
     char *positionString;
     int trackCount;
     struct hash *tdbOutHash;
     int hitCount;
     };
 
 struct tdbOutputStructure
     {
     struct tdbOutputStructure *next;
     struct dyString *shortLabel;
+    struct dyString *metaTags;
     struct dyString *descriptionMatch;
     struct dyString *configUrl;
     struct tdbOutputStructure *children;
     int childCount;
     };
 
 struct hubEntry
 // for entries pulled from hubPublic
     {
     struct hubEntry *next;
     char *hubUrl;
     char *shortLabel;
     char *longLabel;
     char *dbList;
     char *errorMessage;
@@ -648,59 +651,66 @@
     }
 
 printGenomeList(dbListNames, count); 
 printf("</tr>\n");
 }
 
 
 void printSearchOutputForTrack(struct tdbOutputStructure *tdbOut)
 /* Write out a <li> entry for a search hit on a track, along with a nested
  * <ul> for any included hits to subtracks */
 {
 printf("<li configLink='%s' nodeType='track'>\n", dyStringContents(tdbOut->configUrl));
 printf("%s", dyStringContents(tdbOut->shortLabel));
 if (tdbOut->childCount > 0)
     printf(" (%d subtrack%s)", tdbOut->childCount, tdbOut->childCount==1?"":"s");
-printf("<br>\n");
+if (isNotEmpty(dyStringContents(tdbOut->metaTags)))
+    {
+    printf("<br><span class='descriptionMatch'><em>Metadata: %s</em></span>\n", dyStringContents(tdbOut->metaTags));
+    }
 if (isNotEmpty(dyStringContents(tdbOut->descriptionMatch)))
     {
-    printf("<span class='descriptionMatch'><em>%s</em></span>\n", dyStringContents(tdbOut->descriptionMatch));
+    printf("<br><span class='descriptionMatch'><em>Description: %s</em></span>\n", dyStringContents(tdbOut->descriptionMatch));
     }
 if (tdbOut->children != NULL)
     {
     struct tdbOutputStructure *child = tdbOut->children;
     printf("<ul>\n");
     while (child != NULL)
         {
         printSearchOutputForTrack(child);
         child = child->next;
         }
     printf("</ul>\n");
     }
 printf("</li>\n");
 }
 
 
 void printSearchOutputForGenome(struct genomeOutputStructure *genomeOut)
 /* Write out a chunk of search results for a genome as a <li>, with a nested ul
  * element for hits to tracks within that genome */
 {
 printf("<li assemblyLink='%s' nodeType='assembly'>%s",
         dyStringContents(genomeOut->assemblyLink), dyStringContents(genomeOut->shortLabel));
 if (genomeOut->trackCount > 0)
     printf(" (%d track%s)", genomeOut->trackCount, genomeOut->trackCount==1?"":"s");
 
+if (isNotEmpty(dyStringContents(genomeOut->metaTags)))
+    {
+    printf("<br><span class='descriptionMatch'><em>%s</em></span>\n", dyStringContents(genomeOut->metaTags));
+    }
 if (isNotEmpty(dyStringContents(genomeOut->descriptionMatch)))
     {
     printf("<br>\n<em>Assembly Description:</em> %s\n", dyStringContents(genomeOut->descriptionMatch));
     }
 if (genomeOut->tracks != NULL)
     {
     printf("<ul>\n");
     struct tdbOutputStructure *tdbOut = genomeOut->tracks;
     while (tdbOut != NULL)
         {
         printSearchOutputForTrack(tdbOut);
         tdbOut = tdbOut->next;
         }
     printf("</ul>\n");
     }
@@ -731,67 +741,87 @@
 
 
 struct tdbOutputStructure *addOrUpdateTrackOut(char *track, struct genomeOutputStructure *genomeOut,
         struct hash *tdbHash, struct trackHub *hub)
 /* If an output structure already exists for the track within genomeOut, return that.  Otherwise,
  * create one for it and add it to genomeOut.  Any missing parent tracks are also added at
  * the same time.
  * tdbHash takes track names to the struct trackDb * for that track */
 {
 struct tdbOutputStructure *tdbOut = hashFindVal(genomeOut->tdbOutHash, track);
 if (tdbOut == NULL)
     {
     genomeOut->trackCount++;
     AllocVar(tdbOut);
     tdbOut->shortLabel = dyStringNew(0);
+    tdbOut->metaTags = dyStringNew(0);
     tdbOut->descriptionMatch = dyStringNew(0);
     tdbOut->configUrl = dyStringNew(0);
     struct trackDb *trackInfo = (struct trackDb *) hashFindVal(tdbHash, track);
     if (trackInfo == NULL)
         {
         // Some tracks are prefixed with the hub name; try that
         char withHubName[4096];
         safef(withHubName, sizeof(withHubName), "%s_%s", hub->name, track);
-        trackInfo = hashMustFindVal(tdbHash, withHubName);
+        trackInfo = hashFindVal(tdbHash, withHubName);
+        if (trackInfo == NULL)
+            {
+            warn("Error: Unable to locate info for matching track '%s'.  Skipping ...\n", withHubName);
+            return NULL;
+            }
         }
     if (isNotEmpty(trackInfo->longLabel))
         dyStringPrintf(tdbOut->shortLabel, "%s", trackInfo->longLabel);
     else if (isNotEmpty(trackInfo->shortLabel))
         dyStringPrintf(tdbOut->shortLabel, "%s", trackInfo->shortLabel);
     else
         dyStringPrintf(tdbOut->shortLabel, "%s", trackHubSkipHubName(trackInfo->track));
 
     if (tdbIsCompositeView(trackInfo) || tdbIsCompositeChild(trackInfo))
         {
         struct trackDb *parentTdb = tdbGetComposite(trackInfo);
         dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s", hub->url,
                 genomeOut->genomeName, parentTdb->track, cartSessionId(cart), genomeOut->positionString);
         }
     else
         {
         dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s", hub->url,
                 genomeOut->genomeName, trackInfo->track, cartSessionId(cart), genomeOut->positionString);
         }
 
     if (trackInfo->parent != NULL)
         {
         struct trackDb *parent = trackInfo->parent;
         struct tdbOutputStructure *parentOut = addOrUpdateTrackOut(parent->track, genomeOut, tdbHash, hub);
+        if (parentOut != NULL)
+            {
+            // addOrUpdateTrackOut only returns NULL if it can't find the parent here.
+            // This probably means the trackDb is corrupted, which should have already
+            // generated a fatal error.  All the same ...
             slAddTail(&(parentOut->children), tdbOut);
             parentOut->childCount++;
             }
         else
+            {
+            // If we can't find the track's rightful parent, we can't report its position
+            // in the track hierarchy accurately.  Time to abort.  A warning will already
+            // have been generated by addOrUpdateTrackOut(parent) failing.
+            return NULL;
+            }
+        }
+    else
+        // No parent track, so add it to the root level track list for output
         slAddTail(&(genomeOut->tracks), tdbOut);
     hashAdd(genomeOut->tdbOutHash, track, tdbOut);
     }
 return tdbOut;
 }
 
 
 void buildTdbHash(struct hash *tdbHash, struct trackDb *tdbList)
 /* Recursively add all tracks from tdbList to the hash (indexed by track),
  * along with all parents and children of those tracks */
 {
 struct trackDb *tdb = tdbList;
 while (tdb != NULL)
     {
     hashAdd(tdbHash, tdb->track, tdb);
@@ -818,105 +848,140 @@
     struct dyString *tmp = dyStringCreate("position=");
     if (genome->defaultPos != NULL)
         dyStringAppend(tmp, genome->defaultPos);
     else
         dyStringAppend(tmp, hDefaultPos(genome->name)); // memory leak from hDefaultPos return value
     position = dyStringCannibalize(&tmp);
     }
 return position;
 }
 
 
 struct hubOutputStructure *buildHubSearchOutputStructure(struct trackHub *hub,
         struct hubSearchText *searchResults)
 /* Build a structure that contains the data for writing out the hub search results for this hub */
 {
+struct hash *missingGenomes = hashNew(0);
 struct hubOutputStructure *hubOut = NULL;
 AllocVar(hubOut);
+hubOut->metaTags = dyStringNew(0);
 hubOut->descriptionMatch = dyStringNew(0);
 hubOut->genomeOutHash = newHash(5);
 
 struct hash *tdbHashHash = newHash(5);  // takes genome names to trackDb hashes
 
 struct hubSearchText *hst = NULL;
 for (hst = searchResults; hst != NULL; hst = hst->next)
     {
     if (isEmpty(hst->db))
         {
         // must be a hit to the hub itself, not an assembly or track within it
         if (hst->textLength == hubSearchTextLong)
             {
             dyStringPrintf(hubOut->descriptionMatch, "%s", hst->text);
             }
+        else if (hst->textLength == hubSearchTextMeta)
+            {
+            if (isNotEmpty(dyStringContents(hubOut->metaTags)))
+                dyStringPrintf(hubOut->metaTags, ", %s", hst->text);
+            else
+                dyStringPrintf(hubOut->metaTags, "%s", hst->text);
+            }
         continue;
         }
 
     char *db = cloneString(hst->db);
+    if (hashLookup(missingGenomes, db) != NULL)
+        continue;
     struct trackHubGenome *genome = hashFindVal(hub->genomeHash, db);
     if (genome == NULL)
         {
         // assembly hub genomes are stored with a prefix; try that
         char withHubName[4096];
         safef(withHubName, sizeof(withHubName), "%s_%s", hub->name, db);
-        genome = hashMustFindVal(hub->genomeHash, withHubName);
+        genome = hashFindVal(hub->genomeHash, withHubName);
+        if (genome == NULL)
+            {
+            hashStoreName(missingGenomes, db);
+            warn("Error: Unable to find info for matching assembly '%s'.  Skipping ...\n", withHubName);
+            continue;
+            }
         }
     struct genomeOutputStructure *genomeOut = hashFindVal(hubOut->genomeOutHash, db);
     if (genomeOut == NULL)
         {
         AllocVar(genomeOut);
         genomeOut->tdbOutHash = newHash(5);
+        genomeOut->metaTags = dyStringNew(0);
         genomeOut->descriptionMatch = dyStringNew(0);
         genomeOut->shortLabel = dyStringNew(0);
         genomeOut->assemblyLink = dyStringNew(0);
         genomeOut->positionString = getPositionStringForDb(genome);
         dyStringPrintf(genomeOut->assemblyLink, "../cgi-bin/hgTracks?hubUrl=%s&db=%s&hgsid=%s&%s",
                 hub->url, genome->name, cartSessionId(cart), genomeOut->positionString);
         char *name = trackHubSkipHubName(genome->name);
         if (isNotEmpty(genome->description))
             dyStringPrintf(genomeOut->shortLabel, "%s (%s)", genome->description, name);
         else if (isNotEmpty(genome->organism))
             dyStringPrintf(genomeOut->shortLabel, "%s %s", genome->organism, name);
         else
             dyStringPrintf(genomeOut->shortLabel, "%s", name);
         genomeOut->genomeName = cloneString(genome->name);
         hashAdd(hubOut->genomeOutHash, db, genomeOut);
         slAddTail(&(hubOut->genomes), genomeOut);
         hubOut->genomeCount++;
         }
-    if (isEmpty(hst->track) && hst->textLength == hubSearchTextLong)
+    if (isEmpty(hst->track))
         {
-        // Genome description match
+        if (hst->textLength == hubSearchTextLong) // Genome description match
             dyStringPrintf(genomeOut->descriptionMatch, "%s", hst->text);
+        else if (hst->textLength == hubSearchTextMeta)
+            {
+            if (isNotEmpty(dyStringContents(genomeOut->metaTags)))
+                dyStringPrintf(genomeOut->metaTags, ", %s", hst->text);
+            else
+                dyStringPrintf(genomeOut->metaTags, "%s", hst->text);
+            }
         }
 
     if (isNotEmpty(hst->track))
         {
         // Time to add a track! (or add info to one, maybe)
         struct hash *tdbHash = (struct hash *) hashFindVal(tdbHashHash, db);
         if (tdbHash == NULL)
             {
             tdbHash = newHash(5);
             hashAdd(tdbHashHash, db, tdbHash);
             struct trackDb *tdbList = trackHubTracksForGenome(hub, genome);
             tdbList = trackDbLinkUpGenerations(tdbList);
             tdbList = trackDbPolishAfterLinkup(tdbList, db);
             trackHubPolishTrackNames(hub, tdbList);
             buildTdbHash(tdbHash, tdbList);
             }
         struct tdbOutputStructure *tdbOut = addOrUpdateTrackOut(hst->track, genomeOut, tdbHash, hub);
+        if (tdbOut != NULL)
+            {
             if (hst->textLength == hubSearchTextLong)
                 dyStringPrintf(tdbOut->descriptionMatch, "%s", hst->text);
+            else if (hst->textLength == hubSearchTextMeta)
+                {
+                if (isNotEmpty(dyStringContents(tdbOut->metaTags)))
+                    dyStringPrintf(tdbOut->metaTags, ", %s", hst->text);
+                else
+                    dyStringPrintf(tdbOut->metaTags, "%s", hst->text);
+                }
+            }
         }
     }
 return hubOut;
 }
 
 
 static void printOutputForHub(struct hubEntry *hubInfo, struct hubSearchText *hubSearchResult, int count)
 /* Given a hub's info and a structure listing the search hits within the hub, first print
  * a basic line of hub information with a "connect" button.  Then, if the search results
  * are non-NULL, write out information about the genomes and tracks from the search hits that
  * match the db filter.
  * If there are no search results to print, the basic hub lines are combined into a single HTML table
  * that is defined outside this function.
  * Otherwise, each hub line is printed in its own table followed by a <ul> containing details
  * about the search results. */
@@ -1277,30 +1342,31 @@
 if (cartVarExists(cart, hgHubCheckUrl))
     {
     doResetHub(cart);
     }
 
 if (cartVarExists(cart, hgHubDoRedirect))
     {
     if (doRedirect(cart))
 	{
 	cartWebEnd();
 	return;
 	}
     }
 
 cartWebStart(cart, NULL, "%s", pageTitle);
+
 printf(
 "<link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/jstree/3.3.4/themes/default/style.min.css\" />\n"
 "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/1.12.1/jquery.min.js\"></script>\n"
 "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jstree/3.3.4/jstree.min.js\"></script>\n"
 "<style>.jstree-default .jstree-anchor { height: initial; } </style>\n"
 );
 jsIncludeFile("utils.js", NULL);
 jsIncludeFile("jquery-ui.js", NULL);
 webIncludeResourceFile("jquery-ui.css");
 jsIncludeFile("ajax.js", NULL);
 jsIncludeFile("hgHubConnect.js", NULL);
 webIncludeResourceFile("hgHubConnect.css");
 jsIncludeFile("jquery.cookie.js", NULL);
 
 printf("<div id=\"hgHubConnectUI\"> <div id=\"description\"> \n");