67ce69b2d6be585fcad3cb0dd5fc927e95c327ff
chmalee
  Mon Oct 14 13:48:54 2019 -0700
Rework of hgHubConnect hub searching to not use trackDb or udc so searches can be sped up. The hubSearchText table now has an extra column for the parent track names (if any) of a search result to a track. hgHubConnect has been changed to use this field of the table instead of using trackDb. hubCrawl has been changed to generate this additional column, refs #23812

diff --git src/hg/hgHubConnect/hgHubConnect.c src/hg/hgHubConnect/hgHubConnect.c
index 6e87bfb..d5dd50f 100644
--- src/hg/hgHubConnect/hgHubConnect.c
+++ src/hg/hgHubConnect/hgHubConnect.c
@@ -939,42 +939,137 @@
 safef(positionVar, sizeof(positionVar), "position.%s", genome->name);
 char *position = cartOptionalString(cart, positionVar);
 if (position == NULL)
     {
     struct dyString *tmp = dyStringCreate("position=");
     if (genome->defaultPos != NULL)
         dyStringAppend(tmp, genome->defaultPos);
     else
         dyStringAppend(tmp, hDefaultPos(genome->name)); // memory leak from hDefaultPos return value
     position = dyStringCannibalize(&tmp);
     }
 return position;
 }
 
 
+struct tdbOutputStructure *hstToTdbOutput(struct hubSearchText *hst, struct genomeOutputStructure *genomeOut, struct trackHub *hub)
+/* Find or create the tdbOutputStructure for hst->track in genomeOut->tdbOutHash and return it.
+ * A newly created track with a non-empty hst->parents is linked under its chain of parents,
+ * each of which is also looked up or created; if no parent was already hashed, the last
+ * parent listed is added to genomeOut->tracks. A track without parents is added directly. */
+{
+struct tdbOutputStructure *tdbOut = hashFindVal(genomeOut->tdbOutHash, hst->track);
+if (tdbOut == NULL)
+    {
+    genomeOut->trackCount++;
+    AllocVar(tdbOut);
+    tdbOut->shortLabel = dyStringNew(0);
+    tdbOut->metaTags = dyStringNew(0);
+    tdbOut->descriptionMatch = dyStringNew(0);
+    tdbOut->configUrl = dyStringNew(0);
+    dyStringPrintf(tdbOut->shortLabel, "%s", hst->label);
+
+    if (isNotEmpty(hst->parents))
+        {
+        // hst->parents is a comma-sep list like "track1","track1Label","track2","track2Label"
+        int i, parentCount;
+        char *parentTrack = NULL;
+        char *parentLabel = NULL;
+        char *parentTrackLabels[16]; // 2 slots per parent, can tracks nest more than 8 deep?
+        struct tdbOutputStructure *parentTdbOut = NULL;
+        struct tdbOutputStructure *savedParent = NULL;
+
+        parentCount = chopByCharRespectDoubleQuotes(cloneString(hst->parents), ',', parentTrackLabels,
+            sizeof(parentTrackLabels) / sizeof(parentTrackLabels[0])); // slot count, not byte size
+        if (parentCount == 0 || parentCount % 2 != 0)
+            errAbort("error parsing hubSearchText->parents for %s.%s in hub: '%s'",
+                genomeOut->genomeName, hst->track, hub->url);
+        dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+            hub->url, genomeOut->genomeName, parentTrackLabels[0], cartSessionId(cart),
+            genomeOut->positionString); // NOTE(review): parentTrackLabels[0] is not quote-stripped as in the loop below - confirm
+
+        boolean foundParent = FALSE;
+        boolean doAddSaveParent = FALSE;
+        for (i = 0; i < parentCount; i += 2)
+            {
+            parentTrack = stripEnclosingDoubleQuotes(cloneString(parentTrackLabels[i]));
+            parentLabel = stripEnclosingDoubleQuotes(cloneString(parentTrackLabels[i+1]));
+            parentTdbOut = hashFindVal(genomeOut->tdbOutHash, parentTrack);
+            if (parentTdbOut != NULL)
+                {
+                foundParent = TRUE; // don't add this track to the genomeOut->tracks hash again
+                if (savedParent && doAddSaveParent)
+                    {
+                    parentTdbOut->childCount += 1;
+                    slAddHead(&(parentTdbOut->children), savedParent);
+                    }
+                else if (!savedParent)
+                    {
+                    parentTdbOut->childCount += 1;
+                    slAddHead(&(parentTdbOut->children), tdbOut);
+                    }
+                savedParent = parentTdbOut;
+                doAddSaveParent = FALSE;
+                }
+            else
+                {
+                AllocVar(parentTdbOut);
+                parentTdbOut->shortLabel = dyStringNew(0);
+                parentTdbOut->metaTags = dyStringNew(0);
+                parentTdbOut->descriptionMatch = dyStringNew(0);
+                parentTdbOut->configUrl = dyStringNew(0);
+                // the link belongs to the new parent; appending it to tdbOut garbled the child's URL
+                dyStringPrintf(parentTdbOut->configUrl,
+                    "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+                    hub->url, genomeOut->genomeName, parentTrack, cartSessionId(cart), genomeOut->positionString);
+                dyStringPrintf(parentTdbOut->shortLabel, "%s", parentLabel);
+                parentTdbOut->childCount += 1;
+                if (savedParent)
+                    slAddHead(&(parentTdbOut->children), savedParent);
+                else
+                    slAddHead(&(parentTdbOut->children), tdbOut);
+                savedParent = parentTdbOut;
+                doAddSaveParent = TRUE;
+                hashAdd(genomeOut->tdbOutHash, parentTrack, parentTdbOut);
+                }
+            }
+        if (!foundParent)
+            slAddHead(&(genomeOut->tracks), parentTdbOut);
+        }
+    else
+        {
+        dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+            hub->url, genomeOut->genomeName, hst->track, cartSessionId(cart),
+            genomeOut->positionString);
+        slAddHead(&(genomeOut->tracks), tdbOut);
+        }
+    hashAdd(genomeOut->tdbOutHash, hst->track, tdbOut);
+    }
+return tdbOut;
+}
+
 struct hubOutputStructure *buildHubSearchOutputStructure(struct trackHub *hub,
         struct hubSearchText *searchResults)
 /* Build a structure that contains the data for writing out the hub search results for this hub */
 {
 struct hash *missingGenomes = hashNew(0);
 struct hubOutputStructure *hubOut = NULL;
 AllocVar(hubOut);
 hubOut->metaTags = dyStringNew(0);
 hubOut->descriptionMatch = dyStringNew(0);
 hubOut->genomeOutHash = newHash(5);
 
-struct hash *tdbHashHash = newHash(5);  // takes genome names to trackDb hashes
 
 struct hubSearchText *hst = NULL;
 for (hst = searchResults; hst != NULL; hst = hst->next)
     {
     if (isEmpty(hst->db))
         {
         // must be a hit to the hub itself, not an assembly or track within it
         if (hst->textLength == hubSearchTextLong)
             {
             dyStringPrintf(hubOut->descriptionMatch, "%s", hst->text);
             }
         else if (hst->textLength == hubSearchTextMeta)
             {
             if (isNotEmpty(dyStringContents(hubOut->metaTags)))
                 dyStringPrintf(hubOut->metaTags, ", %s", hst->text);
@@ -1029,42 +1124,31 @@
         {
         if (hst->textLength == hubSearchTextLong) // Genome description match
             dyStringPrintf(genomeOut->descriptionMatch, "%s", hst->text);
         else if (hst->textLength == hubSearchTextMeta)
             {
             if (isNotEmpty(dyStringContents(genomeOut->metaTags)))
                 dyStringPrintf(genomeOut->metaTags, ", %s", hst->text);
             else
                 dyStringPrintf(genomeOut->metaTags, "%s", hst->text);
             }
         }
 
     if (isNotEmpty(hst->track))
         {
         // Time to add a track! (or add info to one, maybe)
-        struct hash *tdbHash = (struct hash *) hashFindVal(tdbHashHash, db);
-        if (tdbHash == NULL)
-            {
-            tdbHash = newHash(5);
-            hashAdd(tdbHashHash, db, tdbHash);
-            struct trackDb *tdbList = trackHubTracksForGenome(hub, genome);
-            tdbList = trackDbLinkUpGenerations(tdbList);
-            tdbList = trackDbPolishAfterLinkup(tdbList, db);
-            trackHubPolishTrackNames(hub, tdbList);
-            buildTdbHash(tdbHash, tdbList);
-            }
-        struct tdbOutputStructure *tdbOut = addOrUpdateTrackOut(hst->track, genomeOut, tdbHash, hub);
+        struct tdbOutputStructure *tdbOut = hstToTdbOutput(hst, genomeOut, hub);
         if (tdbOut != NULL)
             {
             if (hst->textLength == hubSearchTextLong)
                 dyStringPrintf(tdbOut->descriptionMatch, "%s", hst->text);
             else if (hst->textLength == hubSearchTextMeta)
                 {
                 if (isNotEmpty(dyStringContents(tdbOut->metaTags)))
                     dyStringPrintf(tdbOut->metaTags, ", %s", hst->text);
                 else
                     dyStringPrintf(tdbOut->metaTags, "%s", hst->text);
                 }
             }
         }
     }
 return hubOut;
@@ -1136,30 +1220,31 @@
     tdbArrayDy = dyStringNew(0);
 if (idString == NULL)
     idString = dyStringNew(0);
 
 // The structure here is:
 // trackData[genome] = [{track 1 obj}, {track2 obj}, {track3 obj}, ... ]
 // trackData[track1] = [{search hit text}, {subtrack1 search hit}, {subtrack2 search hit}, ... ]
 //
 // if track1, track2, track3 are container tracks, then the recursive function
 // tdbOutputStructureToDystring creates the above trackData[track1] = [{}] for 
 // each of the containers, otherwise a single child of the genome is sufficient
 dyStringPrintf(dy, "trackData['%s'] = [", genomeNameId);
 if (genomeOut->tracks != NULL)
     {
     tdbOut = genomeOut->tracks;
+    slReverse(&tdbOut);
     while (tdbOut != NULL)
         {
         dyStringPrintf(idString, "%s", tdbOutputStructureLabelToId(tdbOut));
         dyStringPrintf(dy, "\n\t{\n\t'id': '%s',\n\t'parent': '%s',\n\t"
             "'li_attr': {'nodetype':'track', configlink: '%s'},\n\t'text': \'%s ",
             idString->string, genomeNameId, dyStringContents(tdbOut->configUrl), idString->string);
         if (isNotEmpty(dyStringContents(tdbOut->metaTags)))
             {
             dyStringPrintf(dy, "<br><span class=\\'descriptionMatch\\'><em>Metadata: %s</em></span>",
                 htmlEncode(dyStringContents(tdbOut->metaTags)));
             }
         if (isNotEmpty(dyStringContents(tdbOut->descriptionMatch)))
             {
             dyStringPrintf(dy, "<br><span class=\\'descriptionMatch\\'><em>Description: %s</em></span>",
                 htmlEncode(dyStringContents(tdbOut->descriptionMatch)));