67ce69b2d6be585fcad3cb0dd5fc927e95c327ff
chmalee
Mon Oct 14 13:48:54 2019 -0700
Rework of hgHubConnect hub searching to not use trackDb or udc so searches can be sped up. The hubSearchText table now has an extra column for the parent track names (if any) of a search result to a track. hgHubConnect has been changed to use this field of the table instead of using trackDb. hubCrawl has been changed to generate this additional column, refs #23812

diff --git src/hg/hgHubConnect/hgHubConnect.c src/hg/hgHubConnect/hgHubConnect.c
index 6e87bfb..d5dd50f 100644
--- src/hg/hgHubConnect/hgHubConnect.c
+++ src/hg/hgHubConnect/hgHubConnect.c
@@ -939,42 +939,137 @@
 safef(positionVar, sizeof(positionVar), "position.%s", genome->name);
 char *position = cartOptionalString(cart, positionVar);
 if (position == NULL)
     {
     struct dyString *tmp = dyStringCreate("position=");
     if (genome->defaultPos != NULL)
         dyStringAppend(tmp, genome->defaultPos);
     else
         dyStringAppend(tmp, hDefaultPos(genome->name)); // memory leak from hDefaultPos return value
     position = dyStringCannibalize(&tmp);
     }
 return position;
 }
 
+struct tdbOutputStructure *hstToTdbOutput(struct hubSearchText *hst, struct genomeOutputStructure *genomeOut, struct trackHub *hub)
+/* Return the tdbOutputStructure for hst->track in genomeOut, creating it on first sight.
+ * Parents named in hst->parents that are not yet in genomeOut->tdbOutHash are created and
+ * linked through their children lists; errAborts if hst->parents does not split into pairs. */
+{
+struct tdbOutputStructure *tdbOut = hashFindVal(genomeOut->tdbOutHash, hst->track);
+if (tdbOut == NULL)
+    {
+    genomeOut->trackCount++;
+    AllocVar(tdbOut);
+    tdbOut->shortLabel = dyStringNew(0);
+    tdbOut->metaTags = dyStringNew(0);
+    tdbOut->descriptionMatch = dyStringNew(0);
+    tdbOut->configUrl = dyStringNew(0);
+    dyStringPrintf(tdbOut->shortLabel, "%s", hst->label);
+
+    if (isNotEmpty(hst->parents))
+        {
+        // hst->parents is a comma-sep list like "track1","track1Label","track2","track2Label"
+        int i;
+        char *parentTrackLabels[16]; // 2 slots per parent, can tracks nest more than 8 deep?
+        struct tdbOutputStructure *parentTdbOut = NULL;
+        struct tdbOutputStructure *savedParent = NULL;
+
+        int parentCount = chopByCharRespectDoubleQuotes(cloneString(hst->parents), ',', parentTrackLabels,
+            sizeof(parentTrackLabels) / sizeof(parentTrackLabels[0])); // slot count, not sizeof's byte count
+        if (parentCount == 0 || parentCount % 2 != 0)
+            {
+            errAbort("error parsing hubSearchText->parents for %s.%s in hub: '%s'",
+                genomeOut->genomeName, hst->track, hub->url);
+            }
+        dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+            hub->url, genomeOut->genomeName, parentTrackLabels[0], cartSessionId(cart),
+            genomeOut->positionString);
+
+        boolean foundParent = FALSE;
+        boolean doAddSaveParent = FALSE;
+        for (i = 0; i < parentCount; i += 2)
+            {
+            char *parentTrack = stripEnclosingDoubleQuotes(cloneString(parentTrackLabels[i]));
+            char *parentLabel = stripEnclosingDoubleQuotes(cloneString(parentTrackLabels[i+1]));
+            parentTdbOut = hashFindVal(genomeOut->tdbOutHash, parentTrack);
+            if (parentTdbOut != NULL)
+                {
+                foundParent = TRUE; // don't add this track to the genomeOut->tracks hash again
+                if (savedParent && doAddSaveParent)
+                    {
+                    parentTdbOut->childCount += 1;
+                    slAddHead(&(parentTdbOut->children), savedParent);
+                    }
+                else if (!savedParent)
+                    {
+                    parentTdbOut->childCount += 1;
+                    slAddHead(&(parentTdbOut->children), tdbOut);
+                    }
+                savedParent = parentTdbOut;
+                doAddSaveParent = FALSE;
+                }
+            else
+                {
+                AllocVar(parentTdbOut);
+                parentTdbOut->shortLabel = dyStringNew(0);
+                parentTdbOut->metaTags = dyStringNew(0);
+                parentTdbOut->descriptionMatch = dyStringNew(0);
+                parentTdbOut->configUrl = dyStringNew(0);
+                dyStringPrintf(parentTdbOut->configUrl, // the new parent's own link; tdbOut's was set above
+                    "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+                    hub->url, genomeOut->genomeName, parentTrack, cartSessionId(cart), genomeOut->positionString);
+                dyStringPrintf(parentTdbOut->shortLabel, "%s", parentLabel);
+                parentTdbOut->childCount += 1;
+                if (savedParent)
+                    slAddHead(&(parentTdbOut->children), savedParent);
+                else
+                    slAddHead(&(parentTdbOut->children), tdbOut);
+                savedParent = parentTdbOut;
+                doAddSaveParent = TRUE;
+                hashAdd(genomeOut->tdbOutHash, parentTrack, parentTdbOut);
+                }
+            }
+        if (!foundParent)
+            {
+            slAddHead(&(genomeOut->tracks), parentTdbOut);
+            }
+        }
+    else
+        {
+        dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s",
+            hub->url, genomeOut->genomeName, hst->track, cartSessionId(cart),
+            genomeOut->positionString);
+        slAddHead(&(genomeOut->tracks), tdbOut);
+        }
+    hashAdd(genomeOut->tdbOutHash, hst->track, tdbOut);
+    }
+return tdbOut;
+}
+
 struct hubOutputStructure *buildHubSearchOutputStructure(struct trackHub *hub,
     struct hubSearchText *searchResults)
 /* Build a structure that contains the data for writing out the hub search results for this hub */
 {
 struct hash *missingGenomes = hashNew(0);
 struct hubOutputStructure *hubOut = NULL;
 AllocVar(hubOut);
 hubOut->metaTags = dyStringNew(0);
 hubOut->descriptionMatch = dyStringNew(0);
 hubOut->genomeOutHash = newHash(5);
-struct hash *tdbHashHash = newHash(5);  // takes genome names to trackDb hashes
 struct hubSearchText *hst = NULL;
 for (hst = searchResults; hst != NULL; hst = hst->next)
     {
     if (isEmpty(hst->db))
         {
         // must be a hit to the hub itself, not an assembly or track within it
         if (hst->textLength == hubSearchTextLong)
             {
             dyStringPrintf(hubOut->descriptionMatch, "%s", hst->text);
             }
         else if (hst->textLength == hubSearchTextMeta)
             {
             if (isNotEmpty(dyStringContents(hubOut->metaTags)))
                 dyStringPrintf(hubOut->metaTags, ", %s", hst->text);
@@ -1029,42 +1124,31 @@
         {
         if (hst->textLength == hubSearchTextLong) // Genome description match
             dyStringPrintf(genomeOut->descriptionMatch, "%s", hst->text);
         else if (hst->textLength == hubSearchTextMeta)
             {
             if (isNotEmpty(dyStringContents(genomeOut->metaTags)))
                 dyStringPrintf(genomeOut->metaTags, ", %s", hst->text);
             else
                 dyStringPrintf(genomeOut->metaTags, "%s", hst->text);
             }
         }
     if (isNotEmpty(hst->track))
         {
         // Time to add a track! 
(or add info to one, maybe) - struct hash *tdbHash = (struct hash *) hashFindVal(tdbHashHash, db); - if (tdbHash == NULL) - { - tdbHash = newHash(5); - hashAdd(tdbHashHash, db, tdbHash); - struct trackDb *tdbList = trackHubTracksForGenome(hub, genome); - tdbList = trackDbLinkUpGenerations(tdbList); - tdbList = trackDbPolishAfterLinkup(tdbList, db); - trackHubPolishTrackNames(hub, tdbList); - buildTdbHash(tdbHash, tdbList); - } - struct tdbOutputStructure *tdbOut = addOrUpdateTrackOut(hst->track, genomeOut, tdbHash, hub); + struct tdbOutputStructure *tdbOut = hstToTdbOutput(hst, genomeOut, hub); if (tdbOut != NULL) { if (hst->textLength == hubSearchTextLong) dyStringPrintf(tdbOut->descriptionMatch, "%s", hst->text); else if (hst->textLength == hubSearchTextMeta) { if (isNotEmpty(dyStringContents(tdbOut->metaTags))) dyStringPrintf(tdbOut->metaTags, ", %s", hst->text); else dyStringPrintf(tdbOut->metaTags, "%s", hst->text); } } } } return hubOut; @@ -1136,30 +1220,31 @@ tdbArrayDy = dyStringNew(0); if (idString == NULL) idString = dyStringNew(0); // The structure here is: // trackData[genome] = [{track 1 obj}, {track2 obj}, {track3 obj}, ... ] // trackData[track1] = [{search hit text}, {subtrack1 search hit}, {subtrack2 search hit}, ... 
] // // if track1, track2, track3 are container tracks, then the recursive function // tdbOutputStructureToDystring creates the above trackData[track1] = [{}] for // each of the containers, otherwise a single child of the genome is sufficient dyStringPrintf(dy, "trackData['%s'] = [", genomeNameId); if (genomeOut->tracks != NULL) { tdbOut = genomeOut->tracks; + slReverse(&tdbOut); while (tdbOut != NULL) { dyStringPrintf(idString, "%s", tdbOutputStructureLabelToId(tdbOut)); dyStringPrintf(dy, "\n\t{\n\t'id': '%s',\n\t'parent': '%s',\n\t" "'li_attr': {'nodetype':'track', configlink: '%s'},\n\t'text': \'%s ", idString->string, genomeNameId, dyStringContents(tdbOut->configUrl), idString->string); if (isNotEmpty(dyStringContents(tdbOut->metaTags))) { dyStringPrintf(dy, "
Metadata: %s", htmlEncode(dyStringContents(tdbOut->metaTags))); } if (isNotEmpty(dyStringContents(tdbOut->descriptionMatch))) { dyStringPrintf(dy, "
Description: %s", htmlEncode(dyStringContents(tdbOut->descriptionMatch)));