744a3b2e1774459ae0348822c983a24746776f96 jcasper Fri Feb 16 12:26:31 2018 -0800 Hub text search now includes metadata tags, behaves better when encountering missing tracks/assemblies, and includes a MySQL index on the track field of hubSearchText refs #20761, #18865, #20694 diff --git src/hg/hgHubConnect/hgHubConnect.c src/hg/hgHubConnect/hgHubConnect.c index 0cea06c..81455f0 100644 --- src/hg/hgHubConnect/hgHubConnect.c +++ src/hg/hgHubConnect/hgHubConnect.c @@ -27,54 +27,57 @@ #include "hgConfig.h" #include "trix.h" #include "net.h" #include "hubSearchText.h" struct cart *cart; /* The user's ui state. */ struct hash *oldVars = NULL; static char *pageTitle = "Track Data Hubs"; char *database = NULL; char *organism = NULL; struct hubOutputStructure { struct hubOutputStructure *next; + struct dyString *metaTags; struct dyString *descriptionMatch; struct genomeOutputStructure *genomes; int genomeCount; struct hash *genomeOutHash; }; struct genomeOutputStructure { struct genomeOutputStructure *next; struct dyString *shortLabel; + struct dyString *metaTags; struct dyString *descriptionMatch; struct tdbOutputStructure *tracks; struct dyString *assemblyLink; char *genomeName; char *positionString; int trackCount; struct hash *tdbOutHash; int hitCount; }; struct tdbOutputStructure { struct tdbOutputStructure *next; struct dyString *shortLabel; + struct dyString *metaTags; struct dyString *descriptionMatch; struct dyString *configUrl; struct tdbOutputStructure *children; int childCount; }; struct hubEntry // for entries pulled from hubPublic { struct hubEntry *next; char *hubUrl; char *shortLabel; char *longLabel; char *dbList; char *errorMessage; @@ -648,59 +651,66 @@ } printGenomeList(dbListNames, count); printf("</tr>\n"); } void printSearchOutputForTrack(struct tdbOutputStructure *tdbOut) /* Write out a <li> entry for a search hit on a track, along with a nested * <ul> for any included hits to subtracks */ { printf("<li configLink='%s' nodeType='track'>\n", dyStringContents(tdbOut->configUrl)); printf("%s", dyStringContents(tdbOut->shortLabel)); if (tdbOut->childCount > 0) printf(" (%d subtrack%s)", tdbOut->childCount, tdbOut->childCount==1?"":"s"); -printf("<br>\n"); +if (isNotEmpty(dyStringContents(tdbOut->metaTags))) + { + printf("<br><span class='descriptionMatch'><em>Metadata: %s</em></span>\n", dyStringContents(tdbOut->metaTags)); + } if (isNotEmpty(dyStringContents(tdbOut->descriptionMatch))) { - printf("<span class='descriptionMatch'><em>%s</em></span>\n", dyStringContents(tdbOut->descriptionMatch)); + printf("<br><span class='descriptionMatch'><em>Description: %s</em></span>\n", dyStringContents(tdbOut->descriptionMatch)); } if (tdbOut->children != NULL) { struct tdbOutputStructure *child = tdbOut->children; printf("<ul>\n"); while (child != NULL) { printSearchOutputForTrack(child); child = child->next; } printf("</ul>\n"); } printf("</li>\n"); } void printSearchOutputForGenome(struct genomeOutputStructure *genomeOut) /* Write out a chunk of search results for a genome as a <li>, with a nested ul * element for hits to tracks within that genome */ { printf("<li assemblyLink='%s' nodeType='assembly'>%s", dyStringContents(genomeOut->assemblyLink), dyStringContents(genomeOut->shortLabel)); if (genomeOut->trackCount > 0) printf(" (%d track%s)", genomeOut->trackCount, genomeOut->trackCount==1?"":"s"); +if (isNotEmpty(dyStringContents(genomeOut->metaTags))) + { + printf("<br><span class='descriptionMatch'><em>%s</em></span>\n", dyStringContents(genomeOut->metaTags)); + } if (isNotEmpty(dyStringContents(genomeOut->descriptionMatch))) { printf("<br>\n<em>Assembly Description:</em> %s\n", dyStringContents(genomeOut->descriptionMatch)); } if (genomeOut->tracks != NULL) { printf("<ul>\n"); struct tdbOutputStructure *tdbOut = genomeOut->tracks; while (tdbOut != NULL) { printSearchOutputForTrack(tdbOut); tdbOut = tdbOut->next; } printf("</ul>\n"); } @@ -731,67 +741,87 @@ struct tdbOutputStructure *addOrUpdateTrackOut(char *track, struct genomeOutputStructure *genomeOut, struct hash *tdbHash, struct trackHub *hub) /* If an output structure already exists for the track within genomeOut, return that. Otherwise, * create one for it and add it to genomeOut. Any missing parent tracks are also added at * the same time. * tdbHash takes track names to the struct trackDb * for that track */ { struct tdbOutputStructure *tdbOut = hashFindVal(genomeOut->tdbOutHash, track); if (tdbOut == NULL) { genomeOut->trackCount++; AllocVar(tdbOut); tdbOut->shortLabel = dyStringNew(0); + tdbOut->metaTags = dyStringNew(0); tdbOut->descriptionMatch = dyStringNew(0); tdbOut->configUrl = dyStringNew(0); struct trackDb *trackInfo = (struct trackDb *) hashFindVal(tdbHash, track); if (trackInfo == NULL) { // Some tracks are prefixed with the hub name; try that char withHubName[4096]; safef(withHubName, sizeof(withHubName), "%s_%s", hub->name, track); - trackInfo = hashMustFindVal(tdbHash, withHubName); + trackInfo = hashFindVal(tdbHash, withHubName); + if (trackInfo == NULL) + { + warn("Error: Unable to locate info for matching track '%s'. Skipping ...\n", withHubName); + return NULL; + } } if (isNotEmpty(trackInfo->longLabel)) dyStringPrintf(tdbOut->shortLabel, "%s", trackInfo->longLabel); else if (isNotEmpty(trackInfo->shortLabel)) dyStringPrintf(tdbOut->shortLabel, "%s", trackInfo->shortLabel); else dyStringPrintf(tdbOut->shortLabel, "%s", trackHubSkipHubName(trackInfo->track)); if (tdbIsCompositeView(trackInfo) || tdbIsCompositeChild(trackInfo)) { struct trackDb *parentTdb = tdbGetComposite(trackInfo); dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s", hub->url, genomeOut->genomeName, parentTdb->track, cartSessionId(cart), genomeOut->positionString); } else { dyStringPrintf(tdbOut->configUrl, "../cgi-bin/hgTrackUi?hubUrl=%s&db=%s&g=%s&hgsid=%s&%s", hub->url, genomeOut->genomeName, trackInfo->track, cartSessionId(cart), genomeOut->positionString); } if (trackInfo->parent != NULL) { struct trackDb *parent = trackInfo->parent; struct tdbOutputStructure *parentOut = addOrUpdateTrackOut(parent->track, genomeOut, tdbHash, hub); + if (parentOut != NULL) + { + // addOrUpdateTrackOut only returns NULL if it can't find the parent here. + // This probably means the trackDb is corrupted, which should have already + // generated a fatal error. All the same ... slAddTail(&(parentOut->children), tdbOut); parentOut->childCount++; } else + { + // If we can't find the track's rightful parent, we can't report its position + // in the track hierarchy accurately. Time to abort. A warning will already + // have been generated by addOrUpdateTrackOut(parent) failing. + return NULL; + } + } + else + // No parent track, so add it to the root level track list for output slAddTail(&(genomeOut->tracks), tdbOut); hashAdd(genomeOut->tdbOutHash, track, tdbOut); } return tdbOut; } void buildTdbHash(struct hash *tdbHash, struct trackDb *tdbList) /* Recursively add all tracks from tdbList to the hash (indexed by track), * along with all parents and children of those tracks */ { struct trackDb *tdb = tdbList; while (tdb != NULL) { hashAdd(tdbHash, tdb->track, tdb); @@ -818,105 +848,140 @@ struct dyString *tmp = dyStringCreate("position="); if (genome->defaultPos != NULL) dyStringAppend(tmp, genome->defaultPos); else dyStringAppend(tmp, hDefaultPos(genome->name)); // memory leak from hDefaultPos return value position = dyStringCannibalize(&tmp); } return position; } struct hubOutputStructure *buildHubSearchOutputStructure(struct trackHub *hub, struct hubSearchText *searchResults) /* Build a structure that contains the data for writing out the hub search results for this hub */ { +struct hash *missingGenomes = hashNew(0); struct hubOutputStructure *hubOut = NULL; AllocVar(hubOut); +hubOut->metaTags = dyStringNew(0); hubOut->descriptionMatch = dyStringNew(0); hubOut->genomeOutHash = newHash(5); struct hash *tdbHashHash = newHash(5); // takes genome names to trackDb hashes struct hubSearchText *hst = NULL; for (hst = searchResults; hst != NULL; hst = hst->next) { if (isEmpty(hst->db)) { // must be a hit to the hub itself, not an assembly or track within it if (hst->textLength == hubSearchTextLong) { dyStringPrintf(hubOut->descriptionMatch, "%s", hst->text); } + else if (hst->textLength == hubSearchTextMeta) + { + if (isNotEmpty(dyStringContents(hubOut->metaTags))) + dyStringPrintf(hubOut->metaTags, ", %s", hst->text); + else + dyStringPrintf(hubOut->metaTags, "%s", hst->text); + } continue; } char *db = cloneString(hst->db); + if (hashLookup(missingGenomes, db) != NULL) + continue; struct trackHubGenome *genome = hashFindVal(hub->genomeHash, db); if (genome == NULL) { // assembly hub genomes are stored with a prefix; try that char withHubName[4096]; safef(withHubName, sizeof(withHubName), "%s_%s", hub->name, db); - genome = hashMustFindVal(hub->genomeHash, withHubName); + genome = hashFindVal(hub->genomeHash, withHubName); + if (genome == NULL) + { + hashStoreName(missingGenomes, db); + warn("Error: Unable to find info for matching assembly '%s'. Skipping ...\n", withHubName); + continue; + } } struct genomeOutputStructure *genomeOut = hashFindVal(hubOut->genomeOutHash, db); if (genomeOut == NULL) { AllocVar(genomeOut); genomeOut->tdbOutHash = newHash(5); + genomeOut->metaTags = dyStringNew(0); genomeOut->descriptionMatch = dyStringNew(0); genomeOut->shortLabel = dyStringNew(0); genomeOut->assemblyLink = dyStringNew(0); genomeOut->positionString = getPositionStringForDb(genome); dyStringPrintf(genomeOut->assemblyLink, "../cgi-bin/hgTracks?hubUrl=%s&db=%s&hgsid=%s&%s", hub->url, genome->name, cartSessionId(cart), genomeOut->positionString); char *name = trackHubSkipHubName(genome->name); if (isNotEmpty(genome->description)) dyStringPrintf(genomeOut->shortLabel, "%s (%s)", genome->description, name); else if (isNotEmpty(genome->organism)) dyStringPrintf(genomeOut->shortLabel, "%s %s", genome->organism, name); else dyStringPrintf(genomeOut->shortLabel, "%s", name); genomeOut->genomeName = cloneString(genome->name); hashAdd(hubOut->genomeOutHash, db, genomeOut); slAddTail(&(hubOut->genomes), genomeOut); hubOut->genomeCount++; } - if (isEmpty(hst->track) && hst->textLength == hubSearchTextLong) + if (isEmpty(hst->track)) { - // Genome description match + if (hst->textLength == hubSearchTextLong) // Genome description match dyStringPrintf(genomeOut->descriptionMatch, "%s", hst->text); + else if (hst->textLength == hubSearchTextMeta) + { + if (isNotEmpty(dyStringContents(genomeOut->metaTags))) + dyStringPrintf(genomeOut->metaTags, ", %s", hst->text); + else + dyStringPrintf(genomeOut->metaTags, "%s", hst->text); + } } if (isNotEmpty(hst->track)) { // Time to add a track! (or add info to one, maybe) struct hash *tdbHash = (struct hash *) hashFindVal(tdbHashHash, db); if (tdbHash == NULL) { tdbHash = newHash(5); hashAdd(tdbHashHash, db, tdbHash); struct trackDb *tdbList = trackHubTracksForGenome(hub, genome); tdbList = trackDbLinkUpGenerations(tdbList); tdbList = trackDbPolishAfterLinkup(tdbList, db); trackHubPolishTrackNames(hub, tdbList); buildTdbHash(tdbHash, tdbList); } struct tdbOutputStructure *tdbOut = addOrUpdateTrackOut(hst->track, genomeOut, tdbHash, hub); + if (tdbOut != NULL) + { if (hst->textLength == hubSearchTextLong) dyStringPrintf(tdbOut->descriptionMatch, "%s", hst->text); + else if (hst->textLength == hubSearchTextMeta) + { + if (isNotEmpty(dyStringContents(tdbOut->metaTags))) + dyStringPrintf(tdbOut->metaTags, ", %s", hst->text); + else + dyStringPrintf(tdbOut->metaTags, "%s", hst->text); + } + } } } return hubOut; } static void printOutputForHub(struct hubEntry *hubInfo, struct hubSearchText *hubSearchResult, int count) /* Given a hub's info and a structure listing the search hits within the hub, first print * a basic line of hub information with a "connect" button. Then, if the search results * are non-NULL, write out information about the genomes and tracks from the search hits that * match the db filter. * If there are no search results to print, the basic hub lines are combined into a single HTML table * that is defined outside this function. * Otherwise, each hub line is printed in its own table followed by a <ul> containing details * about the search results. */ @@ -1277,30 +1342,31 @@ if (cartVarExists(cart, hgHubCheckUrl)) { doResetHub(cart); } if (cartVarExists(cart, hgHubDoRedirect)) { if (doRedirect(cart)) { cartWebEnd(); return; } } cartWebStart(cart, NULL, "%s", pageTitle); + printf( "<link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/jstree/3.3.4/themes/default/style.min.css\" />\n" "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/1.12.1/jquery.min.js\"></script>\n" "<script src=\"https://cdnjs.cloudflare.com/ajax/libs/jstree/3.3.4/jstree.min.js\"></script>\n" "<style>.jstree-default .jstree-anchor { height: initial; } </style>\n" ); jsIncludeFile("utils.js", NULL); jsIncludeFile("jquery-ui.js", NULL); webIncludeResourceFile("jquery-ui.css"); jsIncludeFile("ajax.js", NULL); jsIncludeFile("hgHubConnect.js", NULL); webIncludeResourceFile("hgHubConnect.css"); jsIncludeFile("jquery.cookie.js", NULL); printf("<div id=\"hgHubConnectUI\"> <div id=\"description\"> \n");