d3853cf1ab1559c73f28a8bf220aac601c16c015 hiram Wed Dec 4 13:55:17 2024 -0800 when search matches a dbDb name from the genArk table get that result into the dbDb list for selection refs #34862 diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c index 75c6365..9455fdf 100644 --- src/hg/hgGateway/hgGateway.c +++ src/hg/hgGateway/hgGateway.c @@ -584,49 +584,56 @@ safef(value, sizeof(value), "%s (%s)", dbDb->scientificName, dbDb->genome); char *bolded = boldTerm(dbDb->scientificName, term, match->offset, match->type); safef(label, sizeof(label), "%s (%s)", bolded, dbDb->genome); freeMem(bolded); } else if (match->type == ddmtGenome) { safecpy(value, sizeof(value), dbDb->genome); char *bolded = boldTerm(dbDb->genome, term, match->offset, match->type); safecpy(label, sizeof(label), bolded); freeMem(bolded); } else if (match->type == ddmtDb) { safecpy(value, sizeof(value), dbDb->name); + // label just in case the bolded doesn't work + safef(label, sizeof(label), "%s (%s %s)", + dbDb->name, dbDb->genome, dbDb->description); + if (sameWord(dbDb->name, term)) // when genArk match, not the same word + { char *bolded = boldTerm(dbDb->name, term, match->offset, match->type); safef(label, sizeof(label), "%s (%s %s)", bolded, dbDb->genome, dbDb->description); freeMem(bolded); + } jsonWriteString(jw, "db", dbDb->name); } else if (match->type == ddmtDescription) { safef(value, sizeof(value), "%s (%s %s)", dbDb->name, dbDb->genome, dbDb->description); char *bolded = boldTerm(dbDb->description, term, match->offset, match->type); safef(label, sizeof(label), "%s (%s %s)", dbDb->name, dbDb->genome, bolded); freeMem(bolded); jsonWriteString(jw, "db", dbDb->name); } else errAbort("writeDbDbMatch: unrecognized dbDbMatchType value %d (db %s, term %s)", match->type, dbDb->name, term); + jsonWriteString(jw, "label", label); jsonWriteString(jw, "value", value); jsonWriteString(jw, "org", dbDb->organism); jsonWriteNumber(jw, "taxId", dbDb->taxId); if (isNotEmpty(category)) jsonWriteString(jw, "category", category); jsonWriteObjectEnd(jw); } int wordMatchOffset(char *term, char *target) /* If some word of target starts with term (case insensitive), return the offset of * that word in target; otherwise return -1. */ { if (startsWith(term, target)) return 0; @@ -1028,65 +1035,93 @@ { ++c; // the match contains tab-sep accession, hubUrl, asmName, scientificName, commonName char hubUrl[PATH_LEN+1]; safef(hubUrl, sizeof(hubUrl), "%s/%s", genarkHubUrl, match->hubUrl); slAddHead(&ret, gHubMatchNew(match->gcAccession, hubUrl, match->asmName, match->scientificName, match->commonName, -1)); if (c > GENARK_LIMIT) break; } if (ret) slReverse(&ret); return ret; } static struct gHubMatch *filterAssemblyListMatches(struct sqlConnection *conn, - char *asmListTable, char *term, char *genarkPrefix, boolean wildCard) + struct dbDb *dbDbList, struct dbDbMatch **dbDbMatchList, char *asmListTable, char *term, char *genarkPrefix, boolean wildCard) { struct gHubMatch *ret = NULL; struct dyString *query = dyStringNew(64); /* LIMIT of 100 will allow enough results to include some genArk assemblies */ if (wildCard) sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1 ORDER BY priority LIMIT 100", asmListTable, term); else sqlDyStringPrintf(query, "SELECT * FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s' IN BOOLEAN MODE) AND browserExists=1 ORDER BY priority LIMIT 100", asmListTable, term); struct sqlResult *sr = sqlGetResult(conn, query->string); dyStringFree(&query); char **row; int c = 0; while ((row = sqlNextRow(sr)) != NULL) { struct assemblyList *el = assemblyListLoadWithNull(row); if (isGenArk(el->name)) { ++c; char genarkUrl[PATH_MAX]; safef(genarkUrl, sizeof(genarkUrl), "%s/%s", genarkPrefix, el->hubUrl); slAddHead(&ret, gHubMatchNew(el->name, genarkUrl, NULL, el->scientificName, el->commonName, *el->priority)); } + else + { + struct dbDb *dbDb; + for (dbDb = dbDbList; dbDb != NULL; dbDb = dbDb->next) + { + if (sameWord(dbDb->name, el->name)) + { + // Make uppercase version of target for case-insensitive matching. + int targetLen = strlen(el->name); + char targetUpcase[targetLen + 1]; + safencpy(targetUpcase, sizeof(targetUpcase), el->name, targetLen); + touppers(targetUpcase); + struct dbDbMatch *extraList = searchDbDb(dbDbList, targetUpcase); + struct dbDbMatch *match; + boolean canAdd = TRUE; + for (match = *dbDbMatchList; match != NULL; match = match->next) + { + if (sameWord(match->dbDb->name, el->name)) + { + canAdd = FALSE; + break; + } + } + if (canAdd) + slAddHead(dbDbMatchList, extraList); + } + } + } if ( c > GENARK_LIMIT) /* limit genArk returns */ break; } sqlFreeResult(&sr); if (ret) slReverse(&ret); return ret; -} /* static struct gHubMatch *filterAssemblyListMatche */ +} /* static struct gHubMatch *filterAssemblyListMatches */ -static struct gHubMatch *searchGenark(char *term) +static struct gHubMatch *searchGenark(struct dbDb *dbDbList, struct dbDbMatch **dbDbMatchList, char *term) /* Search through the genark table (or assemblyList table) for hubs matches term */ { char *genarkPrefix = cfgOption("genarkHubPrefix"); if (genarkPrefix == NULL) return NULL; /* the chopByWhite is going to cannabilize the term string, make a copy */ char *termCopy = cloneString(term); struct sqlConnection *conn = hConnectCentral(); struct gHubMatch *gHubMatchList = NULL; char *genarkTbl = genarkTableName(); int colCount = genArkColumnCount(); int termLength = strlen(termCopy); @@ -1103,55 +1138,55 @@ long long matchCount = sqlQuickLongLong(conn, query->string); dyStringFree(&query); boolean wildCard = FALSE; if (0 == matchCount) /* try prefix search */ { query = dyStringNew(64); sqlDyStringPrintf(query, "SELECT COUNT(*) FROM %s WHERE MATCH(name, commonName, scientificName, clade, description, refSeqCategory, versionStatus, assemblyLevel) AGAINST ('%s*' IN BOOLEAN MODE) AND browserExists=1", asmListTable, termCopy); matchCount = sqlQuickLongLong(conn, query->string); dyStringFree(&query); if (matchCount > 0) wildCard = TRUE; } if (matchCount > 0) - gHubMatchList = filterAssemblyListMatches(conn, asmListTable, termCopy, genarkPrefix, wildCard); + gHubMatchList = filterAssemblyListMatches(conn, dbDbList, dbDbMatchList, asmListTable, termCopy, genarkPrefix, wildCard); } /* 1 == wordCout single word search */ else { /* multiple word search */ char *matchAllWords = asmListMatchAllWords(termCopy); - gHubMatchList = filterAssemblyListMatches(conn, asmListTable, matchAllWords, genarkPrefix, FALSE); + gHubMatchList = filterAssemblyListMatches(conn, dbDbList, dbDbMatchList, asmListTable, matchAllWords, genarkPrefix, FALSE); } /* multiple word search */ } /* termLength > 2 */ else if (sqlTableExists(conn, genarkTbl)) { char query[1024]; if (colCount > 6) { sqlSafef(query, sizeof(query), "select * from %s where " "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by priority", genarkTbl, term, term, term, term); } else { sqlSafef(query, sizeof(query), "select * from %s where " "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by taxId ASC, commonName DESC", genarkTbl, term, term, term, term); } - struct genark *matchList = genarkLoadByQuery(conn, query); - gHubMatchList = filterGenarkMatches(genarkPrefix, matchList); + struct genark *genArkMatch = genarkLoadByQuery(conn, query); + gHubMatchList = filterGenarkMatches(genarkPrefix, genArkMatch); } hDisconnectCentral(&conn); return gHubMatchList; } static char *getSearchTermUpperCase() /* If we don't have the SEARCH_TERM cgi param, exit with an HTTP Bad Request response. * If we do, convert it to upper case for case-insensitive matching and return it. */ { pushWarnHandler(htmlVaBadRequestAbort); pushAbortHandler(htmlVaBadRequestAbort); char *cgiTerm = cgiOptionalString(SEARCH_TERM); char *term = skipLeadingSpaces(cgiTerm); eraseTrailingSpaces(term); touppers(term); @@ -1162,42 +1197,45 @@ return term; } static void lookupTerm() /* Look for matches to term in hgcentral and print as JSON for autocomplete if found. */ { char *term = getSearchTermUpperCase(); // Write JSON response with list of matches puts("Content-Type:text/javascript\n"); // Before accessing hubs, intialize udc cache location from hg.conf: setUdcCacheDir(); struct dbDb *dbDbList = hDbDbList(); struct dbDbMatch *matchList = searchDbDb(dbDbList, term); -struct gHubMatch *gHubMatchList = searchGenark(term); +struct gHubMatch *gHubMatchList = searchGenark(dbDbList, &matchList, term); struct aHubMatch *aHubMatchList = searchPublicHubs(dbDbList, term); struct jsonWrite *jw = jsonWriteNew(); jsonWriteListStart(jw, NULL); // Write out JSON for dbDb matches, if any; add category if we found assembly hub matches too. char *category = aHubMatchList ? "UCSC Genome Browser assemblies - annotation tracks curated by UCSC" : NULL; struct dbDbMatch *match; for (match = matchList; match != NULL; match = match->next) writeDbDbMatch(jw, match, term, category); + // Write out genark matches, if any, pass term so the matches can be highlighted +if (slCount(gHubMatchList)) writeGenarkMatches(jw, gHubMatchList, term); // Write out assembly hub matches, if any. +if (slCount(aHubMatchList)) writeAssemblyHubMatches(jw, aHubMatchList); jsonWriteListEnd(jw); puts(jw->dy->string); jsonWriteFree(&jw); } int main(int argc, char *argv[]) /* Process CGI / command line. */ { /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {SEARCH_TERM, CARTJSON_COMMAND, NULL,}; cgiSpoof(&argc, argv); measureTiming = cgiOptionalInt("measureTiming", 0); enteredMainTime = clock1000();