67dcce67b7da61d4de46d360754b174294432a8c chmalee Wed Mar 29 16:22:39 2023 -0700 Fix non-knownCanonical results not showing up in a knownGene search, refs #25078 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 0312ced..3bd4f03 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -406,54 +406,72 @@ slAddHead(&idList, idEl); } } pipelineClose(&pl); /* Takes care of lf too. */ freeCmds(cmds, keyCount); if (verboseLevel() >= 3) { int count = slCount(idList); verbose(3, "*** Got %d results from %s\n\n", count, indexFile); } } freeMem(escapedKey); return idList; } +static struct hgPosTable *findTable(struct hgPosTable *list, char *name) +/* Find first table in list that matches name */ +{ +struct hgPosTable *ret = NULL; +for (ret = list; ret != NULL; ret = ret->next) + { + if (sameString(ret->name, name)) + return ret; + } +return NULL; +} + static struct hgPosTable *addKnownGeneTable(char *db, struct hgPositions *hgp, char *name) /* Create new table for known genes matches, add it to hgp, and return it. */ { +// we may be coming here a second time, after already hitting knownGeneFast +// add non duplicate results to the end of our hgp struct hgPosTable *table; +table = findTable(hgp->tableList, name); +if (table == NULL) + { AllocVar(table); table->searchTime = -1; if (differentString(name, "knownGene")) { char *masterGeneTrack = hdbGetMasterGeneTrack(name); table->description = cloneString(masterGeneTrack); table->name = cloneString(masterGeneTrack); } else { if (hTableExists(db, "knownAttrs")) table->description = cloneString("Gencode Genes"); else if (hTableExists(db, "kgProtMap2")) table->description = cloneString("UCSC Genes"); else table->description = cloneString("Known Genes"); table->name = cloneString("knownGene"); } slAddHead(&hgp->tableList, table); + } return table; } static char *makeIndexPath(char *db, char *name) { /* create the pathname with the knowngene index for a db, result needs to be freed */ char *path = needMem(PATH_LEN); safef(path, PATH_LEN, "/gbdb/%s/%s.ix", db, name); char *newPath = hReplaceGbdb(path); freez(&path); return newPath; } static boolean gotFullText(char *db, char *indexPath) /* Return TRUE if we have full text index. */ @@ -486,31 +504,30 @@ const struct hgPos *hg1 = *((struct hgPos**)vhg1); const struct hgPos *hg2 = *((struct hgPos**)vhg2); int diff = trixSearchResultCmp(&hg1->tp->tsr, &hg2->tp->tsr); if (diff == 0) { diff = (hg2->canonical - hg1->canonical); if (diff == 0) { // Prioritize things on main chromosomes diff = chrNameCmpWithAltRandom(hg1->chrom, hg2->chrom); } } return diff; } - static void addKnownGeneItems(struct hgPosTable *table, struct trixSearchResult *tsrList, struct sqlConnection *conn, char *name, struct trix *trix, struct hgFindSpec *hfs) /* Convert tsrList to posList, and hang posList off of table. */ { struct dyString *dy = dyStringNew(0); struct trixSearchResult *tsr; struct hash *hash = hashNew(16); struct hgPos *pos, *posList = NULL; struct tsrPos *tpList = NULL, *tp; struct sqlResult *sr; char **row; int maxToReturn = NONEXHAUSTIVE_SEARCH_LIMIT; char *db = sqlGetDatabase(conn); char *dbName; @@ -591,31 +608,53 @@ } sqlFreeResult(&sr); /* Hang all pos onto table. */ for (tp = tpList; tp != NULL; tp = tp->next) { struct hgPos *next; for (pos = tp->posList; pos != NULL; pos = next) { next = pos->next; slAddHead(&posList, pos); } } slSort(&posList, hgPosCmpCanonical); +// we may have already been here (ex: queried knownGeneFast first), if so, +// we need to put the new list of results behind the old list, since the +// old results had a higher priority. We can now rank results to knownGene +// by putting what we want users to find first in different search specs +if (table->posList == NULL) table->posList = posList; +else + { + struct hash *prevHash = hashNew(0); + struct hgPos *newPosList = NULL, *next; + for (pos = table->posList; pos != NULL; pos = pos->next) + { + hashAdd(prevHash, pos->name, pos); + } + for (pos = posList; pos != NULL; pos = next) + { + next = pos->next; + if (!hashLookup(prevHash, pos->name)) + slAddHead(&newPosList, pos); + } + slReverse(&newPosList); + table->posList = slCat(table->posList, newPosList); + } hashFree(&hash); dyStringFree(&dy); } static boolean findKnownGeneFullText(char *db, char *term,struct hgPositions *hgp, char *name, char *path, struct hgFindSpec *hfs, boolean measureTiming) /* Look for position in full text. */ { long startTime = clock1000(); boolean gotIt = FALSE; struct trix *trix; struct trixSearchResult *tsrList; char *lowered = cloneString(term); char *keyWords[HGFIND_MAX_KEYWORDS]; int keyCount; @@ -627,31 +666,36 @@ tsrList = trixSearch(trix, keyCount, keyWords, tsmExpand); if (tsrList != NULL) { table = addKnownGeneTable(db, hgp, name); struct sqlConnection *conn = hAllocConn(db); addKnownGeneItems(table, tsrList, conn, name, trix, hfs); hFreeConn(&conn); gotIt = TRUE; } freez(&lowered); trixSearchResultFreeList(&tsrList); trixClose(&trix); // This is hacky but rely on knownGene table being at head of list // for timing. TODO: make this more robust if (measureTiming && table != NULL) + { + if (table->searchTime == -1) table->searchTime = clock1000() - startTime; + else + table->searchTime += clock1000() - startTime; + } return gotIt; } static char *getUiUrl(struct cart *cart) /* Get rest of UI from browser. */ { static struct dyString *dy = NULL; static char *s = NULL; if (dy == NULL) { dy = dyStringNew(64); if (cart != NULL && cart->sessionId != NULL) dyStringPrintf(dy, "%s=%s", cartSessionVarName(), cartSessionId(cart)); s = dy->string; } @@ -3397,31 +3441,31 @@ { boolean foundSpec = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming); if (foundSpec) hashAdd(foundSpecHash, hfs->searchTable, hfs->searchTable); foundIt |= foundSpec; // for multiTerm searches (like '15q11;15q13'), each individual component // must resolve to a single position, so break once we find the first match if (multiTerm && foundSpec) break; } if (!(multiTerm) || (multiTerm && !foundIt)) { for (hfs = longList; hfs != NULL; hfs = hfs->next) { - if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL) + if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL && !sameString(hfs->searchTable, "knownGene")) continue; foundIt |= hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming); } // lastly search any included track hubs, or in the case of an assembly hub, any of the tracks if (hubCategoryList) foundIt |= findBigBedPosInTdbList(cart, db, hubCategoryList, term, hgp, NULL, measureTiming); } // multiTerm searches must resolve to a single range on a chromosome, so don't // do these non positional searches if a multiTerm was requested if (!multiTerm) { getLabelsForHubs(); struct searchCategory *category; for (category = categories; category != NULL; category = category->next)