a27219ced37a549f3b69bc79f07ea99a87439ecd braney Sat Feb 20 12:49:06 2021 -0800 Give user a chance to get non-short circuit hits if there's more than one short circuit choice (otherwise just go to position). Add ability to add trixSearch path to findSpecs. Add new find spec for hg38 gencodeV36 that only has canonical genes in it. diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 08384ce..4b81034 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -420,45 +420,43 @@ } slAddHead(&hgp->tableList, table); return table; } static char *makeIndexPath(char *db, char *name) { /* create the pathname with the knowngene index for a db, result needs to be freed */ char *path = needMem(PATH_LEN); safef(path, PATH_LEN, "/gbdb/%s/%s.ix", db, name); char *newPath = hReplaceGbdb(path); freez(&path); return newPath; } -static boolean gotFullText(char *db, char *name) +static boolean gotFullText(char *db, char *indexPath) /* Return TRUE if we have full text index. */ { -char *indexPath = makeIndexPath(db, name); boolean result = FALSE; if (udcExists(indexPath)) result = TRUE; else { warn("%s doesn't exist", indexPath); result = FALSE; } -freez(&indexPath); return result; } struct tsrPos /* Little helper structure tying together search result * and pos, used by addKnownGeneItems */ { struct tsrPos *next; /* Next in list. */ struct trixSearchResult *tsr; /* Basically a gene symbol */ struct hgPos *posList; /* Associated list of positions. */ }; static boolean isCanonical(struct sqlConnection *conn, char *geneName) /* Look for the name in knownCannonical, return true if found */ { @@ -614,41 +612,40 @@ { struct hgPos *next; for (pos = tp->posList; pos != NULL; pos = next) { next = pos->next; slAddHead(&posList, pos); } } slSort(&posList, hgPosCmpCanonical); table->posList = posList; hashFree(&hash); dyStringFree(&dy); } -boolean findKnownGeneFullText(char *db, char *term,struct hgPositions *hgp, char *name) +static boolean findKnownGeneFullText(char *db, char *term,struct hgPositions *hgp, char *name, char *path) /* Look for position in full text. */ { boolean gotIt = FALSE; struct trix *trix; struct trixSearchResult *tsrList; char *lowered = cloneString(term); char *keyWords[HGFIND_MAX_KEYWORDS]; int keyCount; -char *path = makeIndexPath(db, name); trix = trixOpen(path); tolowers(lowered); keyCount = chopLine(lowered, keyWords); tsrList = trixSearch(trix, keyCount, keyWords, tsmExpand); if (tsrList != NULL) { struct hgPosTable *table = addKnownGeneTable(db, hgp, name); struct sqlConnection *conn = hAllocConn(db); struct sqlConnection *conn2 = hAllocConn(db); addKnownGeneItems(table, tsrList, conn, conn2, name); hFreeConn(&conn); hFreeConn(&conn2); gotIt = TRUE; } freez(&lowered); @@ -2078,32 +2075,36 @@ fprintf(f, " - "); htmTextOut(f, desc); } fprintf(f, "\n"); freeMem(encMatches); } } if (table->htmlEnd) table->htmlEnd(table, f); else fprintf(f, "\n"); } } if(containerDivPrinted) + { + if (hgp->shortCircuited) + fprintf(f, " More results...", hgTracksName(), getenv("QUERY_STRING")); fprintf(f, "\n"); } +} static struct hgPositions *hgPositionsSearch(char *db, char *spec, char **retChromName, int *retWinStart, int *retWinEnd, boolean *retIsMultiTerm, struct cart *cart, char *hgAppName, char **retMultiChrom, struct dyString *dyWarn) /* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set). * Return a container of tracks and positions (if any) that match term. If different components * of a multi-term search land on different chromosomes then *retMultiChrom will be set. */ { struct hgPositions *hgp = NULL; char *chrom = NULL; int start = INT_MAX; int end = 0; char *terms[16]; @@ -2285,36 +2286,39 @@ return findBigBedPosInTdbList(cart, db, tdb, spec, hgp, hfs); } boolean searchSpecial(struct cart *cart, char *db, struct hgFindSpec *hfs, char *term, int limitResults, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean *retFound) /* Handle searchTypes for which we have special code. Return true if * we have special code. Set retFind according to whether we find term. */ { boolean isSpecial = TRUE; boolean found = FALSE; char *upcTerm = cloneString(term); touppers(upcTerm); -if (sameString(hfs->searchType, "knownGene")) +if (startsWith("knownGene", hfs->searchType)) { char *knownDatabase = hdbDefaultKnownDb(db); char *name = (sameString(knownDatabase, db)) ? "knownGene" : knownDatabase; - if (gotFullText(db, name)) - found = findKnownGeneFullText(db, term, hgp, name); + char *indexPath = hgFindSpecSetting(hfs, "searchTrix"); + if (indexPath == NULL) + indexPath = makeIndexPath(db, name); + if (gotFullText(db, indexPath)) + found = findKnownGeneFullText(db, term, hgp, name, indexPath); } else if (sameString(hfs->searchType, "refGene")) { found = findRefGenes(db, hfs, term, hgp); } else if (isBigFileFind(hfs)) { found = findBigBed(cart, db, hfs, term, hgp); } else if (sameString(hfs->searchType, "cytoBand")) { char *chrom; int start, end; found = hgFindCytoBand(db, term, &chrom, &start, &end); if (found) @@ -2878,42 +2882,49 @@ struct hgFindSpec *shortList = NULL, *longList = NULL; struct hgFindSpec *hfs; boolean done = FALSE; // Disable singleBaseSpec for any term that is not hgOfficialChromName // because that mangles legitimate IDs that are [A-Z]:[0-9]+. if (singleBaseSpec) { singleBaseSpec = relativeFlag = FALSE; term = cloneString(originalTerm); // restore original term relStart = relEnd = 0; } if (!trackHubDatabase(db)) hgFindSpecGetAllSpecs(db, &shortList, &longList); + if (cartOptionalString(cart, "noShort") == NULL) + { + hgp->shortCircuited = TRUE; for (hfs = shortList; hfs != NULL; hfs = hfs->next) { if (hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, multiTerm)) { done = TRUE; if (! hgFindSpecSetting(hfs, "semiShortCircuit")) break; } } + } + else + cartRemove(cart, "noShort"); if (! done) { + hgp->shortCircuited = FALSE; for (hfs = longList; hfs != NULL; hfs = hfs->next) { hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, multiTerm); } /* Lowe lab additions -- would like to replace these with specs, but * will leave in for now. */ if (!trackHubDatabase(db)) findTigrGenes(db, term, hgp); trackHubFindPos(cart, db, term, hgp); } hgFindSpecFreeList(&shortList); hgFindSpecFreeList(&longList); if (cart != NULL)