f0d475ddc991cec778b896f0621b5bfa75cc7d55 angie Thu Sep 26 16:39:12 2019 -0700 bigBedFind: support searchIndex in the search spec (not just trackDb); default to name if there is an index on name; support padding. refs #23283 Calling addHighlight from bigBedFind caused a lot of utils (e.g. hgTrackDb) to get linker errors about functions in hgFind.c that reference a global cart, so I made cart a param instead of a global in hgFind.c. diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index bbb03da..a83a283 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -35,31 +35,30 @@ #include "minGeneInfo.h" #include "pipeline.h" #include "hgConfig.h" #include "trix.h" #include "trackHub.h" #include "udc.h" #include "hubConnect.h" #include "bigBedFind.h" #include "genbank.h" // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). #define NONEXHAUSTIVE_SEARCH_LIMIT 500 #define EXHAUSTIVE_SEARCH_REQUIRED -1 -extern struct cart *cart; char *hgAppName = ""; /* alignment tables to check when looking for mrna alignments */ static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL }; static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL }; static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL }; static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL }; static struct dyString *hgpMatchNames = NULL; static void hgPosFree(struct hgPos **pEl) /* Free up hgPos. */ { struct hgPos *el; if ((el = *pEl) != NULL) { @@ -1181,31 +1180,31 @@ fprintf(f, "------------------------------------------------------------------------------\n"); } static void mrnaHtmlEnd(struct hgPosTable *table, FILE *f) /* Print end to mrna alignment positions. */ { fprintf(f, ""); } static void mrnaHtmlOnePos(struct hgPosTable *table, struct hgPos *pos, FILE *f) /* Print one mrna alignment position. */ { fprintf(f, "%s", pos->description); } -char *hCarefulTrackOpenVis(char *db, char *trackName) +char *hCarefulTrackOpenVisCart(struct cart *cart, char *db, char *trackName) /* If track is already in full mode, return full; otherwise, return * hTrackOpenVis. */ { char *vis = cart ? cartOptionalString(cart, trackName) : NULL; if (vis && sameString(vis, "full")) return "full"; else return hTrackOpenVis(db, trackName); } static struct psl *getPslFromTable(struct sqlConnection *conn, char *db, char *table, char *acc) /* If table exists, return PSL for each row with qName = acc. */ { struct psl *pslList = NULL; if (sqlTableExists(conn, table)) @@ -1214,31 +1213,31 @@ char query[256]; sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", table, acc); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); } slReverse(&pslList); sqlFreeResult(&sr); } return pslList; } -static void addPslResultToHgp(struct hgPositions *hgp, char *db, char *tableName, +static void addPslResultToHgp(struct cart *cart, struct hgPositions *hgp, char *db, char *tableName, char *shortLabel, char *acc, struct psl *pslList) /* Create an hgPosTable for the given psl search results, and add it to hgp->tableList. */ { if (pslList == NULL) return; struct hgPosTable *table; struct dyString *dy = newDyString(1024); struct psl *psl; char hgAppCombiner = (strchr(hgAppName, '?')) ? '&' : '?'; char *ui = getUiUrl(cart); AllocVar(table); table->htmlStart = mrnaHtmlStart; table->htmlEnd = mrnaHtmlEnd; table->htmlOnePos = mrnaHtmlOnePos; @@ -1248,48 +1247,48 @@ table->name = cloneString(tableName); char *trackName = hGetTrackForTable(db, table->name); slSort(&pslList, pslCmpScore); for (psl = pslList; psl != NULL; psl = psl->next) { struct hgPos *pos; dyStringClear(dy); AllocVar(pos); pos->chrom = hgOfficialChromName(db, psl->tName); pos->chromStart = psl->tStart; pos->chromEnd = psl->tEnd; pos->name = cloneString(psl->qName); pos->browserName = cloneString(psl->qName); dyStringPrintf(dy, "", hgp->extraCgi); dyStringPrintf(dy, "%5d %5.1f%% %9s %s %9d %9d %8s %5d %5d %5d", psl->match + psl->misMatch + psl->repMatch + psl->nCount, 100.0 - pslCalcMilliBad(psl, TRUE) * 0.1, skipChr(psl->tName), psl->strand, psl->tStart + 1, psl->tEnd, psl->qName, psl->qStart+1, psl->qEnd, psl->qSize); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); slAddHead(&table->posList, pos); } slReverse(&table->posList); freeDyString(&dy); } -static boolean findMrnaPos(char *db, char *acc, struct hgPositions *hgp) +static boolean findMrnaPos(struct cart *cart, char *db, char *acc, struct hgPositions *hgp) /* Find MRNA or EST position(s) from accession number. * Look to see if it's an mRNA or EST. Fill in hgp and return * TRUE if it is, otherwise return FALSE. */ /* NOTE: this excludes RefSeq mrna's, as they are currently * handled in findRefGenes(), which is called later in the main function */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, gbCdnaInfoTable)) { hFreeConn(&conn); return FALSE; } char *type = mrnaType(db, acc); if (isEmpty(type)) { @@ -1331,31 +1330,31 @@ for (c = chromList; c != NULL; c = c->next) { safef(splitTable, sizeof(splitTable), "%s_%s", c->name, tableName); struct psl *chrPslList = getPslFromTable(conn, db, splitTable, acc); if (pslList == NULL) pslList = chrPslList; else slCat(pslList, chrPslList); } } else pslList = getPslFromTable(conn, db, tableName, acc); if (pslList == NULL) continue; gotResults = TRUE; - addPslResultToHgp(hgp, db, tableName, label, acc, pslList); + addPslResultToHgp(cart, hgp, db, tableName, label, acc, pslList); if (!sameString(tableName, "intronEst")) /* for speed -- found proper table, so don't need to look farther */ break; } hFreeConn(&conn); return gotResults; } static char *getGenbankGrepIndex(char *db, struct hgFindSpec *hfs, char *table, char *suffix) /* If hg.conf has a grepIndex.genbank setting, hfs has a (placeholder) * grepIndex setting, and we can access the index file for table, then * return the filename; else return NULL. */ /* Special case for genbank: Mark completely specifies the root in * hg.conf, so hfs's grepIndex setting value is ignored -- it is used @@ -1673,55 +1672,55 @@ { char *organism = hOrganism(hgp->database); /* dbDb organism column */ if (alignCount == 1) { // So far we have not bothered to look up the coordinates because there are almost always // multiple matches among which the user will have to choose. However, it is possible // for there to be a unique match (hgwdev 19-02-15, hg38, "elmer" --> U01022). In that // case we should look up the coordinates so the user doesn't have to click through a page // with one match leading to another search. char shortLabel[256]; safef(shortLabel, sizeof shortLabel, "%s%s %sligned mRNAs", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); char *acc = table->posList->name; struct psl *pslList = getPslFromTable(conn, hgp->database, mrnaTable, acc); - addPslResultToHgp(hgp, hgp->database, mrnaTable, shortLabel, acc, pslList); + addPslResultToHgp(cart, hgp, hgp->database, mrnaTable, shortLabel, acc, pslList); if (hgp->tableList) alignCount = slCount(hgp->tableList->posList); else alignCount = 0; } else { char title[256]; slReverse(&table->posList); safef(title, sizeof(title), "%s%s %sligned mRNA Search Results", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); table->description = cloneString(title); table->name = cloneString(mrnaTable); table->htmlOnePos = mrnaKeysHtmlOnePos; slAddHead(&hgp->tableList, table); } freeMem(organism); } freeDyString(&dy); return alignCount; } -static boolean findMrnaKeys(char *db, struct hgFindSpec *hfs, +static boolean findMrnaKeys(struct cart *cart, char *db, struct hgFindSpec *hfs, char *keys, int limitResults, struct hgPositions *hgp) /* Find mRNA that has keyword in one of its fields. */ { int alignCount; char *tables[] = { productNameTable, geneNameTable, authorTable, tissueTable, cellTable, descriptionTable, developmentTable, }; struct hash *allKeysHash = NULL; struct slName *allKeysList = NULL; struct sqlConnection *conn = hAllocConn(db); boolean found = FALSE; /* If we can use grep to search all tables, then use piped grep to * implement implicit "AND" of multiple keys. */ @@ -2395,31 +2394,31 @@ for (table = hgp->tableList; table != NULL; table = table->next) { if (table->posList != NULL) { char *tableName = table->name; if (startsWith("all_", tableName)) tableName += strlen("all_"); // clear the tdb cache if this track is a hub track if (isHubTrack(tableName)) tdbList = NULL; struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList); if (!tdb) errAbort("no track for table \"%s\" found via a findSpec", tableName); char *trackName = tdb->track; - char *vis = hCarefulTrackOpenVis(db, trackName); + char *vis = hCarefulTrackOpenVisCart(cart, db, trackName); boolean excludeTable = FALSE; if(!containerDivPrinted) { fprintf(f, "
Your search resulted in multiple matches. " "Please select a position:
\n"); containerDivPrinted = TRUE; } if (table->htmlStart) table->htmlStart(table, f); else fprintf(f, "\n", table->description); for (pos = table->posList; pos != NULL; pos = pos->next) { if (table->htmlOnePos) @@ -2657,103 +2656,104 @@ hUserAbort("Sorry, range spec (\":%d-%d\") is not supported for %s.", relStart+1, relEnd, table); } #endif static boolean isBigFileFind(struct hgFindSpec *hfs) /* is this a find on a big* file? */ { return sameString(hfs->searchType, "bigBed") || sameString(hfs->searchType, "bigPsl") || sameString(hfs->searchType, "bigBarChart") || sameString(hfs->searchType, "bigGenePred"); } -static boolean findBigBed(char *db, struct hgFindSpec *hfs, char *spec, +static boolean findBigBed(struct cart *cart, char *db, struct hgFindSpec *hfs, char *spec, struct hgPositions *hgp) /* Look up items in bigBed */ { struct trackDb *tdb = tdbFindOrCreate(db, NULL, hfs->searchTable); return findBigBedPosInTdbList(cart, db, tdb, spec, hgp, hfs); } -static boolean searchSpecial(char *db, struct hgFindSpec *hfs, char *term, int limitResults, +static boolean searchSpecial(struct cart *cart, + char *db, struct hgFindSpec *hfs, char *term, int limitResults, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean *retFound) /* Handle searchTypes for which we have special code. Return true if * we have special code. Set retFind according to whether we find term. */ { boolean isSpecial = TRUE; boolean found = FALSE; char *upcTerm = cloneString(term); touppers(upcTerm); if (sameString(hfs->searchType, "knownGene")) { if (gotFullText(db)) found = findKnownGeneFullText(db, term, hgp); else /* NOTE, in a few months (say by April 1 2006) get rid of else -JK */ { if (!found && hTableExists(db, "kgAlias")) found = findKgGenesByAlias(db, term, hgp); if (!found && hTableExists(db, "kgProtAlias")) found = findKgGenesByProtAlias(db, term, hgp); if (!found) found = findKnownGene(db, term, hgp, hfs->searchTable); } } else if (sameString(hfs->searchType, "refGene")) { found = findRefGenes(db, hfs, term, hgp); } else if (isBigFileFind(hfs)) { - found = findBigBed(db, hfs, term, hgp); + found = findBigBed(cart, db, hfs, term, hgp); } else if (sameString(hfs->searchType, "cytoBand")) { char *chrom; int start, end; found = hgFindCytoBand(db, term, &chrom, &start, &end); if (found) singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term, term, chrom, start, end); } else if (sameString(hfs->searchType, "gold")) { char *chrom; int start, end; found = findChromContigPos(db, term, &chrom, &start, &end); if (found) { if (relativeFlag) { end = start + relEnd; start = start + relStart; } singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term, term, chrom, start, end); } } else if (sameString(hfs->searchType, "mrnaAcc")) { - found = findMrnaPos(db, term, hgp); + found = findMrnaPos(cart, db, term, hgp); } else if (sameString(hfs->searchType, "mrnaKeyword")) { - found = findMrnaKeys(db, hfs, upcTerm, limitResults, hgp); + found = findMrnaKeys(cart, db, hfs, upcTerm, limitResults, hgp); } else if (sameString(hfs->searchType, "sgdGene")) { found = findYeastGenes(db, term, hgp); } else { isSpecial = FALSE; } *retFound = found; freeMem(upcTerm); return(isSpecial); } @@ -2786,33 +2786,33 @@ { if (!isFuzzy || keyIsPrefixIgnoreCase(term, row[1])) { xrefPtr = slPairNew(cloneString(row[1]), cloneString(row[0])); slAddHead(&xrefList, xrefPtr); } } sqlFreeResult(&sr); hFreeConn(&conn); slReverse(&xrefList); if (xrefList == NULL && hgFindSpecSetting(hfs, "searchBoth") != NULL) xrefList = slPairNew(cloneString(""), cloneString(term)); return(xrefList); } -static char *addHighlight(struct cart *cart, char *db, char *chrom, unsigned start, unsigned end) -/* Add the given region to the existing value of the cart variable highlight. - * Return new value for highlight, or NULL if no change is necessary (already highlighted). */ +char *addHighlight(char *db, char *chrom, unsigned start, unsigned end) +/* Return a string that can be assigned to the cart var addHighlight, to add a yellow highlight + * at db.chrom:start+1-end for search results. */ { char *color = "fcfcac"; struct dyString *dy = dyStringCreate("%s.%s:%u-%u#%s", db, chrom, start+1, end, color); return dyStringCannibalize(&dy); } static boolean doQuery(char *db, struct hgFindSpec *hfs, char *xrefTerm, char *term, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean multiTerm, int limitResults) /* Perform a query as specified in hfs, assuming table existence has been * checked and xref'ing has been taken care of. */ { struct slName *tableList = hSplitTableNames(db, hfs->searchTable); struct slName *tPtr = NULL; @@ -2875,79 +2875,81 @@ pos->browserName = cloneString(row[3]); if (isNotEmpty(xrefTerm)) { safef(buf, sizeof(buf), "(%s%s)", termPrefix ? termPrefix : "", row[3]); pos->description = cloneString(buf); } if (relativeFlag && (pos->chromStart + relEnd) <= pos->chromEnd) { pos->chromEnd = pos->chromStart + relEnd; pos->chromStart = pos->chromStart + relStart; } else if (padding > 0 && !multiTerm) { // highlight the item bases to distinguish from padding - pos->highlight = addHighlight(cart, db, pos->chrom, pos->chromStart, pos->chromEnd); + pos->highlight = addHighlight(db, pos->chrom, pos->chromStart, pos->chromEnd); int chromSize = hChromSize(db, pos->chrom); pos->chromStart -= padding; pos->chromEnd += padding; if (pos->chromStart < 0) pos->chromStart = 0; if (pos->chromEnd > chromSize) pos->chromEnd = chromSize; } slAddHead(&table->posList, pos); } } if (table != NULL) slReverse(&table->posList); sqlFreeResult(&sr); hFreeConn(&conn); slFreeList(&tableList); return(found); } -boolean hgFindUsingSpec(char *db, struct hgFindSpec *hfs, char *term, int limitResults, +static boolean hgFindUsingSpec(struct cart *cart, + char *db, struct hgFindSpec *hfs, char *term, int limitResults, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean multiTerm) /* Perform the search described by hfs on term. If successful, put results * in hgp and return TRUE. (If not, don't modify hgp.) */ { struct slPair *xrefList = NULL, *xrefPtr = NULL; boolean found = FALSE; if (hfs == NULL || term == NULL || hgp == NULL) errAbort("NULL passed to hgFindUsingSpec.\n"); if (strlen(term)<2 && ! (sameString(hfs->searchName, "knownGene") || sameString(hfs->searchName, "flyBaseGeneSymbolOneLetter"))) return FALSE; if (isNotEmpty(hfs->termRegex) && ! regexMatchNoCase(term, hfs->termRegex)) return(FALSE); if ((!(sameString(hfs->searchType, "mrnaKeyword") || sameString(hfs->searchType, "mrnaAcc"))) && !isBigFileFind(hfs)) { if (! hTableOrSplitExists(db, hfs->searchTable)) return(FALSE); } -if (isNotEmpty(hfs->searchType) && searchSpecial(db, hfs, term, limitResults, hgp, relativeFlag, +if (isNotEmpty(hfs->searchType) && searchSpecial(cart, + db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, &found)) return(found); if (isNotEmpty(hfs->xrefTable)) { struct sqlConnection *conn = hAllocConn(db); // NOTE hfs->xrefTable can sometimes contain a comma-separated table list, // rather than just a single table. char *tables = replaceChars(hfs->xrefTable, ",", " "); boolean exists = sqlTablesExist(conn, tables); hFreeConn(&conn); freeMem(tables); if (! exists) return(FALSE); @@ -3090,31 +3092,31 @@ if (search == NULL) return FALSE; cartRemove(cart, "singleSearch"); boolean foundIt = FALSE; if (sameString(search, "knownCanonical")) foundIt = searchKnownCanonical(db, term, hgp); else { struct hgFindSpec *shortList = NULL, *longList = NULL; hgFindSpecGetAllSpecs(db, &shortList, &longList); struct hgFindSpec *hfs = hfsFind(shortList, search); if (hfs == NULL) hfs = hfsFind(longList, search); if (hfs != NULL) - foundIt = hgFindUsingSpec(db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE); + foundIt = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE); else warn("Unrecognized singleSearch=%s in URL", search); } if (foundIt) { fixSinglePos(hgp); if (cart != NULL) cartSetString(cart, "hgFind.matches", hgp->tableList->posList->browserName); } return foundIt; } static boolean matchesHgvs(struct cart *cart, char *db, char *term, struct hgPositions *hgp) /* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature */ /* See http://varnomen.hgvs.org/ */ @@ -3144,31 +3146,31 @@ { if (startsWith("NM_", hgvs->seqAcc) || startsWith("NR_", hgvs->seqAcc) || startsWith("NP_", hgvs->seqAcc) || startsWith("YP_", hgvs->seqAcc)) trackTable = "ncbiRefSeqCurated"; else if (startsWith("XM_", hgvs->seqAcc) || startsWith("XR_", hgvs->seqAcc) || startsWith("XP_", hgvs->seqAcc)) trackTable = "ncbiRefSeqPredicted"; else trackTable = "ncbiRefSeq"; } else trackTable = "refGene"; singlePos(hgp, "HGVS", NULL, trackTable, term, "", mapping->chrom, mapping->chromStart-padding, mapping->chromEnd+padding); // highlight the mapped bases to distinguish from padding - hgp->tableList->posList->highlight = addHighlight(cart, db, mapping->chrom, + hgp->tableList->posList->highlight = addHighlight(db, mapping->chrom, mapping->chromStart, mapping->chromEnd); foundIt = TRUE; } dyStringFree(&dyWarn); } return foundIt; } struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, char *hgAppNameIn, struct cart *cart, boolean multiTerm) /* Return container of tracks and positions (if any) that match term. */ { struct hgPositions *hgp = NULL, *hgpItem = NULL; regmatch_t substrs[4]; boolean canonicalSpec = FALSE; @@ -3273,43 +3275,43 @@ boolean done = FALSE; // Disable singleBaseSpec for any term that is not hgOfficialChromName // because that mangles legitimate IDs that are [A-Z]:[0-9]+. if (singleBaseSpec) { singleBaseSpec = relativeFlag = FALSE; term = cloneString(originalTerm); // restore original term relStart = relEnd = 0; } if (!trackHubDatabase(db)) hgFindSpecGetAllSpecs(db, &shortList, &longList); for (hfs = shortList; hfs != NULL; hfs = hfs->next) { - if (hgFindUsingSpec(db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, + if (hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, multiTerm)) { done = TRUE; if (! hgFindSpecSetting(hfs, "semiShortCircuit")) break; } } if (! done) { for (hfs = longList; hfs != NULL; hfs = hfs->next) { - hgFindUsingSpec(db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, + hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, multiTerm); } /* Lowe lab additions -- would like to replace these with specs, but * will leave in for now. */ if (!trackHubDatabase(db)) findTigrGenes(db, term, hgp); trackHubFindPos(cart, db, term, hgp); } hgFindSpecFreeList(&shortList); hgFindSpecFreeList(&longList); if (cart != NULL) { if(hgpMatchNames == NULL) hgpMatchNames = newDyString(256); @@ -3330,31 +3332,31 @@ } } cartSetString(cart, "hgFind.matches", hgpMatchNames->string); } } slReverse(&hgp->tableList); if (multiTerm) collapseSamePos(hgp); fixSinglePos(hgp); if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight)) cartSetString(cart, "addHighlight", hgp->singlePos->highlight); return hgp; } -void hgPositionsHelpHtml(char *organism, char *database) +void hgPositionsHelpHtmlCart(struct cart *cart, char *organism, char *database) /* Display contents of dbDb.htmlPath for database, or print an HTML comment * explaining what's missing. */ { char *htmlPath = hHtmlPath(database); char *htmlString = NULL; size_t htmlStrLength = 0; if (strstrNoCase(organism, "zoo")) webNewSection("About the NISC Comparative Sequencing Program Browser"); else webNewSection("%s Genome Browser – %s assembly" " (sequences)", trackHubSkipHubName(organism), trackHubSkipHubName(database), hgTracksName(), cartSessionVarName(), cartSessionId(cart));