f0d475ddc991cec778b896f0621b5bfa75cc7d55 angie Thu Sep 26 16:39:12 2019 -0700 bigBedFind: support searchIndex in the search spec (not just trackDb); default to name if there is an index on name; support padding. refs #23283 Calling addHighlight from bigBedFind caused a lot of utils (e.g. hgTrackDb) to get linker errors about functions in hgFind.c that reference a global cart, so I made cart a param instead of a global in hgFind.c. diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index bbb03da..a83a283 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -35,31 +35,30 @@ #include "minGeneInfo.h" #include "pipeline.h" #include "hgConfig.h" #include "trix.h" #include "trackHub.h" #include "udc.h" #include "hubConnect.h" #include "bigBedFind.h" #include "genbank.h" // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). #define NONEXHAUSTIVE_SEARCH_LIMIT 500 #define EXHAUSTIVE_SEARCH_REQUIRED -1 -extern struct cart *cart; char *hgAppName = ""; /* alignment tables to check when looking for mrna alignments */ static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL }; static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL }; static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL }; static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL }; static struct dyString *hgpMatchNames = NULL; static void hgPosFree(struct hgPos **pEl) /* Free up hgPos. */ { struct hgPos *el; if ((el = *pEl) != NULL) { @@ -1181,31 +1180,31 @@ fprintf(f, "------------------------------------------------------------------------------\n"); } static void mrnaHtmlEnd(struct hgPosTable *table, FILE *f) /* Print end to mrna alignment positions. */ { fprintf(f, ""); } static void mrnaHtmlOnePos(struct hgPosTable *table, struct hgPos *pos, FILE *f) /* Print one mrna alignment position. 
*/ { fprintf(f, "%s", pos->description); } -char *hCarefulTrackOpenVis(char *db, char *trackName) +char *hCarefulTrackOpenVisCart(struct cart *cart, char *db, char *trackName) /* If track is already in full mode, return full; otherwise, return * hTrackOpenVis. */ { char *vis = cart ? cartOptionalString(cart, trackName) : NULL; if (vis && sameString(vis, "full")) return "full"; else return hTrackOpenVis(db, trackName); } static struct psl *getPslFromTable(struct sqlConnection *conn, char *db, char *table, char *acc) /* If table exists, return PSL for each row with qName = acc. */ { struct psl *pslList = NULL; if (sqlTableExists(conn, table)) @@ -1214,31 +1213,31 @@ char query[256]; sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", table, acc); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); } slReverse(&pslList); sqlFreeResult(&sr); } return pslList; } -static void addPslResultToHgp(struct hgPositions *hgp, char *db, char *tableName, +static void addPslResultToHgp(struct cart *cart, struct hgPositions *hgp, char *db, char *tableName, char *shortLabel, char *acc, struct psl *pslList) /* Create an hgPosTable for the given psl search results, and add it to hgp->tableList. */ { if (pslList == NULL) return; struct hgPosTable *table; struct dyString *dy = newDyString(1024); struct psl *psl; char hgAppCombiner = (strchr(hgAppName, '?')) ? 
'&' : '?'; char *ui = getUiUrl(cart); AllocVar(table); table->htmlStart = mrnaHtmlStart; table->htmlEnd = mrnaHtmlEnd; table->htmlOnePos = mrnaHtmlOnePos; @@ -1248,48 +1247,48 @@ table->name = cloneString(tableName); char *trackName = hGetTrackForTable(db, table->name); slSort(&pslList, pslCmpScore); for (psl = pslList; psl != NULL; psl = psl->next) { struct hgPos *pos; dyStringClear(dy); AllocVar(pos); pos->chrom = hgOfficialChromName(db, psl->tName); pos->chromStart = psl->tStart; pos->chromEnd = psl->tEnd; pos->name = cloneString(psl->qName); pos->browserName = cloneString(psl->qName); dyStringPrintf(dy, "", hgp->extraCgi); dyStringPrintf(dy, "%5d %5.1f%% %9s %s %9d %9d %8s %5d %5d %5d", psl->match + psl->misMatch + psl->repMatch + psl->nCount, 100.0 - pslCalcMilliBad(psl, TRUE) * 0.1, skipChr(psl->tName), psl->strand, psl->tStart + 1, psl->tEnd, psl->qName, psl->qStart+1, psl->qEnd, psl->qSize); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); slAddHead(&table->posList, pos); } slReverse(&table->posList); freeDyString(&dy); } -static boolean findMrnaPos(char *db, char *acc, struct hgPositions *hgp) +static boolean findMrnaPos(struct cart *cart, char *db, char *acc, struct hgPositions *hgp) /* Find MRNA or EST position(s) from accession number. * Look to see if it's an mRNA or EST. Fill in hgp and return * TRUE if it is, otherwise return FALSE. 
*/ /* NOTE: this excludes RefSeq mrna's, as they are currently * handled in findRefGenes(), which is called later in the main function */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, gbCdnaInfoTable)) { hFreeConn(&conn); return FALSE; } char *type = mrnaType(db, acc); if (isEmpty(type)) { @@ -1331,31 +1330,31 @@ for (c = chromList; c != NULL; c = c->next) { safef(splitTable, sizeof(splitTable), "%s_%s", c->name, tableName); struct psl *chrPslList = getPslFromTable(conn, db, splitTable, acc); if (pslList == NULL) pslList = chrPslList; else slCat(pslList, chrPslList); } } else pslList = getPslFromTable(conn, db, tableName, acc); if (pslList == NULL) continue; gotResults = TRUE; - addPslResultToHgp(hgp, db, tableName, label, acc, pslList); + addPslResultToHgp(cart, hgp, db, tableName, label, acc, pslList); if (!sameString(tableName, "intronEst")) /* for speed -- found proper table, so don't need to look farther */ break; } hFreeConn(&conn); return gotResults; } static char *getGenbankGrepIndex(char *db, struct hgFindSpec *hfs, char *table, char *suffix) /* If hg.conf has a grepIndex.genbank setting, hfs has a (placeholder) * grepIndex setting, and we can access the index file for table, then * return the filename; else return NULL. */ /* Special case for genbank: Mark completely specifies the root in * hg.conf, so hfs's grepIndex setting value is ignored -- it is used @@ -1673,55 +1672,55 @@ { char *organism = hOrganism(hgp->database); /* dbDb organism column */ if (alignCount == 1) { // So far we have not bothered to look up the coordinates because there are almost always // multiple matches among which the user will have to choose. However, it is possible // for there to be a unique match (hgwdev 19-02-15, hg38, "elmer" --> U01022). In that // case we should look up the coordinates so the user doesn't have to click through a page // with one match leading to another search. 
char shortLabel[256]; safef(shortLabel, sizeof shortLabel, "%s%s %sligned mRNAs", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); char *acc = table->posList->name; struct psl *pslList = getPslFromTable(conn, hgp->database, mrnaTable, acc); - addPslResultToHgp(hgp, hgp->database, mrnaTable, shortLabel, acc, pslList); + addPslResultToHgp(cart, hgp, hgp->database, mrnaTable, shortLabel, acc, pslList); if (hgp->tableList) alignCount = slCount(hgp->tableList->posList); else alignCount = 0; } else { char title[256]; slReverse(&table->posList); safef(title, sizeof(title), "%s%s %sligned mRNA Search Results", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); table->description = cloneString(title); table->name = cloneString(mrnaTable); table->htmlOnePos = mrnaKeysHtmlOnePos; slAddHead(&hgp->tableList, table); } freeMem(organism); } freeDyString(&dy); return alignCount; } -static boolean findMrnaKeys(char *db, struct hgFindSpec *hfs, +static boolean findMrnaKeys(struct cart *cart, char *db, struct hgFindSpec *hfs, char *keys, int limitResults, struct hgPositions *hgp) /* Find mRNA that has keyword in one of its fields. */ { int alignCount; char *tables[] = { productNameTable, geneNameTable, authorTable, tissueTable, cellTable, descriptionTable, developmentTable, }; struct hash *allKeysHash = NULL; struct slName *allKeysList = NULL; struct sqlConnection *conn = hAllocConn(db); boolean found = FALSE; /* If we can use grep to search all tables, then use piped grep to * implement implicit "AND" of multiple keys. 
*/ @@ -2395,31 +2394,31 @@ for (table = hgp->tableList; table != NULL; table = table->next) { if (table->posList != NULL) { char *tableName = table->name; if (startsWith("all_", tableName)) tableName += strlen("all_"); // clear the tdb cache if this track is a hub track if (isHubTrack(tableName)) tdbList = NULL; struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList); if (!tdb) errAbort("no track for table \"%s\" found via a findSpec", tableName); char *trackName = tdb->track; - char *vis = hCarefulTrackOpenVis(db, trackName); + char *vis = hCarefulTrackOpenVisCart(cart, db, trackName); boolean excludeTable = FALSE; if(!containerDivPrinted) { fprintf(f, "
Your search resulted in multiple matches. " "Please select a position:
\n"); containerDivPrinted = TRUE; } if (table->htmlStart) table->htmlStart(table, f); else fprintf(f, "\n", table->description);
for (pos = table->posList; pos != NULL; pos = pos->next)
{
if (table->htmlOnePos)
@@ -2657,103 +2656,104 @@
hUserAbort("Sorry, range spec (\":%d-%d\") is not supported for %s.",
relStart+1, relEnd, table);
}
#endif
static boolean isBigFileFind(struct hgFindSpec *hfs)
/* Report whether hfs->searchType names one of the big* file formats
 * (bigBed, bigPsl, bigBarChart, bigGenePred). */
{
static char *bigTypes[] = { "bigBed", "bigPsl", "bigBarChart", "bigGenePred", NULL };
char *type = hfs->searchType;
int ix = 0;
/* Walk the NULL-terminated table until a name matches or the table runs out. */
while (bigTypes[ix] != NULL && !sameString(type, bigTypes[ix]))
    ix++;
/* Stopping before the terminator means one of the names matched. */
return bigTypes[ix] != NULL;
}
-static boolean findBigBed(char *db, struct hgFindSpec *hfs, char *spec,
+static boolean findBigBed(struct cart *cart, char *db, struct hgFindSpec *hfs, char *spec,
struct hgPositions *hgp)
/* Look up spec in the big* file track named by hfs->searchTable.
 * The trackDb for that table is obtained with tdbFindOrCreate, and the search
 * itself is delegated to findBigBedPosInTdbList, which receives cart, hgp and
 * hfs; its result is returned unchanged. */
{
struct trackDb *tdb = tdbFindOrCreate(db, NULL, hfs->searchTable);
return findBigBedPosInTdbList(cart, db, tdb, spec, hgp, hfs);
}
-static boolean searchSpecial(char *db, struct hgFindSpec *hfs, char *term, int limitResults,
+static boolean searchSpecial(struct cart *cart,
+ char *db, struct hgFindSpec *hfs, char *term, int limitResults,
struct hgPositions *hgp, boolean relativeFlag,
int relStart, int relEnd, boolean *retFound)
/* Dispatch on hfs->searchType to the finder written specially for that type.
 * Return TRUE if searchType is one of the types handled here (whether or not
 * term was found), FALSE if there is no special code for it.
 * Set *retFound according to whether we find term. */
{
boolean isSpecial = TRUE;
boolean found = FALSE;
// Uppercased copy of term; only the mrnaKeyword search below uses it.
char *upcTerm = cloneString(term);
touppers(upcTerm);
if (sameString(hfs->searchType, "knownGene"))
{
if (gotFullText(db))
found = findKnownGeneFullText(db, term, hgp);
else /* NOTE, in a few months (say by April 1 2006) get rid of else -JK */
{
// Try the alias tables first (when present), then fall back to findKnownGene.
if (!found && hTableExists(db, "kgAlias"))
found = findKgGenesByAlias(db, term, hgp);
if (!found && hTableExists(db, "kgProtAlias"))
found = findKgGenesByProtAlias(db, term, hgp);
if (!found)
found = findKnownGene(db, term, hgp, hfs->searchTable);
}
}
else if (sameString(hfs->searchType, "refGene"))
{
found = findRefGenes(db, hfs, term, hgp);
}
else if (isBigFileFind(hfs))
{
- found = findBigBed(db, hfs, term, hgp);
+ found = findBigBed(cart, db, hfs, term, hgp);
}
else if (sameString(hfs->searchType, "cytoBand"))
{
char *chrom;
int start, end;
found = hgFindCytoBand(db, term, &chrom, &start, &end);
if (found)
singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term,
term, chrom, start, end);
}
else if (sameString(hfs->searchType, "gold"))
{
char *chrom;
int start, end;
found = findChromContigPos(db, term, &chrom, &start, &end);
if (found)
{
if (relativeFlag)
{
// Both offsets are relative to the original start, so end must be
// computed before start is overwritten.
end = start + relEnd;
start = start + relStart;
}
singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term,
term, chrom, start, end);
}
}
else if (sameString(hfs->searchType, "mrnaAcc"))
{
- found = findMrnaPos(db, term, hgp);
+ found = findMrnaPos(cart, db, term, hgp);
}
else if (sameString(hfs->searchType, "mrnaKeyword"))
{
- found = findMrnaKeys(db, hfs, upcTerm, limitResults, hgp);
+ found = findMrnaKeys(cart, db, hfs, upcTerm, limitResults, hgp);
}
else if (sameString(hfs->searchType, "sgdGene"))
{
found = findYeastGenes(db, term, hgp);
}
else
{
// No special handler for this searchType.
isSpecial = FALSE;
}
*retFound = found;
freeMem(upcTerm);
return(isSpecial);
}
@@ -2786,33 +2786,33 @@
{
if (!isFuzzy || keyIsPrefixIgnoreCase(term, row[1]))
{
xrefPtr = slPairNew(cloneString(row[1]), cloneString(row[0]));
slAddHead(&xrefList, xrefPtr);
}
}
sqlFreeResult(&sr);
hFreeConn(&conn);
slReverse(&xrefList);
if (xrefList == NULL && hgFindSpecSetting(hfs, "searchBoth") != NULL)
xrefList = slPairNew(cloneString(""), cloneString(term));
return(xrefList);
}
-static char *addHighlight(struct cart *cart, char *db, char *chrom, unsigned start, unsigned end)
-/* Add the given region to the existing value of the cart variable highlight.
- * Return new value for highlight, or NULL if no change is necessary (already highlighted). */
+char *addHighlight(char *db, char *chrom, unsigned start, unsigned end)
+/* Return a string that can be assigned to the cart var addHighlight, to add a yellow highlight
+ * at db.chrom:start+1-end for search results. */
{
// Color value appended after '#' in the result; the header comment describes it as yellow.
char *color = "fcfcac";
// Result has the form db.chrom:start+1-end#color; start is printed as start+1, end as given.
struct dyString *dy = dyStringCreate("%s.%s:%u-%u#%s", db, chrom, start+1, end, color);
// NOTE(review): presumably the returned string is heap-allocated and owned by the
// caller -- confirm against dyStringCannibalize.
return dyStringCannibalize(&dy);
}
static boolean doQuery(char *db, struct hgFindSpec *hfs, char *xrefTerm, char *term,
struct hgPositions *hgp,
boolean relativeFlag, int relStart, int relEnd,
boolean multiTerm, int limitResults)
/* Perform a query as specified in hfs, assuming table existence has been
* checked and xref'ing has been taken care of. */
{
struct slName *tableList = hSplitTableNames(db, hfs->searchTable);
struct slName *tPtr = NULL;
@@ -2875,79 +2875,81 @@
pos->browserName = cloneString(row[3]);
if (isNotEmpty(xrefTerm))
{
safef(buf, sizeof(buf), "(%s%s)",
termPrefix ? termPrefix : "", row[3]);
pos->description = cloneString(buf);
}
if (relativeFlag && (pos->chromStart + relEnd) <= pos->chromEnd)
{
pos->chromEnd = pos->chromStart + relEnd;
pos->chromStart = pos->chromStart + relStart;
}
else if (padding > 0 && !multiTerm)
{
// highlight the item bases to distinguish from padding
- pos->highlight = addHighlight(cart, db, pos->chrom, pos->chromStart, pos->chromEnd);
+ pos->highlight = addHighlight(db, pos->chrom, pos->chromStart, pos->chromEnd);
int chromSize = hChromSize(db, pos->chrom);
pos->chromStart -= padding;
pos->chromEnd += padding;
if (pos->chromStart < 0)
pos->chromStart = 0;
if (pos->chromEnd > chromSize)
pos->chromEnd = chromSize;
}
slAddHead(&table->posList, pos);
}
}
if (table != NULL)
slReverse(&table->posList);
sqlFreeResult(&sr);
hFreeConn(&conn);
slFreeList(&tableList);
return(found);
}
-boolean hgFindUsingSpec(char *db, struct hgFindSpec *hfs, char *term, int limitResults,
+static boolean hgFindUsingSpec(struct cart *cart,
+ char *db, struct hgFindSpec *hfs, char *term, int limitResults,
struct hgPositions *hgp, boolean relativeFlag,
int relStart, int relEnd, boolean multiTerm)
/* Perform the search described by hfs on term. If successful, put results
* in hgp and return TRUE. (If not, don't modify hgp.) */
{
struct slPair *xrefList = NULL, *xrefPtr = NULL;
boolean found = FALSE;
if (hfs == NULL || term == NULL || hgp == NULL)
errAbort("NULL passed to hgFindUsingSpec.\n");
if (strlen(term)<2 && !
(sameString(hfs->searchName, "knownGene") ||
sameString(hfs->searchName, "flyBaseGeneSymbolOneLetter")))
return FALSE;
if (isNotEmpty(hfs->termRegex) && ! regexMatchNoCase(term, hfs->termRegex))
return(FALSE);
if ((!(sameString(hfs->searchType, "mrnaKeyword") || sameString(hfs->searchType, "mrnaAcc")))
&& !isBigFileFind(hfs))
{
if (! hTableOrSplitExists(db, hfs->searchTable))
return(FALSE);
}
-if (isNotEmpty(hfs->searchType) && searchSpecial(db, hfs, term, limitResults, hgp, relativeFlag,
+if (isNotEmpty(hfs->searchType) && searchSpecial(cart,
+ db, hfs, term, limitResults, hgp, relativeFlag,
relStart, relEnd, &found))
return(found);
if (isNotEmpty(hfs->xrefTable))
{
struct sqlConnection *conn = hAllocConn(db);
// NOTE hfs->xrefTable can sometimes contain a comma-separated table list,
// rather than just a single table.
char *tables = replaceChars(hfs->xrefTable, ",", " ");
boolean exists = sqlTablesExist(conn, tables);
hFreeConn(&conn);
freeMem(tables);
if (! exists)
return(FALSE);
@@ -3090,31 +3092,31 @@
if (search == NULL)
return FALSE;
cartRemove(cart, "singleSearch");
boolean foundIt = FALSE;
if (sameString(search, "knownCanonical"))
foundIt = searchKnownCanonical(db, term, hgp);
else
{
struct hgFindSpec *shortList = NULL, *longList = NULL;
hgFindSpecGetAllSpecs(db, &shortList, &longList);
struct hgFindSpec *hfs = hfsFind(shortList, search);
if (hfs == NULL)
hfs = hfsFind(longList, search);
if (hfs != NULL)
- foundIt = hgFindUsingSpec(db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE);
+ foundIt = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE);
else
warn("Unrecognized singleSearch=%s in URL", search);
}
if (foundIt)
{
fixSinglePos(hgp);
if (cart != NULL)
cartSetString(cart, "hgFind.matches", hgp->tableList->posList->browserName);
}
return foundIt;
}
static boolean matchesHgvs(struct cart *cart, char *db, char *term, struct hgPositions *hgp)
/* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature */
/* See http://varnomen.hgvs.org/ */
@@ -3144,31 +3146,31 @@
{
if (startsWith("NM_", hgvs->seqAcc) || startsWith("NR_", hgvs->seqAcc) ||
startsWith("NP_", hgvs->seqAcc) || startsWith("YP_", hgvs->seqAcc))
trackTable = "ncbiRefSeqCurated";
else if (startsWith("XM_", hgvs->seqAcc) || startsWith("XR_", hgvs->seqAcc) ||
startsWith("XP_", hgvs->seqAcc))
trackTable = "ncbiRefSeqPredicted";
else
trackTable = "ncbiRefSeq";
}
else
trackTable = "refGene";
singlePos(hgp, "HGVS", NULL, trackTable, term, "",
mapping->chrom, mapping->chromStart-padding, mapping->chromEnd+padding);
// highlight the mapped bases to distinguish from padding
- hgp->tableList->posList->highlight = addHighlight(cart, db, mapping->chrom,
+ hgp->tableList->posList->highlight = addHighlight(db, mapping->chrom,
mapping->chromStart, mapping->chromEnd);
foundIt = TRUE;
}
dyStringFree(&dyWarn);
}
return foundIt;
}
struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi,
char *hgAppNameIn, struct cart *cart, boolean multiTerm)
/* Return container of tracks and positions (if any) that match term. */
{
struct hgPositions *hgp = NULL, *hgpItem = NULL;
regmatch_t substrs[4];
boolean canonicalSpec = FALSE;
@@ -3273,43 +3275,43 @@
boolean done = FALSE;
// Disable singleBaseSpec for any term that is not hgOfficialChromName
// because that mangles legitimate IDs that are [A-Z]:[0-9]+.
if (singleBaseSpec)
{
singleBaseSpec = relativeFlag = FALSE;
term = cloneString(originalTerm); // restore original term
relStart = relEnd = 0;
}
if (!trackHubDatabase(db))
hgFindSpecGetAllSpecs(db, &shortList, &longList);
for (hfs = shortList; hfs != NULL; hfs = hfs->next)
{
- if (hgFindUsingSpec(db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd,
+ if (hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd,
multiTerm))
{
done = TRUE;
if (! hgFindSpecSetting(hfs, "semiShortCircuit"))
break;
}
}
if (! done)
{
for (hfs = longList; hfs != NULL; hfs = hfs->next)
{
- hgFindUsingSpec(db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd,
+ hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd,
multiTerm);
}
/* Lowe lab additions -- would like to replace these with specs, but
* will leave in for now. */
if (!trackHubDatabase(db))
findTigrGenes(db, term, hgp);
trackHubFindPos(cart, db, term, hgp);
}
hgFindSpecFreeList(&shortList);
hgFindSpecFreeList(&longList);
if (cart != NULL)
{
if(hgpMatchNames == NULL)
hgpMatchNames = newDyString(256);
@@ -3330,31 +3332,31 @@
}
}
cartSetString(cart, "hgFind.matches", hgpMatchNames->string);
}
}
slReverse(&hgp->tableList);
if (multiTerm)
collapseSamePos(hgp);
fixSinglePos(hgp);
if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight))
cartSetString(cart, "addHighlight", hgp->singlePos->highlight);
return hgp;
}
-void hgPositionsHelpHtml(char *organism, char *database)
+void hgPositionsHelpHtmlCart(struct cart *cart, char *organism, char *database)
/* Display contents of dbDb.htmlPath for database, or print an HTML comment
* explaining what's missing. */
{
char *htmlPath = hHtmlPath(database);
char *htmlString = NULL;
size_t htmlStrLength = 0;
if (strstrNoCase(organism, "zoo"))
webNewSection("About the NISC Comparative Sequencing Program Browser");
else
webNewSection("%s Genome Browser – %s assembly"
" (sequences)",
trackHubSkipHubName(organism),
trackHubSkipHubName(database),
hgTracksName(), cartSessionVarName(), cartSessionId(cart));