4a030195664cbdd1d3c1a4b81b1ed41dbf6d9bc5 angie Fri Feb 15 14:02:58 2019 -0800 Prevent hgFind lib (specifically genomePos -> hgPositionsHtml) from opening a web page; instead, pass up results and warning messages to the calling CGI so it can open the page its own way. refs #22945 hgTracks, hgTables and hgVai used to call findGenomePos{,Web} to resolve positions; hgTables and hgVai had to detect after the fact whether an HTML page had been started (with warnings and/or multiple results). In fact, hgPositionsHtml called webEnd which could cause conflicts with what the CGI was doing afterwards. Now, instead of findGenomePos{,Web} there is hgFindSearch which returns hgp and also warning messages, if any, via a dyString parameter. The calling CGI decides how to open the page if necessary (for hgTracks, it's already open) and displays warnings/multiple results -- or just proceeds as usual with the single position result. diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index ac6219c..8d533c0 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -9,30 +9,31 @@ #include "portable.h" #include "dystring.h" #include "hash.h" #include "cheapcgi.h" #include "htmshell.h" #include "web.h" #include "jksql.h" #include "hdb.h" #include "hui.h" #include "psl.h" #include "genePred.h" #include "genePredReader.h" #include "bed.h" #include "cytoBand.h" #include "cart.h" +#include "errCatch.h" #include "hgFind.h" #include "hgFindSpec.h" #include "hgHgvs.h" #include "snp.h" #include "refLink.h" #include "kgAlias.h" #include "kgProtAlias.h" #include "findKGAlias.h" #include "findKGProtAlias.h" #include "tigrCmrGene.h" #include "minGeneInfo.h" #include "pipeline.h" #include "hgConfig.h" #include "trix.h" #include "trackHub.h" @@ -96,58 +97,30 @@ static void hgPosTableFreeList(struct hgPosTable **pList) /* Free a list of dynamically allocated hgPos's */ { struct hgPosTable *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; hgPosTableFree(&el); } *pList = NULL; } -static void hgPositionsFree(struct hgPositions **pEl) -/* Free up hgPositions. */ -{ -struct hgPositions *el; -if ((el = *pEl) != NULL) - { - freeMem(el->query); - freeMem(el->extraCgi); - hgPosTableFreeList(&el->tableList); - freez(pEl); - } -} - -#if 0 /* not used */ -static void hgPositionsFreeList(struct hgPositions **pList) -/* Free a list of dynamically allocated hgPos's */ -{ -struct hgPositions *el, *next; - -for (el = *pList; el != NULL; el = next) - { - next = el->next; - hgPositionsFree(&el); - } -*pList = NULL; -} -#endif - #define HGPOSRANGESIZE 64 static char *hgPosBrowserRange(struct hgPos *pos, char range[HGPOSRANGESIZE]) /* Convert pos to chrN:123-456 format. If range parameter is NULL it returns * static buffer, otherwise writes and returns range. */ { static char buf[HGPOSRANGESIZE]; if (range == NULL) range = buf; safef(range, HGPOSRANGESIZE, "%s:%d-%d", pos->chrom, pos->chromStart+1, pos->chromEnd); return range; } @@ -1583,144 +1556,162 @@ char query[256]; sqlSafef(query, sizeof(query), "select count(*) from %s where qName = '%s'", table, acc); return (sqlQuickNum(conn, query) > 0); } static int addMrnaPositionTable(char *db, struct hgPositions *hgp, struct slName **pAccList, struct hash *accOrgHash, struct cart *cart, struct sqlConnection *conn, char *hgAppName, boolean aligns, boolean isXeno) /* Generate table of positions that match criteria. * Add to hgp if any found. Return number found */ { struct hgPosTable *table = NULL; -struct hgPos *pos = NULL; struct slName *el = NULL; struct slName *elToFree = NULL; struct dyString *dy = newDyString(256); -char query[512]; -char description[512]; -char product[256]; -char organism[128]; char *ui = getUiUrl(cart); -char *acc = NULL; int organismID = hOrganismID(hgp->database); /* id from mrna organism table */ int alignCount = 0; char hgAppCombiner = (strchr(hgAppName, '?')) ? '&' : '?'; char *mrnaTable = isXeno ? "xenoMrna" : "all_mrna"; boolean mrnaTableExists = hTableExists(hgp->database, mrnaTable); AllocVar(table); /* Examine all accessions to see if they fit criteria for * this table. Add all matching to the position list, and * remove from the accession list */ for (el = *pAccList; el != NULL; el = el->next) { freez(&elToFree); - acc = el->name; + char *acc = el->name; /* check if item matches xeno criterion */ int itemOrganismID = hashIntVal(accOrgHash, acc); if (isXeno == (itemOrganismID == organismID)) continue; /* check if item matches alignment criterion */ if (aligns != (mrnaTableExists && mrnaAligns(conn, mrnaTable, acc))) continue; /* item fits criteria, so enter in table */ + struct hgPos *pos = NULL; AllocVar(pos); slAddHead(&table->posList, pos); pos->name = cloneString(acc); pos->browserName = cloneString(acc); dyStringClear(dy); if (aligns) { dyStringPrintf(dy, "<A HREF=\"%s%cposition=%s", hgAppName, hgAppCombiner, acc); } else { /* display mRNA details page -- need to add dummy CGI variables*/ dyStringPrintf(dy, "<A HREF=\"%s%cg=%s&i=%s&c=0&o=0&l=0&r=0", hgcName(), hgAppCombiner, mrnaTable, acc); } if (ui != NULL) dyStringPrintf(dy, "&%s", ui); dyStringPrintf(dy, "%s\">", hgp->extraCgi); dyStringPrintf(dy, "%s</A>", acc); /* print description for item, or lacking that, the product name */ + char description[512]; safef(description, sizeof(description), "%s", "n/a"); + char query[512]; sqlSafef(query, sizeof(query), "select d.name from %s g,%s d" " where g.acc = '%s' and g.description = d.id", gbCdnaInfoTable, descriptionTable, acc); sqlQuickQuery(conn, query, description, sizeof(description)); if (sameString(description, "n/a")) { /* look for product name */ sqlSafef(query, sizeof(query), "select p.name from %s g,%s p" " where g.acc = '%s' and g.productName = p.id", gbCdnaInfoTable, productNameTable, acc); + char product[256]; sqlQuickQuery(conn, query, product, sizeof(product)); if (!sameString(product, "n/a")) { /* get organism name */ sqlSafef(query, sizeof(query), "select o.name from %s g,%s o" " where g.acc = '%s' and g.organism = o.id", gbCdnaInfoTable, organismTable, acc); + char organism[128]; *organism = 0; sqlQuickQuery(conn, query, organism, sizeof(organism)); safef(description, sizeof(description), "%s%s%s", *organism ? organism : "", *organism ? ", " : "", product); } } if (!sameString(description, "n/a")) /* print description if it has been loaded */ dyStringPrintf(dy, " - %s", description); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); /* remove processed element from accession list */ slRemoveEl(pAccList, el); elToFree = el; } /* fill in table and add to hgp only if it contains results */ alignCount = slCount(table->posList); if (alignCount > 0) { char *organism = hOrganism(hgp->database); /* dbDb organism column */ + if (alignCount == 1) + { + // So far we have not bothered to look up the coordinates because there are almost always + // multiple matches among which the user will have to choose. However, it is possible + // for there to be a unique match (hgwdev 19-02-15, hg38, "elmer" --> U01022). In that + // case we should look up the coordinates so the user doesn't have to click through a page + // with one match leading to another search. + char shortLabel[256]; + safef(shortLabel, sizeof shortLabel, "%s%s %sligned mRNAs", + isXeno ? "Non-" : "", organism, + aligns ? "A" : "Una"); + char *acc = table->posList->name; + struct psl *pslList = getPslFromTable(conn, hgp->database, mrnaTable, acc); + addPslResultToHgp(hgp, hgp->database, mrnaTable, shortLabel, acc, pslList); + alignCount = slCount(hgp->tableList->posList); + } + else + { char title[256]; slReverse(&table->posList); safef(title, sizeof(title), "%s%s %sligned mRNA Search Results", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); - freeMem(organism); table->description = cloneString(title); table->name = cloneString(mrnaTable); table->htmlOnePos = mrnaKeysHtmlOnePos; slAddHead(&hgp->tableList, table); } + freeMem(organism); + } freeDyString(&dy); return alignCount; } static boolean findMrnaKeys(char *db, struct hgFindSpec *hfs, char *keys, int limitResults, struct hgPositions *hgp) /* Find mRNA that has keyword in one of its fields. */ { int alignCount; char *tables[] = { productNameTable, geneNameTable, authorTable, tissueTable, cellTable, descriptionTable, developmentTable, }; struct hash *allKeysHash = NULL; struct slName *allKeysList = NULL; @@ -2369,70 +2360,67 @@ if ((row = sqlNextRow(sr)) != NULL) dyStringPrintf(dy, "%s", row[0]); sqlFreeResult(&sr); } if (dy->stringSize > 0) pos->description = cloneString(dy->string); dyStringFree(&dy); } found = TRUE; } } hFreeConn(&conn); return(found); } -static void hgPositionsHtml(char *db, struct hgPositions *hgp, FILE *f, - boolean useWeb, char *hgAppName, struct cart *cart) -/* Write out hgp table as HTML to file. */ +void hgPositionsHtml(char *db, struct hgPositions *hgp, char *hgAppName, struct cart *cart) +/* Write multiple search results as HTML. */ { struct hgPosTable *table; struct hgPos *pos; char *desc; char range[HGPOSRANGESIZE]; char *ui = getUiUrl(cart); char *extraCgi = hgp->extraCgi; char hgAppCombiner = (strchr(hgAppName, '?')) ? '&' : '?'; boolean containerDivPrinted = FALSE; struct trackDb *tdbList = NULL; - -if (useWeb) - { - webStart(cart, db, "Select Position"); - db = cartString(cart, "db"); - } +// This used to be an argument, but only stdout was used: +FILE *f = stdout; for (table = hgp->tableList; table != NULL; table = table->next) { if (table->posList != NULL) { char *tableName = table->name; if (startsWith("all_", tableName)) tableName += strlen("all_"); // clear the tdb cache if this track is a hub track if (isHubTrack(tableName)) tdbList = NULL; struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList); if (!tdb) errAbort("no track for table \"%s\" found via a findSpec", tableName); char *trackName = tdb->track; char *vis = hCarefulTrackOpenVis(db, trackName); boolean excludeTable = FALSE; if(!containerDivPrinted) { fprintf(f, "<div id='hgFindResults'>\n"); + fprintf(f, "<p>Your search resulted in multiple matches. " + "Please select a position:</p>\n"); containerDivPrinted = TRUE; } if (table->htmlStart) table->htmlStart(table, f); else fprintf(f, "<H2>%s</H2><PRE>\n", table->description); for (pos = table->posList; pos != NULL; pos = pos->next) { if (table->htmlOnePos) table->htmlOnePos(table, pos, f); else { char *matches = excludeTable ? "" : pos->browserName; char *encMatches = cgiEncode(matches); hgPosBrowserRange(pos, range); @@ -2478,135 +2466,193 @@ htmTextOut(f, desc); } fprintf(f, "\n"); freeMem(encMatches); } } if (table->htmlEnd) table->htmlEnd(table, f); else fprintf(f, "</PRE>\n"); } } if(containerDivPrinted) fprintf(f, "</div>\n"); - -if (useWeb) - webEnd(); } - -static struct hgPositions *genomePos(char *db, char *spec, char **retChromName, - int *retWinStart, int *retWinEnd, struct cart *cart, boolean showAlias, - boolean useWeb, char *hgAppName) -/* Search for positions in genome that match user query. - * Return an hgp if the query results in a unique position. - * Otherwise display list of positions, put # of positions in retWinStart, - * and return NULL. */ +static struct hgPositions *hgPositionsSearch(char *db, char *spec, + char **retChromName, int *retWinStart, int *retWinEnd, + boolean *retIsMultiTerm, struct cart *cart, + char *hgAppName, char **retMultiChrom, + struct dyString *dyWarn) +/* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set). + * Return a container of tracks and positions (if any) that match term. If different components + * of a multi-term search land on different chromosomes then *retMultiChrom will be set. */ { struct hgPositions *hgp = NULL; -char *terms[16]; -int termCount = 0; -int i = 0; -boolean multiTerm = FALSE; char *chrom = NULL; int start = INT_MAX; int end = 0; - -termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms)); -multiTerm = (termCount > 1); - +char *terms[16]; +int termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms)); +boolean multiTerm = (termCount > 1); +if (retIsMultiTerm) + *retIsMultiTerm = multiTerm; +if (retMultiChrom) + *retMultiChrom = NULL; +int i; for (i = 0; i < termCount; i++) { trimSpaces(terms[i]); if (isEmpty(terms[i])) continue; + // Append warning messages to dyWarn, but allow errAborts to continue + struct errCatch *errCatch = errCatchNew(); + if (errCatchStart(errCatch)) hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm); - if (hgp == NULL || hgp->posCount == 0) - { - hgPositionsFree(&hgp); - warn("Sorry, couldn't locate %s in %s %s\n", terms[i], - trackHubSkipHubName(hOrganism(db)), hFreezeDate(db)); - if (multiTerm) - hUserAbort("%s not uniquely determined -- " - "can't do multi-position search.", terms[i]); - *retWinStart = 0; - return NULL; - } - - if ((hgp->singlePos != NULL) && (!showAlias || !hgp->useAlias)) - { - if (chrom != NULL && !sameString(chrom, hgp->singlePos->chrom)) - hUserAbort("Sites occur on different chromosomes: %s, %s.", - chrom, hgp->singlePos->chrom); + errCatchEnd(errCatch); + if (errCatch->gotError) + errAbort("%s", errCatch->message->string); + else if (isNotEmpty(errCatch->message->string)) + dyStringAppend(dyWarn, errCatch->message->string); + errCatchFree(&errCatch); + if (hgp->singlePos != NULL) + { + if (retMultiChrom && chrom != NULL && differentString(chrom, hgp->singlePos->chrom)) + *retMultiChrom = cloneString(chrom); chrom = hgp->singlePos->chrom; if (hgp->singlePos->chromStart < start) start = hgp->singlePos->chromStart; if (hgp->singlePos->chromEnd > end) end = hgp->singlePos->chromEnd; } - else - { - hgPositionsHtml(db, hgp, stdout, useWeb, hgAppName, cart); - if (multiTerm && hgp->posCount != 1) - hUserAbort("%s not uniquely determined (%d locations) -- " - "can't do multi-position search.", - terms[i], hgp->posCount); - *retWinStart = hgp->posCount; - hgp = NULL; + else if (hgp->posCount == 0 || (multiTerm && hgp->posCount > 1)) break; } - } -if (hgp != NULL) - { +if (retChromName) *retChromName = chrom; +if (retWinStart) *retWinStart = start; +if (retWinEnd) *retWinEnd = end; - } return hgp; } +static struct hgPositions *revertPosition(struct cart *cart, char **pPosition, + char **retChrom, int *retStart, int *retEnd, + char *hgAppName, struct dyString *dyWarn) +/* Revert *pPosition to lastPosition (or default position). Return a new hgp for the + * resolved position. Append warnings to dyWarn, errAbort if defaultPos doesn't work. */ +{ +struct hgPositions *hgp = NULL; +boolean isMultiTerm = FALSE; +char *multiDiffChrom = NULL; +char *db = cartString(cart, "db"); +char *lastPosition = cartOptionalString(cart, "lastPosition"); +if (isNotEmpty(lastPosition) && !IS_CART_VAR_EMPTY(lastPosition)) + { + if (startsWith("virt:", lastPosition)) + { + lastPosition = cartUsualString(cart, "nonVirtPosition", hDefaultPos(db)); + } + hgp = hgPositionsSearch(db, lastPosition, retChrom, retStart, retEnd, &isMultiTerm, + cart, hgAppName, &multiDiffChrom, dyWarn); + if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) + { + freez(pPosition); + *pPosition = cloneString(lastPosition); + return hgp; + } + else + dyStringPrintf(dyWarn, " Unable to resolve lastPosition '%s'; " + "reverting to default position.", lastPosition); + } +char *defaultPosition = hDefaultPos(db); +hgp = hgPositionsSearch(db, defaultPosition, retChrom, retStart, retEnd, &isMultiTerm, + cart, hgAppName, &multiDiffChrom, dyWarn); +if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) + { + freez(pPosition); + *pPosition = cloneString(defaultPosition); + } +else + errAbort("Unable to resolve default position '%s' for database '%s'.", + defaultPosition, db); +return hgp; +} -struct hgPositions *findGenomePos(char *db, char *spec, char **retChromName, - int *retWinStart, int *retWinEnd, struct cart *cart) -/* Search for positions in genome that match user query. - * Return an hgp if the query results in a unique position. - * Otherwise display list of positions, put # of positions in retWinStart, - * and return NULL. */ +static boolean posIsObsolete(char *pos) +/* Return TRUE if pos is genome (or other obsolete keyword). Once upon a time position=genome + * was used to indicate genome-wide search, but now we have an independent option. */ { -return genomePos(db, spec, retChromName, retWinStart, retWinEnd, cart, TRUE, - FALSE, "hgTracks"); +pos = trimSpaces(pos); +return(sameWord(pos, "genome") || sameWord(pos, "hgBatch")); } -struct hgPositions *findGenomePosWeb(char *db, char *spec, char **retChromName, - int *retWinStart, int *retWinEnd, struct cart *cart, - boolean useWeb, char *hgAppName) -/* Search for positions in genome that match user query. - * Use the web library to print out HTML headers if necessary, and use - * hgAppName when forming URLs (instead of "hgTracks"). - * Return an hgp if the query results in a unique position. - * Otherwise display list of positions, put # of positions in retWinStart, - * and return NULL. */ +struct hgPositions *hgFindSearch(struct cart *cart, char **pPosition, + char **retChrom, int *retStart, int *retEnd, + char *hgAppName, struct dyString *dyWarn) +/* If *pPosition is a search term, then try to resolve it to genomic position(s). + * If unable to find a unique position then revert pPosition to lastPosition (or default position). + * Return a container of matching tables and positions. Warnings/errors are appended to dyWarn. */ +{ +struct hgPositions *hgp = NULL; +if (posIsObsolete(*pPosition)) { -struct hgPositions *hgp; -if (useWeb) - webPushErrHandlersCartDb(cart, db); -hgp = genomePos(db, spec, retChromName, retWinStart, retWinEnd, cart, TRUE, - useWeb, hgAppName); -if (useWeb) - webPopErrHandlers(); + hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); + } +else + { + boolean isMultiTerm = FALSE; + char *multiDiffChrom = NULL; + char *db = cartString(cart, "db"); + hgp = hgPositionsSearch(db, *pPosition, retChrom, retStart, retEnd, + &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn); + if (isMultiTerm && isNotEmpty(multiDiffChrom)) + { + dyStringPrintf(dyWarn, "Sites occur on different chromosomes: %s, %s.", + multiDiffChrom, hgp->singlePos->chrom); + hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); + } + else if (hgp->posCount > 1 || + // In weird cases it's possible to get a single result that does not have coords, but + // leads to another search a la multiple results! That happened with genbank keyword + // search ("elmer" in hg19, hg38 Feb. '19). I fixed it but there could be other cases. + (hgp->posCount == 1 && !hgp->singlePos)) + { + if (isMultiTerm) + dyStringPrintf(dyWarn, "%s not uniquely determined (%d locations) -- " + "can't do multi-position search.", + hgp->query, hgp->posCount); + // Revert position in cart (#13009), but don't replace hgp -- hgPositionsHtml will need it. + revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); + } + else if (hgp->posCount == 0) + { + dyStringPrintf(dyWarn, "Sorry, couldn't locate %s in %s %s", hgp->query, + trackHubSkipHubName(hOrganism(db)), hFreezeDate(db)); + hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); + } + if (hgp->singlePos && isEmpty(dyWarn->string)) + { + char position[512]; + safef(position, sizeof(position), "%s:%d-%d", + hgp->singlePos->chrom, hgp->singlePos->chromStart+1, hgp->singlePos->chromEnd); + *pPosition = cloneString(addCommasToPos(NULL, position)); + } + } return hgp; } #if 0 /* not used */ static void noRelative(boolean relativeFlag, int relStart, int relEnd, char *table) { if (relativeFlag) hUserAbort("Sorry, range spec (\":%d-%d\") is not supported for %s.", relStart+1, relEnd, table); } #endif static boolean isBigFileFind(struct hgFindSpec *hfs) @@ -3104,31 +3150,31 @@ trackTable = "refGene"; singlePos(hgp, "HGVS", NULL, trackTable, term, "", mapping->chrom, mapping->chromStart-padding, mapping->chromEnd+padding); // highlight the mapped bases to distinguish from padding hgp->tableList->posList->highlight = addHighlight(cart, db, mapping->chrom, mapping->chromStart, mapping->chromEnd); foundIt = TRUE; } dyStringFree(&dyWarn); } return foundIt; } struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi, char *hgAppNameIn, struct cart *cart, boolean multiTerm) -/* Return table of positions that match term or NULL if none such. */ +/* Return container of tracks and positions (if any) that match term. */ { struct hgPositions *hgp = NULL, *hgpItem = NULL; regmatch_t substrs[4]; boolean canonicalSpec = FALSE; boolean gbrowserSpec = FALSE; boolean lengthSpec = FALSE; boolean singleBaseSpec = FALSE; boolean relativeFlag = FALSE; int relStart = 0, relEnd = 0; hgAppName = hgAppNameIn; // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). // So... set a non-exhaustive search limit on all except hgGetAnn.