67dcce67b7da61d4de46d360754b174294432a8c chmalee Wed Mar 29 16:22:39 2023 -0700 Fix non-knownCanonical results not showing up in a knownGene search, refs #25078 diff --git src/hg/lib/hgFind.c src/hg/lib/hgFind.c index 0312ced..3bd4f03 100644 --- src/hg/lib/hgFind.c +++ src/hg/lib/hgFind.c @@ -1,3922 +1,3966 @@ /* hgFind.c - Find things in human genome annotations. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "regexHelper.h" #include "obscure.h" #include "hCommon.h" #include "portable.h" #include "dystring.h" #include "hash.h" #include "cheapcgi.h" #include "htmshell.h" #include "web.h" #include "jksql.h" #include "hdb.h" #include "hui.h" #include "psl.h" #include "genePred.h" #include "genePredReader.h" #include "bed.h" #include "cytoBand.h" #include "cart.h" #include "errCatch.h" #include "hgFind.h" #include "hgFindSpec.h" #include "hgHgvs.h" #include "snp.h" #include "refLink.h" #include "kgAlias.h" #include "kgProtAlias.h" #include "findKGAlias.h" #include "findKGProtAlias.h" #include "tigrCmrGene.h" #include "minGeneInfo.h" #include "pipeline.h" #include "hgConfig.h" #include "trix.h" #include "trackHub.h" #include "udc.h" #include "hubConnect.h" #include "bigBedFind.h" #include "genbank.h" #include "chromAlias.h" #include "cart.h" #include "cartTrackDb.h" #include "jsonParse.h" // Exhaustive searches can lead to timeouts on CGIs (#11626). // However, hgGetAnn requires exhaustive searches (#11665). 
#define NONEXHAUSTIVE_SEARCH_LIMIT 500 #define EXHAUSTIVE_SEARCH_REQUIRED -1 #define SNIPPET_LIMIT 100 char *hgAppName = ""; /* alignment tables to check when looking for mrna alignments */ static char *estTables[] = { "intronEst", "all_est", "xenoEst", NULL }; static char *estLabels[] = { "Spliced ESTs", "ESTs", "Other ESTs", NULL }; static char *mrnaTables[] = { "all_mrna", "xenoMrna", NULL }; static char *mrnaLabels[] = { "mRNAs", "Other mRNAs", NULL }; static struct dyString *hgpMatchNames = NULL; void hgPosFree(struct hgPos **pEl) /* Free up hgPos. */ { struct hgPos *el; if ((el = *pEl) != NULL) { freeMem(el->name); freeMem(el->description); freeMem(el->browserName); freez(pEl); } } static void hgPosFreeList(struct hgPos **pList) /* Free a list of dynamically allocated hgPos's */ { struct hgPos *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; hgPosFree(&el); } *pList = NULL; } static void hgPosTableFree(struct hgPosTable **pEl) /* Free up hgPosTable. */ { struct hgPosTable *el; if ((el = *pEl) != NULL) { freeMem(el->name); hgPosFreeList(&el->posList); freez(pEl); } } static void hgPosTableFreeList(struct hgPosTable **pList) /* Free a list of dynamically allocated hgPos's */ { struct hgPosTable *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; hgPosTableFree(&el); } *pList = NULL; } void searchCategoryFree(struct searchCategory **el) { struct searchCategory *pEl = *el; if (pEl != NULL) { freeMem(pEl->id); freeMem(pEl->name); freeMem(pEl->searchString); freeMem(pEl->label); freeMem(pEl->description); freeMem(pEl->groupName); trixClose(&pEl->trix); slNameFreeList(pEl->parents); slNameFreeList(pEl->errors); } } #define HGPOSRANGESIZE 64 static char *hgPosBrowserRange(struct hgPos *pos, char range[HGPOSRANGESIZE]) /* Convert pos to chrN:123-456 format. If range parameter is NULL it returns * static buffer, otherwise writes and returns range. 
*/ { static char buf[HGPOSRANGESIZE]; if (range == NULL) range = buf; safef(range, HGPOSRANGESIZE, "%s:%d-%d", pos->chrom, pos->chromStart+1, pos->chromEnd); return range; } #if 0 /* not used */ static char *getGrepIndexFile(struct hgFindSpec *hfs) /* Return grepIndex setting (may be relative to hg.conf grepIndex.default), * or NULL if the file doesn't exist. */ { char *indexFile = hgFindSpecSetting(hfs, "grepIndex"); if (indexFile == NULL) return NULL; else if (fileExists(indexFile)) return cloneString(indexFile); else if (! startsWith("/", indexFile)) { char *grepIndexRoot = cfgOption("grepIndex.default"); if (grepIndexRoot != NULL) { char absPath[1024]; safef(absPath, sizeof(absPath), "%s/%s/%s", grepIndexRoot, hGetDb(), indexFile); if (fileExists(absPath)) return cloneString(absPath); } } return NULL; } #endif #define HGFIND_MAX_KEYWORDS 16 #define HGFIND_MAX_CMDWORDS 6 static void makeCmds(char **cmds[HGFIND_MAX_KEYWORDS+1], char **keyWords, int keyCount, char *extraOptions) /* Fill in cmds, an array of command word arrays. */ { int i; for (i=0; i < keyCount; i++) { char **cmd = NULL; int j = 0; AllocArray(cmd, HGFIND_MAX_CMDWORDS); cmd[j++] = "fgrep"; cmd[j++] = "-i"; if (isNotEmpty(extraOptions)) cmd[j++] = extraOptions; cmd[j++] = keyWords[i]; cmd[j++] = NULL; if (j > HGFIND_MAX_CMDWORDS) errAbort("overflow error -- increase HGFIND_MAX_CMDWORDS."); cmds[i] = cmd; } cmds[i] = NULL; } static void freeCmds(char **cmds[], int keyCount) /* Free each element of cmds. */ { int i; for (i=0; i < keyCount; i++) { freez(&(cmds[i])); } } static boolean keyIsPrefix(char *key, char *text) /* Return TRUE only if key is at the start of some word in text. * For short keys (2 or less) it must be whole word. 
*/ { char *s = text; int keyLen = strlen(key); while ((s = stringIn(key, s)) != NULL) { if (s == text || !isalnum(s[-1])) { if (keyLen > 2 || !isalnum(s[keyLen])) return TRUE; } s += 1; } return FALSE; } static boolean keyIsPrefixIgnoreCase(char *key, char *text) /* Case insensitive keyIsPrefix */ { boolean isPrefix; key = cloneString(key); touppers(key); text = cloneString(text); touppers(text); isPrefix = keyIsPrefix(key, text); freeMem(key); freeMem(text); return isPrefix; } static boolean allKeysPrefix(char **keys, int keyCount, char *text) /* Make sure that all keys in text are proper prefixes of a word. */ /* NOTE: this is case sensitive. To ignore case, caller must ensure that * all keys and text have been forced to the same case. */ { int i; for (i=0; i 0) { if (extraOptions == NULL) extraOptions = ""; makeCmds(cmds, keyWords, keyCount, extraOptions); pl = pipelineOpen(cmds, pipelineRead | pipelineNoAbort, indexFile, NULL, 0); lf = pipelineLineFile(pl); verbose(3, "\n***Running this fgrep command with pipeline from %s:\n*** %s\n\n", indexFile, pipelineDesc(pl)); while (lineFileNextReal(lf, &line)) { id = nextWord(&line); rest = skipLeadingSpaces(line); touppers(rest); if (allKeysPrefix(keyWords, keyCount, rest)) { struct slName *idEl = slNameNew(id); slAddHead(&idList, idEl); } } pipelineClose(&pl); /* Takes care of lf too. */ freeCmds(cmds, keyCount); if (verboseLevel() >= 3) { int count = slCount(idList); verbose(3, "*** Got %d results from %s\n\n", count, indexFile); } } freeMem(escapedKey); return idList; } +static struct hgPosTable *findTable(struct hgPosTable *list, char *name) +/* Find first table in list that matches name */ +{ +struct hgPosTable *ret = NULL; +for (ret = list; ret != NULL; ret = ret->next) + { + if (sameString(ret->name, name)) + return ret; + } +return NULL; +} + static struct hgPosTable *addKnownGeneTable(char *db, struct hgPositions *hgp, char *name) /* Create new table for known genes matches, add it to hgp, and return it. 
*/ { +// we may be coming here a second time, after already hitting knownGeneFast +// add non duplicate results to the end of our hgp struct hgPosTable *table; +table = findTable(hgp->tableList, name); +if (table == NULL) + { AllocVar(table); table->searchTime = -1; if (differentString(name, "knownGene")) { char *masterGeneTrack = hdbGetMasterGeneTrack(name); table->description = cloneString(masterGeneTrack); table->name = cloneString(masterGeneTrack); } else { if (hTableExists(db, "knownAttrs")) table->description = cloneString("Gencode Genes"); else if (hTableExists(db, "kgProtMap2")) table->description = cloneString("UCSC Genes"); else table->description = cloneString("Known Genes"); table->name = cloneString("knownGene"); } slAddHead(&hgp->tableList, table); + } return table; } static char *makeIndexPath(char *db, char *name) { /* create the pathname with the knowngene index for a db, result needs to be freed */ char *path = needMem(PATH_LEN); safef(path, PATH_LEN, "/gbdb/%s/%s.ix", db, name); char *newPath = hReplaceGbdb(path); freez(&path); return newPath; } static boolean gotFullText(char *db, char *indexPath) /* Return TRUE if we have full text index. */ { boolean result = FALSE; if (udcExists(indexPath)) result = TRUE; else { warn("%s doesn't exist", indexPath); result = FALSE; } return result; } struct tsrPos /* Little helper structure tying together search result * and pos, used by addKnownGeneItems */ { struct tsrPos *next; /* Next in list. */ struct trixSearchResult *tsr; /* Basically a gene symbol */ struct hgPos *posList; /* Associated list of positions. 
*/ }; static int hgPosCmpCanonical(const void *vhg1, const void *vhg2) // Compares two hgPos structs and returns an integer { const struct hgPos *hg1 = *((struct hgPos**)vhg1); const struct hgPos *hg2 = *((struct hgPos**)vhg2); int diff = trixSearchResultCmp(&hg1->tp->tsr, &hg2->tp->tsr); if (diff == 0) { diff = (hg2->canonical - hg1->canonical); if (diff == 0) { // Prioritize things on main chromosomes diff = chrNameCmpWithAltRandom(hg1->chrom, hg2->chrom); } } return diff; } - static void addKnownGeneItems(struct hgPosTable *table, struct trixSearchResult *tsrList, struct sqlConnection *conn, char *name, struct trix *trix, struct hgFindSpec *hfs) /* Convert tsrList to posList, and hang posList off of table. */ { struct dyString *dy = dyStringNew(0); struct trixSearchResult *tsr; struct hash *hash = hashNew(16); struct hgPos *pos, *posList = NULL; struct tsrPos *tpList = NULL, *tp; struct sqlResult *sr; char **row; int maxToReturn = NONEXHAUSTIVE_SEARCH_LIMIT; char *db = sqlGetDatabase(conn); char *dbName; if (sameString(name, "knownGene")) dbName = db; else dbName = name; if (slCount(tsrList) > maxToReturn) { //warn("Search terms are not very specific, only showing first %d matching UCSC Genes.", // maxToReturn); tsr = slElementFromIx(tsrList, maxToReturn-1); tsr->next = NULL; } // allow supporting snippet file to not exist, if there are no // snippets then the below code will use the description from // kgXref struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { char *context = hgFindSpecSetting(hfs, "searchTrixContext"); if (context && sameString(context, "on")) addSnippetsToSearchResults(tsrList, trix); } errCatchEnd(errCatch); /* Make hash of all search results - one for each known gene ID. 
*/ for (tsr = tsrList; tsr != NULL; tsr = tsr->next) { lmAllocVar(hash->lm, tp); tp->tsr = tsr; slAddHead(&tpList, tp); hashAdd(hash, tsr->itemId, tp); } /* Stream through knownGenes table and make up a pos * for each mapping of each gene matching search. */ sqlDyStringPrintf(dy, "select kg.name,kg.chrom,kg.txStart,kg.txEnd,geneSymbol,description,kc.transcript from %s.knownGene kg " "join %s.kgXref on kg.name = %s.kgXref.kgID " "left join %s.knownCanonical kc on " "kc.transcript = kg.name and kc.chrom=kg.chrom and kc.chromStart = kg.txStart " "where name in (", dbName, dbName, dbName, dbName); for (tsr = tsrList; tsr != NULL; tsr = tsr->next) { sqlDyStringPrintf(dy, "'%s'", tsr->itemId); if (tsr->next != NULL) sqlDyStringPrintf(dy, ","); } sqlDyStringPrintf(dy, ")"); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { tp = hashFindVal(hash, row[0]); char nameBuf[256]; if (tp == NULL) internalErr(); else { AllocVar(pos); pos->chrom = cloneString(row[1]); pos->chromStart = sqlUnsigned(row[2]); pos->chromEnd = sqlUnsigned(row[3]); pos->tp = tp; slAddHead(&tp->posList, pos); safef(nameBuf, sizeof(nameBuf), "%s (%s)", row[4], row[0]); pos->name = cloneString(nameBuf); pos->browserName = cloneString(row[0]); if (tp->tsr->snippet) pos->description = tp->tsr->snippet; else pos->description = cloneString(row[5]); pos->canonical = row[6] != NULL; } } sqlFreeResult(&sr); /* Hang all pos onto table. */ for (tp = tpList; tp != NULL; tp = tp->next) { struct hgPos *next; for (pos = tp->posList; pos != NULL; pos = next) { next = pos->next; slAddHead(&posList, pos); } } slSort(&posList, hgPosCmpCanonical); +// we may have already been here (ex: queried knownGeneFast first), if so, +// we need to put the new list of results behind the old list, since the +// old results had a higher priority. 
We can now rank results to knownGene +// by putting what we want users to find first in different search specs +if (table->posList == NULL) table->posList = posList; +else + { + struct hash *prevHash = hashNew(0); + struct hgPos *newPosList = NULL, *next; + for (pos = table->posList; pos != NULL; pos = pos->next) + { + hashAdd(prevHash, pos->name, pos); + } + for (pos = posList; pos != NULL; pos = next) + { + next = pos->next; + if (!hashLookup(prevHash, pos->name)) + slAddHead(&newPosList, pos); + } + slReverse(&newPosList); + table->posList = slCat(table->posList, newPosList); + } hashFree(&hash); dyStringFree(&dy); } static boolean findKnownGeneFullText(char *db, char *term,struct hgPositions *hgp, char *name, char *path, struct hgFindSpec *hfs, boolean measureTiming) /* Look for position in full text. */ { long startTime = clock1000(); boolean gotIt = FALSE; struct trix *trix; struct trixSearchResult *tsrList; char *lowered = cloneString(term); char *keyWords[HGFIND_MAX_KEYWORDS]; int keyCount; struct hgPosTable *table = NULL; trix = trixOpen(path); tolowers(lowered); keyCount = chopLine(lowered, keyWords); tsrList = trixSearch(trix, keyCount, keyWords, tsmExpand); if (tsrList != NULL) { table = addKnownGeneTable(db, hgp, name); struct sqlConnection *conn = hAllocConn(db); addKnownGeneItems(table, tsrList, conn, name, trix, hfs); hFreeConn(&conn); gotIt = TRUE; } freez(&lowered); trixSearchResultFreeList(&tsrList); trixClose(&trix); // This is hacky but rely on knownGene table being at head of list // for timing. TODO: make this more robust if (measureTiming && table != NULL) + { + if (table->searchTime == -1) table->searchTime = clock1000() - startTime; + else + table->searchTime += clock1000() - startTime; + } return gotIt; } static char *getUiUrl(struct cart *cart) /* Get rest of UI from browser. 
*/ { static struct dyString *dy = NULL; static char *s = NULL; if (dy == NULL) { dy = dyStringNew(64); if (cart != NULL && cart->sessionId != NULL) dyStringPrintf(dy, "%s=%s", cartSessionVarName(), cartSessionId(cart)); s = dy->string; } return s; } static void singlePos(struct hgPositions *hgp, char *tableDescription, char *posDescription, char *tableName, char *posName, char *browserName, char *chrom, int start, int end) /* Fill in pos for simple case single position. */ { struct hgPosTable *table; struct hgPos *pos; AllocVar(table); AllocVar(pos); slAddHead(&hgp->tableList, table); table->posList = pos; table->description = cloneString(tableDescription); table->name = cloneString(tableName); pos->chrom = chrom; pos->chromStart = start; pos->chromEnd = end; pos->name = cloneString(posName); pos->description = cloneString(posDescription); pos->browserName = cloneString(browserName); } static void fixSinglePos(struct hgPositions *hgp) /* Fill in posCount and if proper singlePos fields of hgp * by going through tables... */ { int posCount = 0; struct hgPosTable *table; struct hgPos *pos; for (table = hgp->tableList; table != NULL; table = table->next) { for (pos = table->posList; pos != NULL; pos = pos->next) { ++posCount; if (pos->chrom != NULL) hgp->singlePos = pos; } } if (posCount != 1) hgp->singlePos = NULL; hgp->posCount = posCount; } INLINE boolean setStartEndFromQuery(struct sqlConnection *conn, char *query, int *retStart, int *retEnd) /* Run query (which must have start and end as first two output columns) * and collect min start and max end from resulting rows. Return FALSE if no rows. 
*/ { boolean foundIt = FALSE; int minStart = BIGNUM; int maxEnd = 0; struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { int start = sqlUnsigned(row[0]); int end = sqlUnsigned(row[1]); if (start < minStart) minStart = start; if (end > maxEnd) maxEnd = end; foundIt = TRUE; } sqlFreeResult(&sr); if (foundIt) { if (retStart != NULL) *retStart = minStart; if (retEnd != NULL) *retEnd = maxEnd; } return foundIt; } static boolean hgFindChromBand(char *db, char *chrom, char *band, int *retStart, int *retEnd) /* Return start/end of band in chromosome. */ { struct sqlConnection *conn = hAllocConn(db); struct dyString *query = sqlDyStringCreate("select chromStart, chromEnd from cytoBand " "where chrom = '%s' and name = '%s'", chrom, band); boolean foundIt = setStartEndFromQuery(conn, query->string, retStart, retEnd); if (! foundIt) { // No exact match -- if band has a '.', chop at the '.' in case we only have more coarse data. // Otherwise try prefix search. dyStringClear(query); int len = strlen(band); char truncBand[len+1]; safecpy(truncBand, sizeof(truncBand), band); char *dot = strchr(truncBand, '.'); if (dot) { *dot = 0; sqlDyStringPrintf(query, "select chromStart, chromEnd from cytoBand " "where chrom = '%s' and name = '%s'", chrom, truncBand); } else { sqlDyStringPrintf(query, "select chromStart, chromEnd from cytoBand " "where chrom = '%s' and name like '%s%%'", chrom, band); } foundIt = setStartEndFromQuery(conn, query->string, retStart, retEnd); } hFreeConn(&conn); dyStringFree(&query); return foundIt; } boolean hgParseCytoBandName(char *db, char *spec, char **retChromName, char **retBandName) /* Return TRUE if spec is a cytological band name including chromosome short * name. Returns chromosome chrN name and band (with chromosome stripped off) */ { regmatch_t substrArr[5]; // See if spec looks like a "chr"-less chromosome followed by a p or q, then a number, // and possibly a '.' and another number. 
// Mouse bands may have a letter A-H before the number, and may have no number. // Horse bands may have "pq". if (regexMatchSubstrNoCase(spec, "^(X|Y|[0-9]+)([pq]+[A-H]?([0-9]+(\\.[0-9]+)?)?)$", substrArr, ArraySize(substrArr))) { char chrSpec[PATH_LEN]; safencpy(chrSpec, sizeof(chrSpec), "chr", 3); safencpy(chrSpec+3, sizeof(chrSpec)-3, spec, substrArr[1].rm_eo); char *chromName = hgOfficialChromName(db, chrSpec); if (chromName) { if (retChromName) *retChromName = chromName; if (retBandName) *retBandName = cloneString(spec + substrArr[2].rm_so); return TRUE; } } return FALSE; } boolean hgFindCytoBand(char *db, char *spec, char **retChromName, int *retWinStart, int *retWinEnd) /* Return position associated with cytological band if spec looks to be * in that form. */ { char *bandName; if (!hgParseCytoBandName(db, spec, retChromName, &bandName)) return FALSE; return hgFindChromBand(db, *retChromName, bandName, retWinStart, retWinEnd); } boolean findChromContigPos(char *db, char *name, char **retChromName, int *retWinStart, int *retWinEnd) /* Find position in genome of contig. Look in all chroms. * Don't alter return variables unless found. 
*/ /* NOTE: could probably speed this up by using the chromInfo hashtable */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row; char query[256]; boolean foundIt = FALSE; /* In case this is a scaffold-based assembly, check for unsplit table first: */ if (sqlTableExists(conn, "gold")) { sqlSafef(query, sizeof(query), "select chrom,chromStart,chromEnd from gold where frag = '%s'", name); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { *retChromName = cloneString(row[0]); *retWinStart = atoi(row[1]); *retWinEnd = atoi(row[2]); foundIt = TRUE; } sqlFreeResult(&sr); } else { struct slName *allChroms = hAllChromNames(db); struct slName *chromPtr; for (chromPtr=allChroms; chromPtr != NULL; chromPtr=chromPtr->next) { char tableName[256]; safef(tableName, sizeof(tableName), "%s_gold", chromPtr->name); if (! sqlTableExists(conn, tableName)) continue; sqlSafef(query, sizeof(query), "select chromStart,chromEnd from %s where frag = '%s'", tableName, name); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { *retChromName = cloneString(chromPtr->name); *retWinStart = atoi(row[0]); *retWinEnd = atoi(row[1]); foundIt = TRUE; } sqlFreeResult(&sr); if (foundIt) break; } slNameFreeList(&allChroms); } hFreeConn(&conn); return foundIt; } #if 0 /* not used */ static boolean isAccForm(char *s) /* Returns TRUE if s is of format to be a genbank accession. 
*/ { int len = strlen(s); if (len < 6 || len > 10) return FALSE; if (!isalpha(s[0])) return FALSE; if (!isdigit(s[len-1])) return FALSE; return TRUE; } #endif static boolean mrnaInfo(char *acc, struct sqlConnection *conn, char **mrnaType) /* Sets *mrnaType to mrna/est type for the accession */ /* Ignores returned values if parameters are NULL */ /* Return TRUE if search succeeded, else FALSE */ /* NOTE: caller must free mrnaType */ { char query[256]; struct sqlResult *sr; char **row; int ret; sqlSafef(query, sizeof(query), "select type from %s where acc = '%s'", gbCdnaInfoTable, acc); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { if (mrnaType != NULL) *mrnaType = cloneString(row[0]); ret = TRUE; } else ret = FALSE; sqlFreeResult(&sr); return ret; } boolean isRefSeqAcc(char *acc) /* Return TRUE if acc looks like a RefSeq acc. */ { return regexMatchNoCase(acc, "^(N|X)M_[0-9]{6}[0-9]*$"); } static char *mrnaType(char *db, char *acc) /* Return "mrna" or "est" if acc is mRNA, otherwise NULL. Returns * NULL for refseq mRNAs */ /* for compat with older databases, just look at the seqId to * determine if it's a refseq, don't use table */ /* NOTE: caller must free returned type */ { struct sqlConnection *conn; char *type = NULL; char *ret = NULL; if (isRefSeqAcc(acc)) return NULL; conn = hAllocConn(db); if (mrnaInfo(acc, conn, &type)) ret = type; else ret = NULL; hFreeConn(&conn); return ret; } static void mrnaHtmlStart(struct hgPosTable *table, FILE *f) /* Print preamble to mrna alignment positions. */ { fprintf(f, "

%s

", table->description); fprintf(f, "This aligns in multiple positions. Click on a hyperlink to "); fprintf(f, "go to tracks display at a particular alignment.
"); fprintf(f, "
");
 fprintf(f, " SIZE IDENTITY CHROMOSOME STRAND  START     END       cDNA   START  END  TOTAL\n");
 fprintf(f, "------------------------------------------------------------------------------\n");
 }
 
 static void mrnaHtmlEnd(struct hgPosTable *table, FILE *f)
 /* Print end to mrna alignment positions. */
 {
 fprintf(f, "
"); } static void mrnaHtmlOnePos(struct hgPosTable *table, struct hgPos *pos, FILE *f) /* Print one mrna alignment position. */ { fprintf(f, "%s", pos->description); } char *hCarefulTrackOpenVisCart(struct cart *cart, char *db, char *trackName) /* If track is already in full mode, return full; otherwise, return * hTrackOpenVis. */ { char *vis = cart ? cartOptionalString(cart, trackName) : NULL; if (vis && sameString(vis, "full")) return "full"; else return hTrackOpenVis(db, trackName); } static struct psl *getPslFromTable(struct sqlConnection *conn, char *db, char *table, char *acc) /* If table exists, return PSL for each row with qName = acc. */ { struct psl *pslList = NULL; if (sqlTableExists(conn, table)) { int rowOffset = hOffsetPastBin(db, NULL, table); char query[256]; sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", table, acc); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); } slReverse(&pslList); sqlFreeResult(&sr); } return pslList; } static void addPslResultToHgp(struct cart *cart, struct hgPositions *hgp, char *db, char *tableName, char *shortLabel, char *acc, struct psl *pslList, boolean measureTiming) /* Create an hgPosTable for the given psl search results, and add it to hgp->tableList. */ { if (pslList == NULL) return; long startTime = clock1000(); struct hgPosTable *table; struct dyString *dy = dyStringNew(1024); struct psl *psl; char hgAppCombiner = (strchr(hgAppName, '?')) ? 
'&' : '?'; char *ui = getUiUrl(cart); AllocVar(table); table->searchTime = -1; table->htmlStart = mrnaHtmlStart; table->htmlEnd = mrnaHtmlEnd; table->htmlOnePos = mrnaHtmlOnePos; slAddHead(&hgp->tableList, table); dyStringPrintf(dy, "%s Alignments in %s", acc, shortLabel); table->description = cloneString(dy->string); table->name = cloneString(tableName); char *trackName = hGetTrackForTable(db, table->name); slSort(&pslList, pslCmpScore); for (psl = pslList; psl != NULL; psl = psl->next) { struct hgPos *pos; dyStringClear(dy); AllocVar(pos); pos->chrom = hgOfficialChromName(db, psl->tName); pos->chromStart = psl->tStart; pos->chromEnd = psl->tEnd; pos->name = cloneString(psl->qName); pos->browserName = cloneString(psl->qName); dyStringPrintf(dy, "", hgp->extraCgi); dyStringPrintf(dy, "%5d %5.1f%% %9s %s %9d %9d %8s %5d %5d %5d", psl->match + psl->misMatch + psl->repMatch + psl->nCount, 100.0 - pslCalcMilliBad(psl, TRUE) * 0.1, skipChr(psl->tName), psl->strand, psl->tStart + 1, psl->tEnd, psl->qName, psl->qStart+1, psl->qEnd, psl->qSize); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); slAddHead(&table->posList, pos); } slReverse(&table->posList); if (measureTiming) table->searchTime = clock1000() - startTime; dyStringFree(&dy); } static boolean findMrnaPos(struct cart *cart, char *db, char *acc, struct hgPositions *hgp, boolean measureTiming) /* Find MRNA or EST position(s) from accession number. * Look to see if it's an mRNA or EST. Fill in hgp and return * TRUE if it is, otherwise return FALSE. 
*/ /* NOTE: this excludes RefSeq mrna's, as they are currently * handled in findRefGenes(), which is called later in the main function */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, gbCdnaInfoTable)) { hFreeConn(&conn); return FALSE; } char *type = mrnaType(db, acc); if (isEmpty(type)) { hFreeConn(&conn); /* this excludes refseq mrna's, and accessions with * invalid column type in mrna table (refseq's and ests) */ return FALSE; } char lowerType[16]; char **tables, **labels, *tableName; boolean gotResults = FALSE; safecpy(lowerType, sizeof(lowerType), type); tolowers(lowerType); if (sameWord(lowerType, "mrna")) { tables = mrnaTables; labels = mrnaLabels; } else if (sameWord(lowerType, "est")) { tables = estTables; labels = estLabels; } else { hFreeConn(&conn); return FALSE; } while ((tableName = *tables++) != NULL) { char *label = *labels++; struct psl *pslList = NULL; if (sameString(tableName, "intronEst") && !sqlTableExists(conn, tableName)) { struct slName *c, *chromList = hChromList(db); char splitTable[HDB_MAX_TABLE_STRING]; for (c = chromList; c != NULL; c = c->next) { safef(splitTable, sizeof(splitTable), "%s_%s", c->name, tableName); struct psl *chrPslList = getPslFromTable(conn, db, splitTable, acc); if (pslList == NULL) pslList = chrPslList; else slCat(pslList, chrPslList); } } else pslList = getPslFromTable(conn, db, tableName, acc); if (pslList == NULL) continue; gotResults = TRUE; addPslResultToHgp(cart, hgp, db, tableName, label, acc, pslList, measureTiming); if (!sameString(tableName, "intronEst")) /* for speed -- found proper table, so don't need to look farther */ break; } hFreeConn(&conn); return gotResults; } static char *getGenbankGrepIndex(char *db, struct hgFindSpec *hfs, char *table, char *suffix) /* If hg.conf has a grepIndex.genbank setting, hfs has a (placeholder) * grepIndex setting, and we can access the index file for table, then * return the filename; else return NULL. 
*/ /* Special case for genbank: Mark completely specifies the root in * hg.conf, so hfs's grepIndex setting value is ignored -- it is used * only to enable grep indexing. So we have multiple ways to turn this * off if necessary: remove hg.conf setting (takes out all dbs), * remove hgFindSpec setting (takes out one db at a time), or remove * a file (takes out one table at a time). */ { char *grepIndexRoot = cfgOption("grepIndex.genbank"); char *hfsSetting = hgFindSpecSetting(hfs, "grepIndex"); if (grepIndexRoot != NULL && hfsSetting != NULL) { char buf[1024]; char *dot; // check to see if table name has database in it if ((dot = strchr(table, '.')) != NULL) { *dot = 0; db = table; table = dot + 1; } safef(buf, sizeof(buf), "%s/%s/%s.%s", grepIndexRoot, db, table, suffix); if (dot) *dot = '.'; if (fileExists(buf)) return cloneString(buf); } return NULL; } static struct slName *genbankGrepQuery(char *indexFile, char *table, char *key) /* grep -i key indexFile, return a list of ids (first word of each line). */ { char *extraOptions = ""; if (sameString(table, "author")) extraOptions = "-w"; return doGrepQuery(indexFile, table, key, extraOptions); } static struct slName *genbankSqlFuzzyQuery(struct sqlConnection *conn, char *table, char *key, int limitResults) /* Perform a fuzzy sql search for %key% in table.name; return list of * corresponding table.id's. */ { struct slName *idList = NULL, *idEl = NULL; if (!isTooCommon(table, key)) { struct sqlResult *sr; char **row; char query[256]; if (limitResults == EXHAUSTIVE_SEARCH_REQUIRED) sqlSafef(query, sizeof(query), "select id,name from %s where name like '%%%s%%'", table, key); else // limit results to avoid CGI timeouts (#11626). 
sqlSafef(query, sizeof(query), "select id,name from %s where name like '%%%s%%' limit %d", table, key, limitResults); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { touppers(row[1]); if (keyIsPrefix(key, row[1])) { idEl = newSlName(row[0]); slAddHead(&idList, idEl); } } sqlFreeResult(&sr); } return idList; } static char *skipDb(char *tableName) /* retun a pointer past the datbase part of the table name (if any) */ { char *dot = tableName; if ((dot = strchr(tableName, '.')) == NULL) return tableName; return dot + 1; } static boolean gotAllGenbankGrepIndexFiles(char *db, struct hgFindSpec *hfs, char *tables[], int tableCount) /* Return TRUE if all tables have a readable genbank index file. */ { int i; for (i=0; i < tableCount; i++) if (! getGenbankGrepIndex(db, hfs, tables[i], "idName")) return FALSE; return TRUE;; } static void findHitsToTables(char *db, struct hgFindSpec *hfs, char *key, int limitResults, char *tables[], int tableCount, struct hash **retHash, struct slName **retList) /* Return all unique accessions that match any table. */ // Modified to return only the first 500 hits because of CGI timeouts { struct slName *list = NULL, *el; struct hash *hash = newHash(0); struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row; char *field; int i; int rowCount = 0; // Excessively broad searches were leading to CGI timeouts (#11626). for (i = 0; inext) { /* don't check srcDb to exclude refseq for compat with older tables */ struct dyString *query = sqlDyStringCreate( "select acc, organism from %s where %s = '%s' " " and type = 'mRNA'", gbCdnaInfoTable, skipDb(field), idEl->name); // limit results to avoid CGI timeouts (#11626). 
if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED) sqlDyStringPrintf(query, " limit %d", limitResults); sr = sqlGetResult(conn, dyStringContents(query)); dyStringFree(&query); while ((row = sqlNextRow(sr)) != NULL) { char *acc = row[0]; /* will use this later to distinguish xeno mrna */ int organismID = sqlUnsigned(row[1]); if (!isRefSeqAcc(acc) && !hashLookup(hash, acc)) { el = newSlName(acc); slAddHead(&list, el); hashAddInt(hash, acc, organismID); // limit results to avoid CGI timeouts (#11626). if (rowCount++ > limitResults && limitResults != EXHAUSTIVE_SEARCH_REQUIRED) break; } } sqlFreeResult(&sr); } slFreeList(&idList); } hFreeConn(&conn); slReverse(&list); *retList = list; *retHash = hash; } static void andHits(struct hash *aHash, struct slName *aList, struct hash *bHash, struct slName *bList, struct hash **retHash, struct slName **retList) /* Return hash/list that is intersection of lists a and b. */ { struct slName *list = NULL, *el, *newEl; struct hash *hash = newHash(0); for (el = aList; el != NULL; el = el->next) { char *name = el->name; int organismID = hashIntValDefault(bHash, name, -1); if (organismID >= 0 && !hashLookup(hash, name)) { newEl = newSlName(name); slAddHead(&list, newEl); hashAddInt(hash, name, organismID); } } *retHash = hash; *retList = list; } static void mrnaKeysHtmlOnePos(struct hgPosTable *table, struct hgPos *pos, FILE *f) { fprintf(f, "%s", pos->description); } static boolean mrnaAligns(struct sqlConnection *conn, char *table, char *acc) /* Return TRUE if accession is in the designated alignment table (for speed, * this assumes that we've already checked that the table exists) */ { char query[256]; sqlSafef(query, sizeof(query), "select count(*) from %s where qName = '%s'", table, acc); return (sqlQuickNum(conn, query) > 0); } static int addMrnaPositionTable(char *db, struct hgPositions *hgp, struct slName **pAccList, struct hash *accOrgHash, struct cart *cart, struct sqlConnection *conn, char *hgAppName, boolean aligns, boolean 
isXeno, boolean measureTiming) /* Generate table of positions that match criteria. * Add to hgp if any found. Return number found */ { struct hgPosTable *table = NULL; struct slName *el = NULL; struct slName *elToFree = NULL; long startTime = clock1000(); struct dyString *dy = dyStringNew(256); char *ui = getUiUrl(cart); int organismID = hOrganismID(hgp->database); /* id from mrna organism table */ int alignCount = 0; char hgAppCombiner = (strchr(hgAppName, '?')) ? '&' : '?'; char *mrnaTable = isXeno ? "xenoMrna" : "all_mrna"; boolean mrnaTableExists = hTableExists(hgp->database, mrnaTable); AllocVar(table); table->searchTime = -1; /* Examine all accessions to see if they fit criteria for * this table. Add all matching to the position list, and * remove from the accession list */ for (el = *pAccList; el != NULL; el = el->next) { freez(&elToFree); char *acc = el->name; /* check if item matches xeno criterion */ int itemOrganismID = hashIntVal(accOrgHash, acc); if (isXeno == (itemOrganismID == organismID)) continue; /* check if item matches alignment criterion */ if (aligns != (mrnaTableExists && mrnaAligns(conn, mrnaTable, acc))) continue; /* item fits criteria, so enter in table */ struct hgPos *pos = NULL; AllocVar(pos); slAddHead(&table->posList, pos); pos->name = cloneString(acc); pos->browserName = cloneString(acc); dyStringClear(dy); if (aligns) { dyStringPrintf(dy, "", hgp->extraCgi); dyStringPrintf(dy, "%s", acc); /* print description for item, or lacking that, the product name */ char description[1028]; safef(description, sizeof(description), "%s", "n/a"); char query[512]; sqlSafef(query, sizeof(query), "select d.name from %s g,%s d" " where g.acc = '%s' and g.description = d.id", gbCdnaInfoTable, descriptionTable, acc); sqlQuickQuery(conn, query, description, sizeof(description)); if (sameString(description, "n/a")) { /* look for product name */ sqlSafef(query, sizeof(query), "select p.name from %s g,%s p" " where g.acc = '%s' and g.productName = p.id", 
gbCdnaInfoTable, productNameTable, acc); char product[256]; sqlQuickQuery(conn, query, product, sizeof(product)); if (!sameString(product, "n/a")) { /* get organism name */ sqlSafef(query, sizeof(query), "select o.name from %s g,%s o" " where g.acc = '%s' and g.organism = o.id", gbCdnaInfoTable, organismTable, acc); char organism[128]; *organism = 0; sqlQuickQuery(conn, query, organism, sizeof(organism)); safef(description, sizeof(description), "%s%s%s", *organism ? organism : "", *organism ? ", " : "", product); } } if (!sameString(description, "n/a")) /* print description if it has been loaded */ dyStringPrintf(dy, " - %s", description); dyStringPrintf(dy, "\n"); pos->description = cloneString(dy->string); /* remove processed element from accession list */ slRemoveEl(pAccList, el); elToFree = el; } /* fill in table and add to hgp only if it contains results */ alignCount = slCount(table->posList); if (alignCount > 0) { char *organism = hOrganism(hgp->database); /* dbDb organism column */ if (alignCount == 1) { // So far we have not bothered to look up the coordinates because there are almost always // multiple matches among which the user will have to choose. However, it is possible // for there to be a unique match (hgwdev 19-02-15, hg38, "elmer" --> U01022). In that // case we should look up the coordinates so the user doesn't have to click through a page // with one match leading to another search. char shortLabel[256]; safef(shortLabel, sizeof shortLabel, "%s%s %sligned mRNAs", isXeno ? "Non-" : "", organism, aligns ? "A" : "Una"); char *acc = table->posList->name; struct psl *pslList = getPslFromTable(conn, hgp->database, mrnaTable, acc); addPslResultToHgp(cart, hgp, hgp->database, mrnaTable, shortLabel, acc, pslList, measureTiming); if (hgp->tableList) alignCount = slCount(hgp->tableList->posList); else alignCount = 0; } else { char title[256]; slReverse(&table->posList); safef(title, sizeof(title), "%s%s %sligned mRNA Search Results", isXeno ? 
"Non-" : "", organism, aligns ? "A" : "Una"); table->description = cloneString(title); table->name = cloneString(mrnaTable); table->htmlOnePos = mrnaKeysHtmlOnePos; slAddHead(&hgp->tableList, table); } freeMem(organism); } if (measureTiming) table->searchTime = clock1000() - startTime; dyStringFree(&dy); return alignCount; } static boolean findMrnaKeys(struct cart *cart, char *db, struct hgFindSpec *hfs, char *keys, int limitResults, struct hgPositions *hgp, boolean measureTiming) /* Find mRNA that has keyword in one of its fields. */ { int alignCount; char *tables[] = { productNameTable, geneNameTable, authorTable, tissueTable, cellTable, descriptionTable, developmentTable, }; struct hash *allKeysHash = NULL; struct slName *allKeysList = NULL; struct sqlConnection *conn = hAllocConn(db); boolean found = FALSE; /* If we can use grep to search all tables, then use piped grep to * implement implicit "AND" of multiple keys. */ if (gotAllGenbankGrepIndexFiles(db, hfs, tables, ArraySize(tables))) { findHitsToTables(db, hfs, keys, limitResults, tables, ArraySize(tables), &allKeysHash, &allKeysList); } else { struct hash *oneKeyHash = NULL; struct slName *oneKeyList = NULL; struct hash *andedHash = NULL; struct slName *andedList = NULL; char *words[32]; char buf[512]; int wordCount; int i; safef(buf, sizeof(buf), "%s", keys); wordCount = chopLine(buf, words); if (wordCount == 0) return FALSE; found = TRUE; for (i=0; i 10 || !isdigit(c)) return FALSE; } if (size==0) return FALSE; return TRUE; } static void addRefLinks(struct sqlConnection *conn, struct dyString *query, struct refLink **pList) /* Query database and add returned refLinks to head of list. 
*/ { struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct refLink *rl = refLinkLoad(row); slAddHead(pList, rl); } sqlFreeResult(&sr); } static void addRefLinkAccs(struct sqlConnection *conn, struct slName *accList, struct refLink **pList) /* Query database and add returned refLinks to head of list. */ { struct slName *accEl = NULL; struct sqlResult *sr = NULL; char **row = NULL; char query[256]; for (accEl = accList; accEl != NULL; accEl = accEl->next) { sqlSafef(query, sizeof(query), "select * from %s where mrnaAcc = '%s'", refLinkTable, accEl->name); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct refLink *rl = refLinkLoad(row); slAddHead(pList, rl); } sqlFreeResult(&sr); } } static boolean findRefGenes(char *db, struct hgFindSpec *hfs, char *spec, struct hgPositions *hgp, boolean measureTiming) /* Look up refSeq genes in table. */ { long startTime = clock1000(); struct sqlConnection *conn = hAllocConn(db); struct dyString *ds = dyStringNew(256); struct refLink *rlList = NULL, *rl; boolean gotRefLink = sqlTableExists(conn, refLinkTable); boolean found = FALSE; char *specNoVersion = cloneString(spec); // chop off the version number, e.g. "NM_000454.4 ", // but if spec starts with "." like ".stuff" then specNoVersion is entirely empty. 
(void) chopPrefix(specNoVersion); if (gotRefLink && isNotEmpty(specNoVersion)) { if (startsWith("NM_", specNoVersion) || startsWith("NR_", specNoVersion) || startsWith("XM_", specNoVersion)) { sqlDyStringPrintf(ds, "select * from %s where mrnaAcc = '%s'", refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); } else if (startsWith("NP_", specNoVersion) || startsWith("XP_", specNoVersion)) { sqlDyStringPrintf(ds, "select * from %s where protAcc = '%s'", refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); } else if (isUnsignedInt(specNoVersion)) { sqlDyStringPrintf(ds, "select * from %s where locusLinkId = '%s'", refLinkTable, specNoVersion); addRefLinks(conn, ds, &rlList); dyStringClear(ds); sqlDyStringPrintf(ds, "select * from %s where omimId = '%s'", refLinkTable,specNoVersion); addRefLinks(conn, ds, &rlList); } else { char *indexFile = getGenbankGrepIndex(db, hfs, refLinkTable, "mrnaAccProduct"); sqlDyStringPrintf(ds, "select * from %s where name like '%s%%' limit %d", refLinkTable, specNoVersion, NONEXHAUSTIVE_SEARCH_LIMIT); addRefLinks(conn, ds, &rlList); if (indexFile != NULL) { struct slName *accList = doGrepQuery(indexFile, refLinkTable, specNoVersion, NULL); addRefLinkAccs(conn, accList, &rlList); } else { dyStringClear(ds); sqlDyStringPrintf(ds, "select * from %s where product like '%%%s%%' limit %d", refLinkTable, specNoVersion, NONEXHAUSTIVE_SEARCH_LIMIT); addRefLinks(conn, ds, &rlList); } } } if (rlList != NULL) { struct hgPosTable *table = NULL; struct hash *hash = newHash(8); for (rl = rlList; rl != NULL; rl = rl->next) { char where[64]; struct genePredReader *gpr; struct genePred *gp; /* Don't return duplicate mrna accessions */ if (hashFindVal(hash, rl->mrnaAcc)) { hashAdd(hash, rl->mrnaAcc, rl); continue; } hashAdd(hash, rl->mrnaAcc, rl); sqlSafef(where, sizeof where, "name = '%s'", rl->mrnaAcc); gpr = genePredReaderQuery(conn, hfs->searchTable, where); while ((gp = genePredReaderNext(gpr)) != NULL) { struct hgPos *pos = NULL; 
AllocVar(pos); if (table == NULL) { char desc[256]; AllocVar(table); table->searchTime = -1; table->name = cloneString(hfs->searchTable); if (startsWith("xeno", hfs->searchTable)) safef(desc, sizeof(desc), "Non-%s RefSeq Genes", hOrganism(db)); else safef(desc, sizeof(desc), "RefSeq Genes"); table->description = cloneString(desc); slAddHead(&hgp->tableList, table); } slAddHead(&table->posList, pos); pos->name = cloneString(rl->name); pos->browserName = cloneString(rl->mrnaAcc); dyStringClear(ds); dyStringPrintf(ds, "(%s) %s", rl->mrnaAcc, rl->product); pos->description = cloneString(ds->string); pos->chrom = hgOfficialChromName(db, gp->chrom); pos->chromStart = gp->txStart; pos->chromEnd = gp->txEnd; genePredFree(&gp); found = TRUE; } genePredReaderFree(&gpr); } if (table != NULL && measureTiming) table->searchTime = clock1000() - startTime; refLinkFreeList(&rlList); freeHash(&hash); } dyStringFree(&ds); hFreeConn(&conn); return(found); } /* Lowe lab additions */ static void addTigrCmrGenes(struct sqlConnection *conn, struct dyString *query, struct tigrCmrGene **pList) /* Query database and add returned tigrCmrGenes to head of list. 
*/
{
struct sqlResult *sr = sqlGetResult(conn, query->string);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct tigrCmrGene *rl = tigrCmrGeneLoad(row);
    slAddHead(pList, rl);
    }
sqlFreeResult(&sr);
}

static void findTigrGenes(char *db, char *spec, struct hgPositions *hgp)
/* Look up TIGR and Genbank genes from keyword.
 * If table tigrCmrORFsInfo exists, spec is matched as a substring against its
 * tigrCommon, tigrMainRole and tigrSubRole columns.  For each distinct name
 * found, the rows of tigrCmrORFs with that name are added as positions to a
 * new "TIGR CMR Genes" table at the head of hgp->tableList. */
{
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr = NULL;
struct dyString *ds = dyStringNew(256);
char **row;
struct hgPosTable *table = NULL;
struct hgPos *pos;
struct bed *bed;
struct tigrCmrGene *tigrList = NULL, *tigr;
/* struct minGeneInfo *gbList = NULL, *gb; */
boolean gotTIGRkeys = sqlTableExists(conn, "tigrCmrORFsInfo");
if (gotTIGRkeys)
    {
    /* Three separate queries, so the same record may land in tigrList up to
     * three times; duplicates are filtered by name below. */
    sqlDyStringPrintf(ds, "select * from tigrCmrORFsInfo where tigrCommon like '%%%s%%'", spec);
    addTigrCmrGenes(conn, ds, &tigrList);
    dyStringClear(ds);
    sqlDyStringPrintf(ds, "select * from tigrCmrORFsInfo where tigrMainRole like '%%%s%%'", spec);
    addTigrCmrGenes(conn, ds, &tigrList);
    dyStringClear(ds);
    sqlDyStringPrintf(ds, "select * from tigrCmrORFsInfo where tigrSubRole like '%%%s%%'", spec);
    addTigrCmrGenes(conn, ds, &tigrList);
    dyStringClear(ds);
    }
if (tigrList != NULL)
    {
    struct hash *hash = newHash(8);
    /* The table is attached to hgp as soon as any info row matched, even if
     * no tigrCmrORFs row is found for it afterwards. */
    AllocVar(table);
    slAddHead(&hgp->tableList, table);
    table->description = cloneString("TIGR CMR Genes");
    table->name = cloneString("tigrORFsCmr");
    for (tigr = tigrList; tigr != NULL; tigr = tigr->next)
        {
        /* Don't return duplicate TIGR CMR accessions */
        if (hashFindVal(hash, tigr->name))
            {
            hashAdd(hash, tigr->name, tigr);
            continue;
            }
        hashAdd(hash, tigr->name, tigr);
        dyStringClear(ds);
        sqlDyStringPrintf(ds, "select * from tigrCmrORFs where name = '%s'", tigr->name);
        sr = sqlGetResult(conn, ds->string);
        while ((row = sqlNextRow(sr)) != NULL)
            {
            /* NOTE(review): row+1 skips the first column, presumably a bin
             * column in front of the 6 bed fields -- confirm against the
             * tigrCmrORFs schema. */
            bed = bedLoadN(row+1,6);
            AllocVar(pos);
            slAddHead(&table->posList, pos);
            pos->name = cloneString(tigr->name);
            pos->browserName = cloneString(tigr->name);
            /* ds is reused for the description text; the query it held has
             * already been handed to sqlGetResult above. */
            dyStringClear(ds);
            dyStringPrintf(ds, "%s; %s; %s", tigr->tigrCommon, tigr->tigrMainRole, tigr->tigrSubRole);
            pos->description = cloneString(ds->string);
            pos->chrom = hgOfficialChromName(db, bed->chrom);
            pos->chromStart = bed->chromStart;
            pos->chromEnd = bed->chromEnd;
            bedFree(&bed);
            }
        sqlFreeResult(&sr);
        }
    tigrCmrGeneFreeList(&tigrList);
    freeHash(&hash);
    }
dyStringFree(&ds);
hFreeConn(&conn);
}
/* End of Lowe Lab stuff */

static boolean findGenePredPattern(char *db, char *pattern, struct hgPositions *hgp, char *tableName, struct hgPosTable *table)
/* Look for position pattern in gene prediction table.
 * pattern is used as the right-hand side of SQL LIKE on the name column of
 * tableName.  Hits are added to table; if table is NULL a new
 * "<tableName> Gene Predictions" table is created on the first hit and put at
 * the head of hgp->tableList.  Returns TRUE if any row matched, FALSE if the
 * table does not exist or nothing matched. */
{
struct sqlConnection *conn;
struct sqlResult *sr = NULL;
struct dyString *query;
char **row;
boolean ok = FALSE;
struct hgPos *pos = NULL;

if (!hTableExists(db, tableName))
    return FALSE;
conn = hAllocConn(db);
query = dyStringNew(256);
sqlDyStringPrintf(query, "SELECT chrom, txStart, txEnd, name FROM %s WHERE name LIKE '%s'", tableName, pattern);
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (ok == FALSE)
        {
        ok = TRUE;
        if (table == NULL)
            {
            AllocVar(table);
            struct dyString *desc = dyStringNew(256);
            dyStringPrintf(desc, "%s Gene Predictions", tableName);
            table->description = dyStringCannibalize(&desc);
            table->name = cloneString(tableName);
            slAddHead(&hgp->tableList, table);
            }
        }

    AllocVar(pos);
    pos->chrom = hgOfficialChromName(db, row[0]);
    pos->chromStart = atoi(row[1]);
    pos->chromEnd = atoi(row[2]);
    pos->name = cloneString(row[3]);
    pos->browserName = cloneString(row[3]);
    slAddHead(&table->posList, pos);
    }
/* NOTE(review): this reverses the whole posList, including positions that
 * were already in a caller-supplied table before this call. */
if (table != NULL)
    slReverse(&table->posList);
dyStringFree(&query);
sqlFreeResult(&sr);
hFreeConn(&conn);
return ok;
}

static void addUniqYeastGene(char *db, struct hash *uniqHash, struct sqlConnection *conn, char *query, struct hgPositions *hgp, char *geneTable, struct hgPosTable **pTable)
/* Execute query; for each id in the first result column that is not yet in
 * uniqHash, record it there and add the matching genes from geneTable.
*/ { struct sqlResult *sr = sqlGetResult(conn, query); char **row; struct hgPosTable *table = *pTable; while ((row = sqlNextRow(sr)) != NULL) { char *id = row[0]; if (!hashLookup(uniqHash, id)) { hashAdd(uniqHash, id, NULL); if (table == NULL) { AllocVar(table); table->name = geneTable; table->description = "Genes from Sacchromyces Genome Database"; slAddHead(&hgp->tableList, table); *pTable = table; } findGenePredPattern(db, id, hgp, geneTable, table); } } sqlFreeResult(&sr); } static boolean findYeastGenes(char *db, char *pattern, struct hgPositions *hgp) /* Scan yeast-specific tables. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row, query[256]; struct hgPosTable *table = NULL; boolean found = FALSE; if (hTableExists(db, "sgdGene")) { struct hash *uniqHash = newHash(0); boolean gotNames = FALSE, gotDescriptions = FALSE; sqlSafef(query, sizeof(query), "select name from sgdGene where name = '%s'", pattern); addUniqYeastGene(db, uniqHash, conn, query, hgp, "sgdGene", &table); if (hTableExists(db, "sgdToName")) { gotNames = TRUE; sqlSafef(query, sizeof(query), "select name from sgdToName where value like '%s%%'", pattern); addUniqYeastGene(db, uniqHash, conn, query, hgp, "sgdGene", &table); } if (hTableExists(db, "sgdDescription")) { gotDescriptions = TRUE; sqlSafef(query, sizeof(query), "select name from sgdDescription where description like '%%%s%%'", pattern); addUniqYeastGene(db, uniqHash, conn, query, hgp, "sgdGene", &table); } hashFree(&uniqHash); /* Add descriptions to table. 
*/ if (table != NULL) { struct hgPos *pos; for (pos = table->posList; pos != NULL; pos = pos->next) { struct dyString *dy = dyStringNew(1024); if (gotNames) { sqlSafef(query, sizeof(query), "select value from sgdToName where name = '%s'", pos->name); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) dyStringPrintf(dy, "(%s) ", row[0]); sqlFreeResult(&sr); } if (gotDescriptions) { sqlSafef(query, sizeof(query), "select description from sgdDescription where name = '%s'", pos->name); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) dyStringPrintf(dy, "%s", row[0]); sqlFreeResult(&sr); } if (dy->stringSize > 0) pos->description = cloneString(dy->string); dyStringFree(&dy); } found = TRUE; } } hFreeConn(&conn); return(found); } void hgPositionsHtml(char *db, struct hgPositions *hgp, char *hgAppName, struct cart *cart) /* Write multiple search results as HTML. */ { struct hgPosTable *table; struct hgPos *pos; char *desc; char range[HGPOSRANGESIZE]; char *ui = getUiUrl(cart); char *extraCgi = hgp->extraCgi; char hgAppCombiner = (strchr(hgAppName, '?')) ? '&' : '?'; boolean containerDivPrinted = FALSE; struct trackDb *tdbList = NULL; // This used to be an argument, but only stdout was used: FILE *f = stdout; if (hgp->posCount == 0) { fprintf(f, "
\n"); fprintf(f, "

No additional items found

"); fprintf(f, "
\n"); return; } for (table = hgp->tableList; table != NULL; table = table->next) { if (table->posList != NULL) { char *tableName = table->name; if (startsWith("all_", tableName)) tableName += strlen("all_"); // clear the tdb cache if this track is a hub track if (isHubTrack(tableName)) tdbList = NULL; struct trackDb *tdb = tdbForTrack(db, tableName, &tdbList); if (!tdb) errAbort("no track for table \"%s\" found via a findSpec", tableName); char *trackName = tdb->track; char *vis = hCarefulTrackOpenVisCart(cart, db, trackName); boolean excludeTable = FALSE; if(!containerDivPrinted) { fprintf(f, "
\n"); if (hgp->singlePos == NULL) // we might be called with only one result fprintf(f, "

Your search resulted in multiple matches. " "Please select a position:

\n"); containerDivPrinted = TRUE; } if (table->htmlStart) table->htmlStart(table, f); else fprintf(f, "

%s

\n", table->description);
 	for (pos = table->posList; pos != NULL; pos = pos->next)
 	    {
 	    if (table->htmlOnePos)
 	        table->htmlOnePos(table, pos, f);
 	    else
 		{
 		char *matches = excludeTable ? "" : pos->browserName;
 		char *encMatches = cgiEncode(matches);
 		hgPosBrowserRange(pos, range);
 		fprintf(f, "parent)
 		    {
 		    if (tdbIsSuperTrackChild(tdb))
 			fprintf(f, "%s=show&", tdb->parent->track);
 		    else
 			{
 			// tdb is a subtrack of a composite or a view
 			fprintf(f, "%s_sel=1&", trackName);
 			fprintf(f, "%s_sel=1&", tdb->parent->track);
 			}
 		    }
                 if (isNotEmpty(pos->highlight))
                     {
                     char *encHighlight = cgiEncode(pos->highlight);
                     fprintf(f, "addHighlight=%s&", encHighlight);
                     freeMem(encHighlight);
                     }
 		fprintf(f, "hgFind.matches=%s,\">", encMatches);
 		// Bold canonical genes. 
 		if(pos->canonical) {
 		    fprintf(f, "");
 		    }
 		htmTextOut(f, pos->name);
 		if(pos->canonical) {
 		    fprintf(f, "");
 		    }
 		fprintf(f, " at %s", range);
 		desc = pos->description;
 		if (desc)
 		    {
 		    fprintf(f, " - ");
 		    htmTextOut(f, desc);
 		    }
 		fprintf(f, "\n");
 		freeMem(encMatches);
 		}
 	    }
 	if (table->htmlEnd) 
 	    table->htmlEnd(table, f);
 	else
 	    fprintf(f, "
\n"); } } if(containerDivPrinted) { if (hgp->shortCircuited) { char *queryString = getenv("QUERY_STRING"); char *addString = "&noShort=1"; if (isEmpty(queryString)) addString = "noShort=1"; fprintf(f, " More results...", hgAppName, queryString, addString); } fprintf(f, "
\n"); } } static struct hgPositions *hgPositionsSearch(char *db, char *spec, char **retChromName, int *retWinStart, int *retWinEnd, boolean *retIsMultiTerm, struct cart *cart, char *hgAppName, char **retMultiChrom, struct dyString *dyWarn) /* Search for positions that match spec (possibly ;-separated in which case *retIsMultiTerm is set). * Return a container of tracks and positions (if any) that match term. If different components * of a multi-term search land on different chromosomes then *retMultiChrom will be set. */ { struct hgPositions *hgp = NULL; char *chrom = NULL; int start = INT_MAX; int end = 0; char *terms[16]; int termCount = chopByChar(cloneString(spec), ';', terms, ArraySize(terms)); boolean multiTerm = (termCount > 1); boolean measureTiming = cartUsualBoolean(cart, "measureTiming", FALSE); if (retIsMultiTerm) *retIsMultiTerm = multiTerm; if (retMultiChrom) *retMultiChrom = NULL; int i; for (i = 0; i < termCount; i++) { trimSpaces(terms[i]); if (isEmpty(terms[i])) continue; // Append warning messages to dyWarn, but allow errAborts to continue struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) hgp = hgPositionsFind(db, terms[i], "", hgAppName, cart, multiTerm, measureTiming, NULL); errCatchEnd(errCatch); if (errCatch->gotError) errAbort("%s", errCatch->message->string); else if (isNotEmpty(errCatch->message->string)) dyStringAppend(dyWarn, errCatch->message->string); errCatchFree(&errCatch); if (hgp->singlePos != NULL) { if (retMultiChrom && chrom != NULL && differentString(chrom, hgp->singlePos->chrom)) *retMultiChrom = cloneString(chrom); chrom = hgp->singlePos->chrom; if (hgp->singlePos->chromStart < start) start = hgp->singlePos->chromStart; if (hgp->singlePos->chromEnd > end) end = hgp->singlePos->chromEnd; } else if (hgp->posCount == 0 || (multiTerm && hgp->posCount > 1)) break; } if (retChromName) *retChromName = (chrom == NULL) ? 
chrom : hgOfficialChromName(db, chrom); if (retWinStart) *retWinStart = start; if (retWinEnd) *retWinEnd = end; return hgp; } static struct hgPositions *revertPosition(struct cart *cart, char **pPosition, char **retChrom, int *retStart, int *retEnd, char *hgAppName, struct dyString *dyWarn) /* Revert *pPosition to lastPosition (or default position). Return a new hgp for the * resolved position. Append warnings to dyWarn, errAbort if defaultPos doesn't work. */ { struct hgPositions *hgp = NULL; boolean isMultiTerm = FALSE; char *multiDiffChrom = NULL; char *db = cartString(cart, "db"); char *lastPosition = cartOptionalString(cart, "lastPosition"); if (isNotEmpty(lastPosition) && !IS_CART_VAR_EMPTY(lastPosition)) { if (startsWith(MULTI_REGION_CHROM, lastPosition) || startsWith(OLD_MULTI_REGION_CHROM, lastPosition)) { lastPosition = cartUsualString(cart, "nonVirtPosition", hDefaultPos(db)); } hgp = hgPositionsSearch(db, lastPosition, retChrom, retStart, retEnd, &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn); if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) { freez(pPosition); *pPosition = cloneString(lastPosition); return hgp; } else dyStringPrintf(dyWarn, " Unable to resolve lastPosition '%s'; " "reverting to default position.", lastPosition); } char *defaultPosition = hDefaultPos(db); hgp = hgPositionsSearch(db, defaultPosition, retChrom, retStart, retEnd, &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn); if (hgp->singlePos && !(isMultiTerm && isNotEmpty(multiDiffChrom))) { freez(pPosition); *pPosition = cloneString(defaultPosition); } else errAbort("Unable to resolve default position '%s' for database '%s'.", defaultPosition, db); return hgp; } static boolean posIsObsolete(char *pos) /* Return TRUE if pos is genome (or other obsolete keyword). Once upon a time position=genome * was used to indicate genome-wide search, but now we have an independent option. 
*/ { pos = trimSpaces(pos); return(sameWord(pos, "genome") || sameWord(pos, "hgBatch")); } struct hgPositions *hgFindSearch(struct cart *cart, char **pPosition, char **retChrom, int *retStart, int *retEnd, char *hgAppName, struct dyString *dyWarn) /* If *pPosition is a search term, then try to resolve it to genomic position(s). * If unable to find a unique position then revert pPosition to lastPosition (or default position). * Return a container of matching tables and positions. Warnings/errors are appended to dyWarn. */ { struct hgPositions *hgp = NULL; if (posIsObsolete(*pPosition)) { hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } else { boolean isMultiTerm = FALSE; char *multiDiffChrom = NULL; char *db = cartString(cart, "db"); hgp = hgPositionsSearch(db, *pPosition, retChrom, retStart, retEnd, &isMultiTerm, cart, hgAppName, &multiDiffChrom, dyWarn); if (isMultiTerm && isNotEmpty(multiDiffChrom)) { dyStringPrintf(dyWarn, "Sites occur on different chromosomes: %s, %s.", multiDiffChrom, hgp->singlePos->chrom); hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } else if (hgp->posCount > 1 || // In weird cases it's possible to get a single result that does not have coords, but // leads to another search a la multiple results! That happened with genbank keyword // search ("elmer" in hg19, hg38 Feb. '19). I fixed it but there could be other cases. (hgp->posCount == 1 && !hgp->singlePos)) { if (isMultiTerm) dyStringPrintf(dyWarn, "%s not uniquely determined (%d locations) -- " "can't do multi-position search.", hgp->query, hgp->posCount); // Revert position in cart (#13009), but don't replace hgp -- hgPositionsHtml will need it. 
revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } else if (hgp->posCount == 0) { dyStringPrintf(dyWarn, "Sorry, couldn't locate %s in %s %s", hgp->query, trackHubSkipHubName(hOrganism(db)), hFreezeDate(db)); hgp = revertPosition(cart, pPosition, retChrom, retStart, retEnd, hgAppName, dyWarn); } if (hgp->singlePos && isEmpty(dyWarn->string)) { char position[512]; safef(position, sizeof(position), "%s:%d-%d", hgp->singlePos->chrom, hgp->singlePos->chromStart+1, hgp->singlePos->chromEnd); *pPosition = cloneString(addCommasToPos(NULL, position)); } } return hgp; } #if 0 /* not used */ static void noRelative(boolean relativeFlag, int relStart, int relEnd, char *table) { if (relativeFlag) hUserAbort("Sorry, range spec (\":%d-%d\") is not supported for %s.", relStart+1, relEnd, table); } #endif static boolean isBigFileFind(struct hgFindSpec *hfs) /* is this a find on a big* file? */ { return sameString(hfs->searchType, "bigBed") || sameString(hfs->searchType, "bigPsl") || sameString(hfs->searchType, "bigBarChart") || sameString(hfs->searchType, "bigGenePred"); } static boolean findBigBed(struct cart *cart, char *db, struct hgFindSpec *hfs, char *spec, struct hgPositions *hgp, boolean measureTiming) /* Look up items in bigBed */ { struct trackDb *tdb = tdbFindOrCreate(db, NULL, hfs->searchTable); return findBigBedPosInTdbList(cart, db, tdb, spec, hgp, hfs, measureTiming); } boolean searchSpecial(struct cart *cart, char *db, struct hgFindSpec *hfs, char *term, int limitResults, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean *retFound, boolean measureTiming) /* Handle searchTypes for which we have special code. Return true if * we have special code. Set retFind according to whether we find term. 
*/ { boolean isSpecial = TRUE; boolean found = FALSE; char *upcTerm = cloneString(term); touppers(upcTerm); if (startsWith("knownGene", hfs->searchType)) { char *knownDatabase = hdbDefaultKnownDb(db); char *name = (sameString(knownDatabase, db)) ? "knownGene" : knownDatabase; char *indexPath = hReplaceGbdb(hgFindSpecSetting(hfs, "searchTrix")); if (indexPath == NULL) indexPath = makeIndexPath(db, name); if (gotFullText(db, indexPath)) found = findKnownGeneFullText(db, term, hgp, name, indexPath, hfs, measureTiming); } else if (sameString(hfs->searchType, "refGene")) { found = findRefGenes(db, hfs, term, hgp, measureTiming); } else if (isBigFileFind(hfs)) { found = findBigBed(cart, db, hfs, term, hgp, measureTiming); } else if (sameString(hfs->searchType, "cytoBand")) { char *chrom; int start, end; found = hgFindCytoBand(db, term, &chrom, &start, &end); if (found) singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term, term, chrom, start, end); } else if (sameString(hfs->searchType, "gold")) { char *chrom; int start, end; found = findChromContigPos(db, term, &chrom, &start, &end); if (found) { if (relativeFlag) { end = start + relEnd; start = start + relStart; } singlePos(hgp, hfs->searchDescription, NULL, hfs->searchTable, term, term, chrom, start, end); } } else if (sameString(hfs->searchType, "mrnaAcc")) { found = findMrnaPos(cart, db, term, hgp, measureTiming); } else if (sameString(hfs->searchType, "mrnaKeyword")) { found = findMrnaKeys(cart, db, hfs, upcTerm, limitResults, hgp, measureTiming); } else if (sameString(hfs->searchType, "sgdGene")) { found = findYeastGenes(db, term, hgp); } else { isSpecial = FALSE; } *retFound = found; freeMem(upcTerm); return(isSpecial); } static struct slPair *getXrefTerms(char *db, struct hgFindSpec *hfs, char *term) /* Search xrefTable for xrefQuery with term. Return all matching names. 
*/
{
struct slPair *xrefList = NULL, *xrefPtr = NULL;
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr = NULL;
char **row;
// With searchMethod "fuzzy", rows are kept only if term is a prefix of the
// returned alias (second result column), compared case-insensitively.
boolean isFuzzy = sameWord(hfs->searchMethod, "fuzzy");

// TODO we could re-work this better to get to upstream sql creation and
//  then be able to avoid this complexity:?
// hfs->xrefTable sometimes contains a comma-separated table list
// in trackDb.ra and hgFindSpec table.
// example from human/hg19/trackDb.ra
// xrefTable kgXref, ucscRetroInfo5
// xrefQuery select ucscRetroInfo5.name, spDisplayID from %s where spDisplayID like '%s%%' and kgName = kgID
// NOTE this also goes into hgFindSpec table as hti fields hfs->xrefTable and hfs->xrefQuery.

// hfs->xrefTable is sometimes a comma-separated list of tables
// xrefTable = [hgFixed.refLink, ucscRetroInfo8]

struct dyString *dy = dyStringNew(256);
// NOTE(review): sqlCkIl is a macro that declares xrefTableSafe from the table
// list (hence no trailing semicolon) -- presumably a checked identifier list;
// confirm against its definition.
sqlCkIl(xrefTableSafe, hfs->xrefTable)
// Replace the %s with %-s if it has not already been done in the upstream source .ra files
// it would be better to do this upstream in .ra and hgFindSpec
char *update = replaceChars(hfs->xrefQuery, " from %s ", " from %-s ");
  // this patches older values that still need it.
// The format string itself comes from the find spec; its arguments are the
// table list and the search term, in that order.
sqlDyStringPrintf(dy, update, xrefTableSafe, term);
// Always capped, regardless of any exhaustive-search request.
sqlDyStringPrintf(dy, " limit %d", NONEXHAUSTIVE_SEARCH_LIMIT);
freeMem(update);
sr = sqlGetResult(conn, dy->string);
dyStringFree(&dy);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (!isFuzzy || keyIsPrefixIgnoreCase(term, row[1]))
        {
        // pair name = second column (the alias that matched),
        // pair val  = first column (the id to search the main table for)
        xrefPtr = slPairNew(cloneString(row[1]), cloneString(row[0]));
        slAddHead(&xrefList, xrefPtr);
        }
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
// restore query result order after the slAddHead calls
slReverse(&xrefList);
// With the "searchBoth" setting, an xref miss still lets the raw term be
// searched directly (empty alias, term as the id).
if (xrefList == NULL && hgFindSpecSetting(hfs, "searchBoth") != NULL)
    xrefList = slPairNew(cloneString(""), cloneString(term));
return(xrefList);
}

char *addHighlight(char *db, char *chrom, unsigned start, unsigned end)
/* Return a string that can be assigned to the cart var addHighlight, to add a yellow highlight
 * at db.chrom:start+1-end for search results.
*/ { char *color = "fcfcac"; struct dyString *dy = dyStringCreate("%s.%s:%u-%u#%s", db, chrom, start+1, end, color); return dyStringCannibalize(&dy); } static boolean doQuery(char *db, struct hgFindSpec *hfs, char *xrefTerm, char *term, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean multiTerm, int limitResults, boolean measureTiming) /* Perform a query as specified in hfs, assuming table existence has been * checked and xref'ing has been taken care of. */ { struct slName *tableList = hSplitTableNames(db, hfs->searchTable); struct slName *tPtr = NULL; struct hgPosTable *table = NULL; struct hgPos *pos = NULL; struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row = NULL; char *termPrefix = hgFindSpecSetting(hfs, "termPrefix"); char *paddingStr = hgFindSpecSetting(hfs, "padding"); int padding = isEmpty(paddingStr) ? 0 : atoi(paddingStr); boolean found = FALSE; char *description = NULL; char buf[2048]; long startTime = clock1000(); if (isNotEmpty(termPrefix) && startsWith(termPrefix, term)) term += strlen(termPrefix); if (isEmpty(term)) return(FALSE); if (isNotEmpty(hfs->searchDescription)) truncatef(buf, sizeof(buf), "%s", hfs->searchDescription); else safef(buf, sizeof(buf), "%s", hfs->searchTable); description = cloneString(buf); if (hgp->tableList != NULL && sameString(hgp->tableList->name, hfs->searchTable) && sameString(hgp->tableList->description, description)) table = hgp->tableList; for (tPtr = tableList; tPtr != NULL; tPtr = tPtr->next) { // we do not have control over the original sql since it comes from trackDb.ra or elsewhere? 
struct dyString *query = sqlDyStringCreate(hfs->query, tPtr->name, term); if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED) sqlDyStringPrintf(query, " limit %d", limitResults); sr = sqlGetResult(conn, dyStringContents(query)); dyStringFree(&query); while ((row = sqlNextRow(sr)) != NULL) { if(table == NULL) { AllocVar(table); table->searchTime = -1; table->description = description; table->name = cloneString(hfs->searchTable); slAddHead(&hgp->tableList, table); } found = TRUE; AllocVar(pos); pos->chrom = cloneString(row[0]); pos->chromStart = atoi(row[1]); pos->chromEnd = atoi(row[2]); if (isNotEmpty(xrefTerm)) truncatef(buf, sizeof(buf), xrefTerm); else safef(buf, sizeof(buf), "%s%s", termPrefix ? termPrefix : "", row[3]); pos->name = cloneString(buf); pos->browserName = cloneString(row[3]); if (isNotEmpty(xrefTerm)) { safef(buf, sizeof(buf), "(%s%s)", termPrefix ? termPrefix : "", row[3]); pos->description = cloneString(buf); } if (relativeFlag && (pos->chromStart + relEnd) <= pos->chromEnd) { pos->chromEnd = pos->chromStart + relEnd; pos->chromStart = pos->chromStart + relStart; } else if (padding > 0 && !multiTerm) { // highlight the item bases to distinguish from padding pos->highlight = addHighlight(db, pos->chrom, pos->chromStart, pos->chromEnd); int chromSize = hChromSize(db, pos->chrom); pos->chromStart -= padding; pos->chromEnd += padding; if (pos->chromStart < 0) pos->chromStart = 0; if (pos->chromEnd > chromSize) pos->chromEnd = chromSize; } slAddHead(&table->posList, pos); } } if (table != NULL) slReverse(&table->posList); sqlFreeResult(&sr); hFreeConn(&conn); slFreeList(&tableList); if (measureTiming && table) table->searchTime += clock1000() - startTime; return(found); } static boolean hgFindUsingSpec(struct cart *cart, char *db, struct hgFindSpec *hfs, char *term, int limitResults, struct hgPositions *hgp, boolean relativeFlag, int relStart, int relEnd, boolean multiTerm, boolean measureTiming) /* Perform the search described by hfs on term. 
If successful, put results * in hgp and return TRUE. (If not, don't modify hgp.) */ { struct slPair *xrefList = NULL, *xrefPtr = NULL; boolean found = FALSE; if (hfs == NULL || term == NULL || hgp == NULL) errAbort("NULL passed to hgFindUsingSpec.\n"); if (strlen(term)<2 && ! (sameString(hfs->searchName, "knownGene") || sameString(hfs->searchName, "flyBaseGeneSymbolOneLetter"))) return FALSE; if (isNotEmpty(hfs->termRegex) && ! regexMatchNoCase(term, hfs->termRegex)) return(FALSE); if ((!(sameString(hfs->searchType, "mrnaKeyword") || sameString(hfs->searchType, "mrnaAcc"))) && !isBigFileFind(hfs)) { if (! hTableOrSplitExists(db, hfs->searchTable)) return(FALSE); } if (isNotEmpty(hfs->searchType) && searchSpecial(cart, db, hfs, term, limitResults, hgp, relativeFlag, relStart, relEnd, &found, measureTiming)) return(found); if (isNotEmpty(hfs->xrefTable)) { struct sqlConnection *conn = hAllocConn(db); // NOTE hfs->xrefTable can sometimes contain a comma-separated table list, // rather than just a single table. char *tables = replaceChars(hfs->xrefTable, ",", " "); boolean exists = sqlTablesExist(conn, tables); hFreeConn(&conn); freeMem(tables); if (! exists) return(FALSE); xrefList = getXrefTerms(db, hfs, term); } else xrefList = slPairNew(cloneString(""), cloneString(term)); for (xrefPtr = xrefList; xrefPtr != NULL; xrefPtr = xrefPtr->next) { found |= doQuery(db, hfs, xrefPtr->name, (char *)xrefPtr->val, hgp, relativeFlag, relStart, relEnd, multiTerm, limitResults, measureTiming); } slPairFreeValsAndList(&xrefList); return(found); } /* Support these formats for range specifiers. 
Note the ()'s around chrom, * start and end portions for substring retrieval: */ char *canonicalRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*:[[:space:]]*" "([-0-9,]+)" "[[:space:]]*[-_][[:space:]]*" "([0-9,]+)$"; char *gbrowserRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*:[[:space:]]*" "([0-9,]+)" "[[:space:]]*\\.\\.[[:space:]]*" "([0-9,]+)$"; char *lengthRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*:[[:space:]]*" "([0-9,]+)" //"[[:space:]]*\\^[[:space:]]*" "[[:space:]]*\\+[[:space:]]*" "([0-9,]+)$"; char *bedRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]+" "([0-9,]+)" "[[:space:]]+" "([0-9,]+)$"; char *sqlRangeExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*\\|[[:space:]]*" "([0-9,]+)" "[[:space:]]*\\|[[:space:]]*" "([0-9,]+)$"; char *singleBaseExp = "^([[:alnum:]._#\\-]+)" "[[:space:]]*:[[:space:]]*" "([0-9,]+)$"; static void collapseSamePos(struct hgPositions *hgp) /* If all positions in all tables in hgp are the same position, then * trim all but the first table/pos. */ { struct hgPosTable *firstTable = NULL, *table; struct hgPos *firstPos = NULL, *pos; char *chrom = NULL; int start=0, end=0; for (table = hgp->tableList; table != NULL; table = table->next) { for (pos = table->posList; pos != NULL; pos = pos->next) { if (pos->chrom != NULL) { if (chrom == NULL) { chrom = pos->chrom; start = pos->chromStart; end = pos->chromEnd; firstTable = table; firstPos = pos; } else if (! (sameString(chrom, pos->chrom) && start == pos->chromStart && end == pos->chromEnd)) return; } } } if (firstPos) { hgp->tableList = firstTable; hgp->tableList->posList = firstPos; hgPosTableFreeList(&(hgp->tableList->next)); hgPosFreeList(&(hgp->tableList->posList->next)); } } static boolean searchKnownCanonical(char *db, char *term, struct hgPositions *hgp) /* Look for term in kgXref.geneSymbol, and if found, put knownCanonical coords and * knownGene.name in hgp. 
*/ { boolean foundIt = FALSE; char *knownDatabase = hdbDefaultKnownDb(db); struct sqlConnection *conn = hAllocConn(knownDatabase); if (sqlTableExists(conn, "knownGene") && sqlTableExists(conn, "knownCanonical") && sqlTableExists(conn, "kgXref")) { char query[512]; sqlSafef(query, sizeof(query), "select chrom,chromStart,chromEnd,kgID from knownCanonical,kgXref " "where kgXref.geneSymbol = '%s' and kgXref.kgId = knownCanonical.transcript;", term); struct sqlResult *sr = sqlGetResult(conn, query); char **row; if ((row = sqlNextRow(sr)) != NULL) { char buffer[4096]; safef(buffer, sizeof buffer, "%s.knownGene", knownDatabase); singlePos(hgp, "GENCODE Genes", term, cloneString(buffer), row[3], row[3], cloneString(row[0]), atoi(row[1]), atoi(row[2])); foundIt = TRUE; } sqlFreeResult(&sr); } hFreeConn(&conn); return foundIt; } static struct hgFindSpec *hfsFind(struct hgFindSpec *list, char *name) /* Return first element of list that matches name. */ { struct hgFindSpec *el; for (el = list; el != NULL; el = el->next) if (sameString(name, el->searchName)) return el; return NULL; } static void myLoadFindSpecs(char *db, struct searchCategory *categories, struct hgFindSpec **quickList, struct hgFindSpec **fullList) /* Get all find specs where the search table or search name is what we want */ { struct hgFindSpec *shortList = NULL, *longList = NULL; struct dyString *clause = dyStringNew(0); struct searchCategory *categ; struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; struct slName *tbl, *tblList = hTrackDbList(); for (tbl = tblList; tbl != NULL; tbl = tbl->next) { dyStringClear(clause); char *tblName = replaceChars(tbl->name, "trackDb", "hgFindSpec"); if (hTableExists(db, tblName)) { sqlDyStringPrintf(clause, "select * from %s where searchName in (", tblName); for (categ = categories; categ != NULL; categ = categ->next) { sqlDyStringPrintf(clause, "'%s'", categ->id); if (categ->next) sqlDyStringPrintf(clause, ","); } sqlDyStringPrintf(clause, ") or 
searchTable in ("); for (categ = categories; categ != NULL; categ = categ->next) { if (sameString(categ->id, "mrna")) sqlDyStringPrintf(clause, "'all_mrna'"); else sqlDyStringPrintf(clause, "'%s'", categ->id); if (categ->next) sqlDyStringPrintf(clause, ","); } sqlDyStringPrintf(clause, ")"); sr = sqlGetResult(conn, dyStringContents(clause)); char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct hgFindSpec *hfs = hgFindSpecLoad(row); if (hfs->shortCircuit) slAddHead(&shortList, hfs); else slAddHead(&longList, hfs); } sqlFreeResult(&sr); } } hFreeConn(&conn); dyStringFree(&clause); if (quickList != NULL) { slSort(&shortList, hgFindSpecPriCmp); *quickList = shortList; } else hgFindSpecFreeList(&shortList); if (fullList != NULL) { slSort(&longList, hgFindSpecPriCmp); *fullList = longList; } else hgFindSpecFreeList(&longList); } static bool subtrackEnabledInTdb(struct trackDb *subTdb) /* Return TRUE unless the subtrack was declared with "subTrack ... off". */ { bool enabled = TRUE; char *words[2]; char *setting; if ((setting = trackDbLocalSetting(subTdb, "parent")) != NULL) { if (chopLine(cloneString(setting), words) >= 2) if (sameString(words[1], "off")) enabled = FALSE; } else return subTdb->visibility != tvHide; return enabled; } static bool isSubtrackVisible(struct cart *cart, struct trackDb *tdb) /* Has this subtrack not been deselected in hgTrackUi or declared with * * "subTrack ... off"? -- assumes composite track is visible. */ { boolean overrideComposite = (NULL != cartOptionalString(cart, tdb->track)); bool enabledInTdb = subtrackEnabledInTdb(tdb); char option[1024]; safef(option, sizeof(option), "%s_sel", tdb->track); boolean enabled = cartUsualBoolean(cart, option, enabledInTdb); if (overrideComposite) enabled = TRUE; return enabled; } static bool isParentVisible(struct cart *cart, struct trackDb *tdb) // Are this track's parents visible? 
{ if (tdb->parent == NULL) return TRUE; if (!isParentVisible(cart, tdb->parent)) return FALSE; char *cartVis = cartOptionalString(cart, tdb->parent->track); boolean vis; if (cartVis != NULL) vis = differentString(cartVis, "hide"); else if (tdbIsSuperTrack(tdb->parent)) vis = tdb->parent->isShow; else vis = tdb->parent->visibility != tvHide; return vis; } static bool isTrackVisible(struct cart *cart, struct trackDb *tdb) /* Is a track visible? */ { boolean isVisible = FALSE; if (tdb->parent == NULL) { char *cartVis = cartOptionalString(cart, tdb->track); if (cartVis == NULL) isVisible = tdb->visibility != tvHide; else isVisible = differentString(cartVis, "hide"); } else if (isParentVisible(cart, tdb) && isSubtrackVisible(cart, tdb)) isVisible = TRUE; return isVisible; } struct hash *hgFindTrackHash = NULL; struct hash *hgFindGroupHash = NULL; int cmpCategories(const void *a, const void *b) /* Compare two categories for uniquifying */ { struct searchCategory *categA = *(struct searchCategory **)a; struct searchCategory *categB = *(struct searchCategory **)b; return strcmp(categA->id, categB->id); } static struct searchableTrack *getSearchableTracks(struct cart *cart, char *database) /* Return the list of all tracks with an hgFindSpec available */ { if (trackHubDatabase(database)) return NULL; struct searchableTrack *ret = NULL; struct sqlConnection *conn = hAllocConn(database); struct slName *tbl, *tblList = hTrackDbList(); for (tbl = tblList; tbl != NULL; tbl = tbl->next) { char *tdbName, *findSpecName; tdbName = tbl->name; findSpecName = replaceChars(tbl->name, "trackDb", "hgFindSpec"); if (hTableExists(database, findSpecName)) { char query[1024]; sqlSafef(query, sizeof(query), "select distinct " "tableName,shortLabel,longLabel,searchDescription,priority " "from %s join %s on " "%s.searchTable=%s.tableName or " "%s.searchName=%s.tableName or " "%s.searchTable = concat('all_', %s.tableName) " "where searchTable !='knownGene' and searchName != 'knownGene' " "order by 
priority,shortLabel", findSpecName, tdbName, findSpecName, tdbName, findSpecName, tdbName, findSpecName, tdbName); struct sqlResult *sr = sqlGetResult(conn, query); char **row = NULL; struct trackDb *tdb = NULL; while ( (row = sqlNextRow(sr)) != NULL) { if ( (tdb = hashFindVal(hgFindTrackHash, row[0])) != NULL) { struct searchableTrack *track = NULL; AllocVar(track); track->track = cloneString(row[0]); track->shortLabel = cloneString(row[1]); track->longLabel = cloneString(row[2]); track->description = cloneString(row[3]); track->visibility = isTrackVisible(cart, tdb); track->priority = sqlDouble(row[4]); track->grp = tdb->grp; slAddHead(&ret, track); } } sqlFreeResult(&sr); } } hFreeConn(&conn); slReverse(&ret); return ret; } static struct trackDb *hubCategoriesToTdbList(struct searchCategory *categories) /* Make a list of trackDbs for the selected tracks */ { struct trackDb *ret = NULL; struct searchCategory *categ; for (categ = categories; categ != NULL; categ = categ->next) { if (startsWith("hub_", categ->id)) slAddHead(&ret, categ->tdb); } return ret; } static struct searchCategory *searchCategoryFromTdb(struct trackDb *tdb, struct searchableTrack *searchTrack, int visibility) /* Make a searchCategory from a leaf tdb, use searchCategory settings if possible, as they * have more accurate visibilities and labels */ { struct searchCategory *category = NULL; AllocVar(category); category->tdb = tdb; if (sameString(tdb->track, "mrna") || sameString(tdb->track, "est")) { char tableName[10]; safef(tableName, sizeof(tableName), "all_%s", tdb->track); category->id = cloneString(tableName); } else category->id = tdb->track; category->name = searchTrack != NULL ? searchTrack->shortLabel : tdb->shortLabel; category->visibility = searchTrack != NULL ? searchTrack->visibility: tdb->visibility; if (visibility > 0) // for when tdb is from a hub track category->visibility = visibility; category->priority = searchTrack != NULL ? 
searchTrack->priority : tdb->priority; if (slCount(category->errors) == 0) { category->label = searchTrack != NULL ? searchTrack->shortLabel: tdb->shortLabel; category->description = searchTrack != NULL ? searchTrack->description: tdb->longLabel; category->groupName = searchTrack != NULL ? searchTrack->grp: tdb->grp; category->parents = NULL; while (tdb->parent) { slNameAddHead(&category->parents, tdb->parent->track); slNameAddHead(&category->parents, tdb->parent->shortLabel); tdb = tdb->parent; } if (category->parents) slReverse(&category->parents); } return category; } struct trix *openStaticTrix(char *trixName) /* Open up a trix file in hgFixed */ { char trixPath[PATH_LEN]; safef(trixPath, sizeof(trixPath), "%s%s.ix", hgFixedTrix, trixName); struct trix *ret = trixOpen(trixPath); return ret; } static struct searchCategory *makeTrixCategory(char *indexName, char *database) /* Fill out the fields for a category filter for the UI. */ { struct searchCategory *category = NULL; AllocVar(category); struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { if (sameString(indexName, "publicHubs")) { category->id = "publicHubs"; category->name = "publicHubs"; category->label = "Public Hubs"; category->description = "Track names and descriptions of public hubs"; category->priority = 4.0; category->trix = openStaticTrix(publicHubsTrix); } else if (sameString(indexName, "helpDocs")) { category->id = "helpDocs"; category->name = "helpDocs"; category->label = "Help Pages"; category->description = "Help documentation"; category->visibility = 1; category->priority = 5.0; category->trix = openStaticTrix(helpDocsTrix); } else if (startsWith("trackDb", indexName)) { category->id = "trackDb"; category->name = "trackDb"; category->visibility = 1; category->priority = 3.0; char trixPath[PATH_LEN]; safef(trixPath, sizeof(trixPath), "%s Track Labels/Descriptions", database); category->label = cloneString(trixPath); category->description = "Track names or descriptions"; 
safef(trixPath, sizeof(trixPath), "/gbdb/%s/trackDb.ix", database); category->trix = trixOpen(trixPath); } } errCatchEnd(errCatch); if (errCatch->gotError) slAddHead(&category->errors, slNameNew(errCatch->message->string)); return category; } static struct searchCategory *makeCategoryForTrack(struct trackDb *tdb, struct searchableTrack *searchTrack) /* Make a searchCategory from a track. If the track is any type of container, * we will recurse down all the way to subtracks, as only leaf nodes have searchSpecs */ { struct trackDb *sub; struct searchCategory *ret = NULL; if (tdb->subtracks) { for (sub = tdb->subtracks; sub != NULL; sub = sub->next) { if (sub->subtracks) { struct searchCategory *temp = makeCategoryForTrack(sub, searchTrack); if (temp) slAddHead(&ret, temp); } else { struct searchCategory *temp = searchCategoryFromTdb(sub, NULL, 0); if (temp) slAddHead(&ret, temp); } } } else ret = searchCategoryFromTdb(tdb, searchTrack, 0); return ret; } struct searchCategory *makeCategory(struct cart *cart, char *categName, struct searchableTrack *searchTrack, char *db, struct hash *groupHash) /* Make a single searchCategory, unless the requested categName is a container * track or track group (for example all phenotype tracks), in which case we make * categories for each subtrack */ { struct searchCategory *ret = NULL; if (sameString(categName, "helpDocs")) ret = makeTrixCategory("helpDocs", NULL); else if (sameString(categName, "publicHubs")) ret = makeTrixCategory("publicHubs", NULL); else if (startsWith("trackDb", categName)) ret = makeTrixCategory("trackDb", db); else if (hashLookup(groupHash, categName) != NULL) { // add all tracks for this track grouping struct hashEl *hel, *helList = hashElListHash(hgFindTrackHash); for (hel = helList; hel != NULL; hel = hel->next) { struct trackDb *tdb = hel->val; if (isTdbSearchable(tdb) && sameString(tdb->grp, categName)) { struct searchCategory *temp = makeCategoryForTrack(tdb, searchTrack); if (temp) slAddHead(&ret, 
temp); } } } else { // must be a track, ret will contain subtracks if necessary struct trackDb *tdb = hashFindVal(hgFindTrackHash, categName); if (tdb) ret = makeCategoryForTrack(tdb, searchTrack); } return ret; } struct searchCategory *getCategsForNonDb(struct cart *cart, char *db, struct hash *groupHash) /* Return the default categories for all databases */ { struct searchCategory *ret = NULL; struct searchCategory *kgCategory = makeCategory(cart, "knownGene", NULL, db, groupHash); if (kgCategory) slAddHead(&ret, kgCategory); struct searchCategory *helpDocCategory = makeCategory(cart, "helpDocs", NULL, db, groupHash); if (helpDocCategory) slAddHead(&ret, helpDocCategory); struct searchCategory *publicHubCategory = makeCategory(cart, "publicHubs", NULL, db, groupHash); if (publicHubCategory) slAddHead(&ret, publicHubCategory); char trackDbIndexName[2048]; safef(trackDbIndexName, sizeof(trackDbIndexName), "trackDb%s", db); struct searchCategory *tdbCategory = makeCategory(cart, trackDbIndexName, NULL, db, groupHash); if (tdbCategory) slAddHead(&ret, tdbCategory); return ret; } static struct searchableTrack *makeGenbankSearchableTrack(struct trackDb *tdb, struct cart *cart) { struct searchableTrack *track = NULL; AllocVar(track); track->track = cloneString(tdb->track); track->shortLabel = cloneString(tdb->shortLabel); track->longLabel = cloneString(tdb->longLabel); track->description = cloneString(tdb->longLabel); track->visibility = isTrackVisible(cart, tdb); track->priority = tdb->priority; track->grp = tdb->grp; return track; } struct searchCategory *getCategsForDatabase(struct cart *cart, char *db, struct hash *groupHash) /* Get the track categories to search for a particular database */ { struct searchCategory *ret = NULL; struct trackDb *tdb = NULL; struct searchableTrack *track = NULL, *searchableTracks = getSearchableTracks(cart, db); for (track = searchableTracks; track != NULL; track = track->next) { struct searchCategory *trackCategory = 
makeCategory(cart, track->track, track, db, groupHash); if (trackCategory) { if (ret) slCat(&ret, trackCategory); else ret = trackCategory; } } // only the all_mrna table will have a valid struct searchableTrack added, we need // to make some for the rest of the searchable genbank mrna/est tracks: char *table = NULL; char **tables = mrnaTables; while ((table = *tables++) != NULL) { if (!sameString(table, "all_mrna") && (tdb = hashFindVal(hgFindTrackHash, table)) != NULL) { struct searchableTrack *tmp = makeGenbankSearchableTrack(tdb, cart); struct searchCategory *category = makeCategory(cart, tmp->track, tmp, db, groupHash); if (category) slAddHead(&ret, category); } } tables = estTables; while ((table = *tables++) != NULL) { if ( (tdb = hashFindVal(hgFindTrackHash, table)) != NULL) { struct searchableTrack *tmp = makeGenbankSearchableTrack(tdb, cart); struct searchCategory *category = makeCategory(cart, tmp->track, tmp, db, groupHash); if (category) slAddHead(&ret, category); } } // add hub tracks to list struct trackDb *hubList = hubCollectTracks(db, NULL); hubList = getSearchableBigBeds(hubList); for (tdb = hubList; tdb != NULL; tdb = tdb->next) { int visibility = isTrackVisible(cart, tdb); struct searchCategory *tmp = searchCategoryFromTdb(tdb, NULL, visibility); if (tmp) slAddHead(&ret, tmp); } return ret; } struct searchCategory *getAllCategories(struct cart *cart, char *db, struct hash *groupHash) /* Return all searchable stuff, both current db specific tracks, and things like hubs that searchable * no matter the current database */ { struct searchCategory *ret = NULL; struct searchCategory *tdbCategories = getCategsForDatabase(cart, db, groupHash); if (tdbCategories) ret = tdbCategories; struct searchCategory *staticCategs = getCategsForNonDb(cart, db, groupHash); if (staticCategs) { if (ret) slCat(&ret, staticCategs); else ret = staticCategs; } return ret; } static struct hash *hubLabelHash = NULL; /* struct hubLabel: a helper struct for making links to 
hubs in the search result list */ struct hubLabel { char *shortLabel; char *longLabel; char *hubId; }; static void getLabelsForHubs() /* Hash up the shortLabels, longLabels and hub_id for a hubUrl */ { if (hubLabelHash != NULL) return; hubLabelHash = hashNew(0); struct sqlConnection *conn = hConnectCentral(); char **row; struct sqlResult *sr; char query[2048]; sqlSafef(query, sizeof(query), "select hp.hubUrl, hp.shortLabel, hp.longLabel, concat('hub_',id) from hubPublic hp join hubStatus hs on hp.hubUrl=hs.hubUrl"); sr = sqlGetResult(conn, query); while ( (row = sqlNextRow(sr)) != NULL) { struct hubLabel *label = NULL; AllocVar(label); label->shortLabel = cloneString(row[1]); label->longLabel = cloneString(row[2]); label->hubId = cloneString(row[3]); char *hubUrl = cloneString(row[0]); hashAdd(hubLabelHash, hubUrl, label); } hDisconnectCentral(&conn); } static struct hubLabel *getLabelForHub(char *hubUrl) /* Look up the shortLabel, longLabel, and hub_id for a hubUrl */ { if (!hubLabelHash) getLabelsForHubs(); return (struct hubLabel *)hashFindVal(hubLabelHash, hubUrl); } static boolean fillOutTrackDbHgPos(struct hgPos *this, struct trixSearchResult *tsr) { boolean foundIt = FALSE; struct trackDb *tdb = (struct trackDb *)hashFindVal(hgFindTrackHash, this->name); if (tdb) { struct dyString *tdbLabels = dyStringNew(0); dyStringPrintf(tdbLabels, "%s:%s:%s", tsr->itemId, tdb->shortLabel, tdb->longLabel); this->name = dyStringCannibalize(&tdbLabels); foundIt = TRUE; } return foundIt; } static boolean fillOutPublicHubsHgPos(struct hgPos *this, struct trixSearchResult *tsr) { boolean foundIt = FALSE; char *itemId[5]; int numItems = chopString(tsr->itemId, ":", itemId, ArraySize(itemId)); struct dyString *hubLabel = dyStringNew(0); char hubUrl[PATH_LEN]; safef(hubUrl, sizeof(hubUrl), "%s:%s", itemId[0], itemId[1]); struct hubLabel *label = getLabelForHub(hubUrl); if (!label) return foundIt; else foundIt = TRUE; char *db = ""; struct dyString *track = dyStringNew(0); if 
(numItems > 2) db = itemId[2] != NULL ? itemId[2] : ""; if (numItems > 3) dyStringPrintf(track, "%s_%s", label->hubId, itemId[3]); dyStringPrintf(hubLabel, "%s:%s:%s:%s:%s", hubUrl, db, dyStringCannibalize(&track), label->shortLabel, label->longLabel); this->name = dyStringCannibalize(&hubLabel); return foundIt; } static boolean doTrixQuery(struct searchCategory *category, char *searchTerm, struct hgPositions *hgp, char *database, boolean measureTiming) /* Get a trix search result and potentially snippets for an hgFixed trix index. * TODO: return an error message if there is a problem with the trix index or snippet index */ { long startTime = clock1000(); boolean ret = FALSE; char *lowered = cloneString(searchTerm); char *keyWords[16]; int keyCount; tolowers(lowered); keyCount = chopLine(lowered, keyWords); // TODO: let the user control this: int maxReturn = SNIPPET_LIMIT; struct trixSearchResult *tsrList = NULL; if (category->trix) { tsrList = trixSearch(category->trix, keyCount, keyWords, tsmExpand); struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) initSnippetIndex(category->trix); errCatchEnd(errCatch); // silently return if there was a problem opening the snippet index if (errCatch->gotError || errCatch->gotWarning) return FALSE; errCatchFree(&errCatch); } struct trixSearchResult *tsr = NULL; int len = 0; struct hgPosTable *table = NULL; AllocVar(table); table->searchTime = -1; table->name = cloneString(category->name); table->description = cloneString(category->description); for (tsr = tsrList; tsr != NULL; tsr = tsr->next) { if (startsWith(category->name,"publicHubs")) { // Check that this public hubs result is for our current database char *itemId[5]; int numItems = chopString(cloneString(tsr->itemId), ":", itemId, ArraySize(itemId)); if (numItems <= 2 || isEmpty(itemId[2]) || !sameString(itemId[2], database)) continue; } struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { addSnippetForResult(tsr, category->trix); 
} errCatchEnd(errCatch); // silently return if there was a problem getting a single snippet, there is // probably a data error with the rest of the index if so if (errCatch->gotError || errCatch->gotWarning) return FALSE; errCatchFree(&errCatch); struct hgPos *this = NULL; AllocVar(this); this->name = tsr->itemId; this->description = tsr->snippet; if (startsWith(category->name, "trackDb")) { boolean addedTdbFields = fillOutTrackDbHgPos(this, tsr); if (!addedTdbFields) continue; } if (sameString(category->name, "publicHubs")) { boolean addedHubFields = fillOutPublicHubsHgPos(this, tsr); if (!addedHubFields) continue; } slAddHead(&table->posList, this); len++; if (len > maxReturn) break; } if (table->posList != NULL) { slReverse(&table->posList); if (measureTiming) table->searchTime = clock1000() - startTime; slAddHead(&hgp->tableList, table); ret = TRUE; } return ret; } static boolean userDefinedSearch(char *db, char *term, int limitResults, struct cart *cart, struct hgPositions *hgp, struct searchCategory *categories, boolean multiTerm, boolean measureTiming) /* If a search type(s) is specified in the cart, perform that search. * If the search is successful, fill in hgp and return TRUE. 
*/ { boolean foundIt = FALSE; struct hash *foundSpecHash = hashNew(0); struct hgFindSpec *shortList = NULL, *longList = NULL; struct trackDb *hubCategoryList = NULL; // get all the lists of what to query: if (!trackHubDatabase(db)) { if (categories) myLoadFindSpecs(db, categories, &shortList, &longList); else hgFindSpecGetAllSpecs(db, &shortList, &longList); } // lastly search any included track hubs, or in the case of an assembly hub, any of the tracks hubCategoryList = hubCategoriesToTdbList(categories); struct hgFindSpec *hfs; for (hfs = shortList; hfs != NULL; hfs = hfs->next) { boolean foundSpec = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming); if (foundSpec) hashAdd(foundSpecHash, hfs->searchTable, hfs->searchTable); foundIt |= foundSpec; // for multiTerm searches (like '15q11;15q13'), each individual component // must resolve to a single position, so break once we find the first match if (multiTerm && foundSpec) break; } if (!(multiTerm) || (multiTerm && !foundIt)) { for (hfs = longList; hfs != NULL; hfs = hfs->next) { - if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL) + if (hashFindVal(foundSpecHash, hfs->searchTable) != NULL && !sameString(hfs->searchTable, "knownGene")) continue; foundIt |= hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0, 0, multiTerm, measureTiming); } // lastly search any included track hubs, or in the case of an assembly hub, any of the tracks if (hubCategoryList) foundIt |= findBigBedPosInTdbList(cart, db, hubCategoryList, term, hgp, NULL, measureTiming); } // multiTerm searches must resolve to a single range on a chromosome, so don't // do these non positional searches if a multiTerm was requested if (!multiTerm) { getLabelsForHubs(); struct searchCategory *category; for (category = categories; category != NULL; category = category->next) { if (startsWith("trackDb", category->id) || sameString(category->id, "helpDocs") || sameString(category->id, "publicHubs")) 
{ foundIt |= doTrixQuery(category, term, hgp, db, measureTiming); } } } return foundIt; } static boolean singleSearch(char *db, char *term, int limitResults, struct cart *cart, struct hgPositions *hgp, boolean measureTiming) /* If a search type is specified in the CGI line (not cart), perform that search. * If the search is successful, fill in hgp as a single-pos result and return TRUE. */ { char *search = cgiOptionalString("singleSearch"); if (search == NULL) return FALSE; cartRemove(cart, "singleSearch"); boolean foundIt = FALSE; if (sameString(search, "knownCanonical")) foundIt = searchKnownCanonical(db, term, hgp); else { struct hgFindSpec *shortList = NULL, *longList = NULL; hgFindSpecGetAllSpecs(db, &shortList, &longList); struct hgFindSpec *hfs = hfsFind(shortList, search); if (hfs == NULL) hfs = hfsFind(longList, search); if (hfs != NULL) foundIt = hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, FALSE, 0,0, FALSE, measureTiming); else warn("Unrecognized singleSearch=%s in URL", search); } if (foundIt) { fixSinglePos(hgp); if (cart != NULL) cartSetString(cart, "hgFind.matches", hgp->tableList->posList->browserName); } return foundIt; } // a little data structure for combining multiple transcripts that resolve // to the same hgvs change. This struct can be used to fill out a struct hgPos struct hgvsHelper { struct hgvsHelper *next; char *chrom; // chromosome name of position int chromStart; // start of position int chromEnd; // end of position struct slName *validTranscripts; // valid transcripts/protein accessions for this position char *label; // corresponding hgvs term char *table; // type of match, LRG, NCBI, etc boolean mapError; // does this hgvs mapping result in a map error? 
};

static char *hgvsTrackTableForPsl(char *pslTable, char *seqAcc)
/* Pick the track table that an HGVS hit should be reported under, given the
 * PSL table the term was mapped through (may be NULL or empty) and the
 * accession of the HGVS term.  The result is either a string constant or
 * pslTable itself, so the caller must not free it. */
{
if (isEmpty(pslTable))
    return "chromInfo";
if (startsWith("lrg", pslTable))
    return "lrgTranscriptAli";
if (startsWith("wgEncodeGencode", pslTable))
    return pslTable;
if (startsWith("ncbiRefSeqPsl", pslTable))
    {
    if (startsWith("NM_", seqAcc) || startsWith("NR_", seqAcc) ||
        startsWith("NP_", seqAcc) || startsWith("YP_", seqAcc))
        return "ncbiRefSeqCurated";
    if (startsWith("XM_", seqAcc) || startsWith("XR_", seqAcc) ||
        startsWith("XP_", seqAcc))
        return "ncbiRefSeqPredicted";
    return "ncbiRefSeq";
    }
return "refGene";
}

static boolean matchesHgvs(struct cart *cart, char *db, char *term, struct hgPositions *hgp,
                            boolean measureTiming)
/* Return TRUE if the search term looks like a variant encoded using the HGVS nomenclature.
 * See http://varnomen.hgvs.org/
 * If search term is a pseudo hgvs term like GeneName AminoAcidPosition (RUNX2 Arg155) and
 * matches more than one transcript, fill out the hgp with the potential matches so the user
 * can choose where to go, otherwise return a singlePos.
 *   cart          - not used by this function
 *   db            - database the term is parsed and mapped against
 *   term          - the user's search term
 *   hgp           - results container; the HGVS table is attached only if hgp->tableList
 *                   is still empty
 *   measureTiming - if TRUE, record elapsed milliseconds in the attached table
 * Every string stored in the hgPos/hgPosTable fields that hgPosFree() and hgPosTableFree()
 * release is a private copy, so the results can be freed safely. */
{
boolean foundIt = FALSE;
long startTime = clock1000();
struct hgvsVariant *hgvsList = hgvsParseTerm(term);
if (hgvsList == NULL)
    hgvsList = hgvsParsePseudoHgvs(db, term);
if (hgvsList)
    {
    struct hgvsVariant *hgvs = NULL;
    int hgvsListLen = slCount(hgvsList);
    struct hgPosTable *table;
    AllocVar(table);
    table->description = "HGVS";
    table->searchTime = -1;
    int padding = 5;
    int mapErrCnt = 0;
    struct dyString *dyWarn = dyStringNew(0);
    struct hgvsHelper *helper = NULL;
    // Helpers are keyed by table + chrom + padded start/end so that transcripts
    // mapping to the same place collapse into one result.
    struct hash *uniqHgvsPos = hashNew(0);
    struct dyString *chromPosIndex = dyStringNew(0);
    struct dyString *allWarnings = dyStringNew(0);
    for (hgvs = hgvsList; hgvs != NULL; hgvs = hgvs->next)
        {
        dyStringClear(dyWarn);
        dyStringClear(chromPosIndex);
        char *pslTable = NULL;
        struct bed *mapping = hgvsValidateAndMap(hgvs, db, term, dyWarn, &pslTable);
        boolean hadWarning = (dyStringLen(dyWarn) > 0);
        if (hadWarning)
            mapErrCnt++;
        if (mapping == NULL)
            continue;
        char *trackTable = hgvsTrackTableForPsl(pslTable, hgvs->seqAcc);
        dyStringPrintf(chromPosIndex, "%s%s%d%d", trackTable, mapping->chrom,
                       mapping->chromStart-padding, mapping->chromEnd+padding);
        helper = hashFindVal(uniqHgvsPos, chromPosIndex->string);
        if (helper != NULL)
            slNameAddHead(&helper->validTranscripts, hgvs->seqAcc);
        else
            {
            AllocVar(helper);
            helper->chrom = mapping->chrom;
            helper->chromStart = mapping->chromStart;
            helper->chromEnd = mapping->chromEnd;
            helper->validTranscripts = slNameNew(hgvs->seqAcc);
            helper->label = cloneString(term);
            helper->table = trackTable;
            hashAdd(uniqHgvsPos, chromPosIndex->string, helper);
            }
        if (hadWarning)
            {
            helper->mapError = TRUE;
            dyStringPrintf(allWarnings, "%s%s", dyStringLen(allWarnings) > 0 ? "\n" : "",
                           dyStringContents(dyWarn));
            }
        }
    if (mapErrCnt < hgvsListLen)
        // at least one of the hgvs terms mapped successfully, so we can go to that spot
        // or let the user pick a location
        {
        int numPositions = 0;
        struct hashEl *hel, *helList = hashElListHash(uniqHgvsPos);
        for (hel = helList; hel != NULL; hel = hel->next)
            {
            helper = (struct hgvsHelper *)hel->val;
            if (helper->mapError)
                continue;
            if (hgp->tableList == NULL)
                hgp->tableList = table;
            foundIt = TRUE;
            // hgPosTableFree() frees the name, so keep a private copy (last helper wins)
            freeMem(table->name);
            table->name = cloneString(helper->table);
            struct hgPos *pos;
            AllocVar(pos);
            pos->chrom = helper->chrom;
            pos->chromStart = helper->chromStart - padding;
            pos->chromEnd = helper->chromEnd + padding;
            pos->name = slNameListToString(helper->validTranscripts, '/');
            pos->description = cloneString(helper->label);
            pos->browserName = cloneString("");   // freed by hgPosFree(), must be heap memory
            // highlight the mapped bases to distinguish from padding
            pos->highlight = addHighlight(db, helper->chrom, helper->chromStart,
                                          helper->chromEnd);
            slAddHead(&table->posList, pos);
            numPositions++;
            }
        hashElFreeList(&helList);
        if (numPositions > 1)
            table->description = "HGVS search resulted in multiple positions, please select a transcript below";
        }
    else
        // all of the positions mapped incorrectly, so the term was bad. However, we may
        // be able to still go to a general area around the term, so build that, warn the
        // user about their bad search term, and warn that this is not an exactly correct position
        // NOTE: There is a bug here in general, in that when mapping an hgvs term we don't
        // consider alternate haplotypes, and thus below we will always get at least some range
        // on the same chromosome within a gene, but if the mapping code were to change in the
        // future, we might end up with some weird coordinates
        {
        struct hashEl *hel, *helList = hashElListHash(uniqHgvsPos);
        if (helList)
            {
            if (hgp->tableList == NULL)
                hgp->tableList = table;
            foundIt = TRUE;
            char *chrom = NULL;
            int spanStart = INT_MAX, spanEnd = 0;
            // Build one span covering every (imperfect) mapping; chrom and table
            // name come from whichever helper is visited last.
            for (hel = helList; hel != NULL; hel = hel->next)
                {
                helper = (struct hgvsHelper *)hel->val;
                chrom = helper->chrom;
                spanStart = helper->chromStart < spanStart ? helper->chromStart : spanStart;
                spanEnd = helper->chromEnd > spanEnd ? helper->chromEnd : spanEnd;
                freeMem(table->name);
                table->name = cloneString(helper->table);
                }
            struct hgPos *pos;
            AllocVar(pos);
            pos->chrom = cloneString(chrom);
            pos->chromStart = spanStart-padding;
            pos->chromEnd = spanEnd + padding;
            // hgPosFree() frees these three fields, so none may be a literal or
            // the caller's term buffer
            pos->name = cloneString("Approximate area");
            pos->description = cloneString(term);
            pos->browserName = cloneString(term);
            // highlight the 'mapped' bases to distinguish from padding
            pos->highlight = addHighlight(db, chrom, spanStart, spanEnd);
            slAddHead(&table->posList, pos);
            warn("%s", dyStringContents(allWarnings));
            warn("Sorry, couldn't locate %s, moving to general location", term);
            }
        else
            warn("%s", dyStringContents(dyWarn));
        hashElFreeList(&helList);
        }
    dyStringFree(&dyWarn);
    dyStringFree(&allWarnings);
    dyStringFree(&chromPosIndex);
    // Releases the hash and its keys only.  The helper records stay allocated,
    // and pos->chrom above still points at the chrom string of a bed mapping.
    hashFree(&uniqHgvsPos);
    if (hgp->tableList == table)
        {
        if (measureTiming)
            table->searchTime = clock1000() - startTime;
        }
    else
        hgPosTableFree(&table);   // never attached to hgp, so nobody else can free it
    }
return foundIt;
}

static void hgpAddChromRange(struct hgPositions *hgp, char *db, char *term, char *label,
                             boolean relativeFlag, int relStart, int relEnd)
/* term has been recognized as a chromosome name of db: record it in hgp as a
 * single "Chromosome Range" position labelled with label.  When relativeFlag is
 * set, the range becomes relStart..relEnd offset from the parsed start and
 * clamped to [0, parsed end]. */
{
char *chrom = NULL;
int start = 0, end = 0;
hgParseChromRange(db, term, &chrom, &start, &end);
if (relativeFlag)
    {
    int chromSize = end;
    end = start + relEnd;
    start = start + relStart;
    if (end > chromSize)
        end = chromSize;
    if (start < 0)
        start = 0;
    }
singlePos(hgp, "Chromosome Range", NULL, "chromInfo", label, "", chrom, start, end);
}

static void hgpFinishSearch(struct hgPositions *hgp, struct cart *cart, boolean multiTerm)
/* Post-processing shared by every search path: reverse the table list, collapse
 * duplicate positions for multi-term searches, resolve a single position, and
 * store that position's highlight (if any) in the cart. */
{
slReverse(&hgp->tableList);
if (multiTerm)
    collapseSamePos(hgp);
fixSinglePos(hgp);
if (cart && hgp->singlePos && isNotEmpty(hgp->singlePos->highlight))
    cartSetString(cart, "addHighlight", hgp->singlePos->highlight);
}

struct hgPositions *hgPositionsFind(char *db, char *term, char *extraCgi,
	char *hgAppNameIn, struct cart *cart, boolean multiTerm, boolean measureTiming,
	struct searchCategory *categories)
/* Return container of tracks and positions (if any) that match term.
 *   db            - database to search
 *   term          - search term; surrounding spaces are trimmed
 *   extraCgi      - extra CGI string stored with the results; NULL is treated as ""
 *   hgAppNameIn   - calling application name, stored in the global hgAppName;
 *                   "hgGetAnn" turns on exhaustive searching
 *   cart          - may be NULL; receives "addHighlight" and "hgFind.matches"
 *   multiTerm     - TRUE when term is one of several being searched
 *   measureTiming - passed through to the individual searches
 *   categories    - if non-NULL, search only these categories
 * An empty term yields an empty container.  When categories is non-NULL and
 * nothing was found, NULL is returned instead of an empty container. */
{
struct hgPositions *hgp = NULL, *hgpItem = NULL;
regmatch_t substrs[4];
boolean canonicalSpec = FALSE;
boolean gbrowserSpec = FALSE;
boolean lengthSpec = FALSE;
boolean singleBaseSpec = FALSE;
boolean relativeFlag = FALSE;
int relStart = 0, relEnd = 0;

hgAppName = hgAppNameIn;

// Exhaustive searches can lead to timeouts on CGIs (#11626).
// However, hgGetAnn requires exhaustive searches (#11665).
// So... set a non-exhaustive search limit on all except hgGetAnn.
// NOTE: currently non-exhaustive search limits are only applied to findMrnaKeys
int limitResults = NONEXHAUSTIVE_SEARCH_LIMIT;
if (sameString(hgAppNameIn,"hgGetAnn"))
    limitResults = EXHAUSTIVE_SEARCH_REQUIRED;

AllocVar(hgp);
hgp->useAlias = FALSE;
term = trimSpaces(term);
if (isEmpty(term))
    return hgp;

hgp->query = cloneString(term);
hgp->database = db;
if (extraCgi == NULL)
    extraCgi = "";
hgp->extraCgi = cloneString(extraCgi);

if (singleSearch(db, term, limitResults, cart, hgp, measureTiming))
    return hgp;

if (categories != NULL)
    {
    // Work on a copy so the label shown to the user is the term as typed,
    // as the non-category path below does.
    char *originalTerm = term;
    term = cloneString(term); // because hgOfficialChromName mangles it
    if (hgOfficialChromName(db, term) != NULL) // this mangles the term
        // no range suffix has been parsed on this path, so the range is never relative
        hgpAddChromRange(hgp, db, term, originalTerm, FALSE, 0, 0);
    else if (!matchesHgvs(cart, db, term, hgp, measureTiming))
        userDefinedSearch(db, term, limitResults, cart, hgp, categories, multiTerm,
                          measureTiming);
    hgpFinishSearch(hgp, cart, multiTerm);
    if (hgp->posCount > 0)
        return hgp;
    // if categories was passed in we should explicitly return no results
    // if there weren't any
    return NULL;
    }

/* Allow any search term to end with a :Start-End range -- also support stuff
 * pasted in from BED (chrom start end) or SQL query (chrom | start | end).
 * If found, strip it off and remember the start and end. */
char *originalTerm = term;
if ((canonicalSpec =
        regexMatchSubstrNoCase(term, canonicalRangeExp, substrs, ArraySize(substrs))) ||
    (gbrowserSpec =
        regexMatchSubstrNoCase(term, gbrowserRangeExp, substrs, ArraySize(substrs))) ||
    (lengthSpec =
        regexMatchSubstrNoCase(term, lengthRangeExp, substrs, ArraySize(substrs))) ||
    regexMatchSubstrNoCase(term, bedRangeExp, substrs, ArraySize(substrs)) ||
    (singleBaseSpec =
        regexMatchSubstrNoCase(term, singleBaseExp, substrs, ArraySize(substrs))) ||
    regexMatchSubstrNoCase(term, sqlRangeExp, substrs, ArraySize(substrs)))
    {
    term = cloneString(term);
    /* Since we got a match, substrs[1] is the chrom/term, [2] is relStart,
     * [3] is relEnd. ([0] is all.) */
    term[substrs[1].rm_eo] = 0;
    eraseTrailingSpaces(term);
    term[substrs[2].rm_eo] = 0;
    relStart = atoi(stripCommas(term+substrs[2].rm_so));
    if (singleBaseSpec)
        {
        // The single-base form never reads group 3.  An unmatched group has
        // rm_eo == -1, so terminating it here would write to term[-1].
        relEnd = relStart;
        relStart--;
        }
    else
        {
        term[substrs[3].rm_eo] = 0;
        relEnd = atoi(stripCommas(term+substrs[3].rm_so));
        }
    if (relStart > relEnd)
        {
        int tmp = relStart;
        relStart = relEnd;
        relEnd = tmp;
        }
    if (canonicalSpec || gbrowserSpec || lengthSpec)
        relStart--;
    if (lengthSpec)
        relEnd += relStart;
    relativeFlag = TRUE;
    }
else
    term = cloneString(term); // because hgOfficialChromName mangles it

if (hgOfficialChromName(db, term) != NULL) // this mangles the term
    hgpAddChromRange(hgp, db, term, originalTerm, relativeFlag, relStart, relEnd);
else if (!matchesHgvs(cart, db, term, hgp, measureTiming))
    {
    struct hgFindSpec *shortList = NULL, *longList = NULL;
    struct hgFindSpec *hfs;
    boolean done = FALSE;

    // Disable singleBaseSpec for any term that is not hgOfficialChromName
    // because that mangles legitimate IDs that are [A-Z]:[0-9]+.
    if (singleBaseSpec)
        {
        singleBaseSpec = relativeFlag = FALSE;
        term = cloneString(originalTerm);  // restore original term
        relStart = relEnd = 0;
        }

    if (!trackHubDatabase(db))
        hgFindSpecGetAllSpecs(db, &shortList, &longList);
    if ((cart == NULL) || (cartOptionalString(cart, "noShort") == NULL))
        {
        hgp->shortCircuited = TRUE;
        for (hfs = shortList; hfs != NULL; hfs = hfs->next)
            {
            if (hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag,
                                relStart, relEnd, multiTerm, measureTiming))
                {
                done = TRUE;
                // a "semiShortCircuit" spec lets the remaining short specs run too
                if (! hgFindSpecSetting(hfs, "semiShortCircuit"))
                    break;
                }
            }
        }
    else
        cartRemove(cart, "noShort");
    if (! done)
        {
        hgp->shortCircuited = FALSE;
        for (hfs = longList; hfs != NULL; hfs = hfs->next)
            {
            hgFindUsingSpec(cart, db, hfs, term, limitResults, hgp, relativeFlag,
                            relStart, relEnd, multiTerm, measureTiming);
            }
        /* Lowe lab additions -- would like to replace these with specs, but
         * will leave in for now. */
        if (!trackHubDatabase(db))
            findTigrGenes(db, term, hgp);

        trackHubFindPos(cart, db, term, hgp, measureTiming);
        }
    hgFindSpecFreeList(&shortList);
    hgFindSpecFreeList(&longList);
    if (cart != NULL)
        {
        // Save the browser names of the matches (comma-terminated) in the cart.
        if (hgpMatchNames == NULL)
            hgpMatchNames = dyStringNew(256);
        dyStringClear(hgpMatchNames);
        int matchCount = 0;
        for (hgpItem = hgp; hgpItem != NULL; hgpItem = hgpItem->next)
            {
            struct hgPosTable *hpTable = NULL;
            for (hpTable = hgpItem->tableList; hpTable != NULL; hpTable = hpTable->next)
                {
                struct hgPos *pos = NULL;
                for (pos = hpTable->posList; pos != NULL; pos = pos->next)
                    {
                    // The break only leaves this inner loop, but once the limit is
                    // reached the same test fails at once for every later table.
                    if (limitResults != EXHAUSTIVE_SEARCH_REQUIRED &&
                        matchCount++ >= limitResults)
                        break;
                    dyStringPrintf(hgpMatchNames, "%s,", pos->browserName);
                    }
                }
            }
        cartSetString(cart, "hgFind.matches", hgpMatchNames->string);
        }
    }
hgpFinishSearch(hgp, cart, multiTerm);
return hgp;
}

void hgPositionsHelpHtmlCart(struct cart *cart, char *organism, char *database)
/* Display contents of dbDb.htmlPath for database, or print an HTML comment
 * explaining what's missing.
 *   cart     - supplies the session id for the "(sequences)" link
 *   organism - name shown in the section title and fallback heading
 *   database - database whose description page is looked up */
{
char *htmlPath = hHtmlPath(database);
char *htmlString = NULL;
size_t htmlStrLength = 0;

/* NOTE(review): the HTML inside the format strings of this function was lost
 * from the text under review.  It has been rebuilt from the argument lists
 * (each argument needs a %s) and from the header comment's "HTML comment";
 * confirm the exact markup against the repository before merging. */
if (strstrNoCase(organism, "zoo"))
    webNewSection("About the NISC Comparative Sequencing Program Browser");
else
    webNewSection("%s Genome Browser &ndash; %s assembly"
		  "  <A HREF=\"%s?%s=%s&chromInfoPage=\">(sequences)</A>",
		  trackHubSkipHubName(organism),
		  trackHubSkipHubName(database),
		  hgTracksName(), cartSessionVarName(), cartSessionId(cart));

if (htmlPath != NULL)
    {
    if (fileExists(htmlPath))
        readInGulp(htmlPath, &htmlString, &htmlStrLength);
    else if (startsWith("http://" , htmlPath) ||
             startsWith("https://", htmlPath) ||
             startsWith("ftp://"  , htmlPath))
        {
        struct lineFile *lf = udcWrapShortLineFile(htmlPath, NULL, 256*1024);
        htmlString = lineFileReadAll(lf);
        htmlStrLength = (htmlString != NULL) ? strlen(htmlString) : 0;
        lineFileClose(&lf);
        }
    }

if (htmlStrLength > 0)
    puts(htmlString);
else
    {
    printf("<H2>%s</H2>\n", trackHubSkipHubName(organism));
    if (htmlPath == NULL || htmlPath[0] == 0)
        printf("\n<!-- No dbDb.htmlPath for %s -->\n", database);
    else
        printf("\n<!-- Couldn't get contents of %s -->\n", htmlPath);
    }
// release on both paths; the fallback branch used to leak these
freeMem(htmlString);
freeMem(htmlPath);
}