2776159fd4f68c3c5a1a114f238dc2f568a5bd64
hiram
Fri Sep 27 11:07:16 2024 -0700
allow use of new genark table with more columns refs #32596
diff --git src/hg/hgGateway/hgGateway.c src/hg/hgGateway/hgGateway.c
index 00f18be..f262d8e 100644
--- src/hg/hgGateway/hgGateway.c
+++ src/hg/hgGateway/hgGateway.c
@@ -1,1068 +1,1079 @@
/* hgGateway: make it easy to select a species and assembly
*
* Copyright (C) 2016 The Regents of the University of California
*
* This CGI has three modes of operation:
* - HTML output for main page (default); most HTML is in hgGateway.html
* - cart-based JSON responses to ajax requests from javascript (using hg/lib/cartJson.c)
* (if CGI param CARTJSON_COMMAND exists)
* - no cart; fast JSON responses to species-search autocomplete requests
* (if CGI param SEARCH_TERM exists)
*/
#include "common.h"
#include "cart.h"
#include "cartJson.h"
#include "cheapcgi.h"
#include "errCatch.h"
#include "googleAnalytics.h"
#include "hCommon.h"
#include "hgConfig.h"
#include "hdb.h"
#include "htmshell.h"
#include "hubConnect.h"
#include "hui.h"
#include "jsHelper.h"
#include "jsonParse.h"
#include "obscure.h" // for readInGulp
#include "regexHelper.h"
#include "suggest.h"
#include "trackHub.h"
#include "web.h"
#include "botDelay.h"
#include "genark.h"
+#include "assemblyList.h"
/* Global Variables */
struct cart *cart = NULL; /* CGI and other variables */
struct hash *oldVars = NULL; /* Old contents of cart before it was updated by CGI */
static boolean issueBotWarning = FALSE; /* When TRUE, doMainPage prints the bot-delay warning box */
static int measureTiming = 0; /* When nonzero, doMainPage prints the elapsed time at page bottom */
static long enteredMainTime = 0; /* Start time subtracted from clock1000() for the timing report */
/* CGI param whose presence selects the cart-less species-search autocomplete mode
 * (see the file header comment). */
#define SEARCH_TERM "hggw_term"
static char *maybeGetDescriptionText(char *db)
/* Best-effort read of db's description.html: return the file contents as a string,
 * or NULL when there is no path or the read fails.  Errors raised while reading are
 * caught and discarded rather than aborting the request. */
{
char *text = NULL;
struct errCatch *catcher = errCatchNew();
if (errCatchStart(catcher))
    {
    char *path = hHtmlPath(db);
    if (isNotEmpty(path))
        text = udcFileReadAll(path, NULL, 0, NULL);
    }
// Whatever went wrong inside the catch block is deliberately ignored.
errCatchEnd(catcher);
return text;
}
static int trackHubCountAssemblies(struct trackHub *hub)
/* Count the genomes in hub->genomeList that carry a non-empty twoBitPath, i.e. the
 * ones for which the hub supplies the assembly itself. */
{
int assemblyTotal = 0;
struct trackHubGenome *g = hub->genomeList;
while (g != NULL)
    {
    if (isNotEmpty(g->twoBitPath))
        ++assemblyTotal;
    g = g->next;
    }
return assemblyTotal;
}
static char *trackHubDefaultAssembly(struct trackHub *hub)
/* Pick the assembly to present by default for hub: hub->defaultDb when that name belongs
 * to a genome with a twoBitPath, otherwise the first genome in the list that has one.
 * Returns NULL if no genome has a twoBitPath.  The result points into hub; don't free. */
{
char *wanted = hub->defaultDb;
char *fallback = NULL;
struct trackHubGenome *g;
for (g = hub->genomeList;  g != NULL;  g = g->next)
    {
    // Genomes without a twoBitPath are not assemblies; ignore them.
    if (!isNotEmpty(g->twoBitPath))
        continue;
    if (sameString(wanted, g->name))
        return wanted;
    if (fallback == NULL)
        fallback = g->name;
    }
return fallback;
}
static void listAssemblyHubs(struct jsonWrite *jw)
/* Emit a JSON list named "hubs" with one object per hub connected in the cart that
 * hosts at least one assembly.  Hubs whose trackHub could not be loaded and hubs that
 * only provide tracks are left out. */
{
jsonWriteListStart(jw, "hubs");
struct hubConnectStatus *connected = hubConnectStatusListFromCart(cart);
struct hubConnectStatus *entry;
for (entry = connected;  entry != NULL;  entry = entry->next)
    {
    struct trackHub *hub = entry->trackHub;
    int nAssemblies = (hub != NULL) ? trackHubCountAssemblies(hub) : 0;
    if (nAssemblies <= 0)
        continue;
    jsonWriteObjectStart(jw, NULL);
    jsonWriteString(jw, "name", hub->name);
    jsonWriteString(jw, "shortLabel", hub->shortLabel);
    jsonWriteString(jw, "longLabel", hub->longLabel);
    jsonWriteString(jw, "defaultDb", trackHubDefaultAssembly(hub));
    jsonWriteString(jw, "hubUrl", entry->hubUrl);
    jsonWriteNumber(jw, "assemblyCount", nAssemblies);
    jsonWriteString(jw, "errorMessage", entry->errorMessage);
    // taxId is always reported as 0 for hubs; it could be refined if defaultDb is
    // local or if hub genomes ever specify a taxId.
    jsonWriteNumber(jw, "taxId", 0);
    jsonWriteObjectEnd(jw);
    }
jsonWriteListEnd(jw);
}
static void writeFindPositionInfo(struct jsonWrite *jw, char *db, int taxId, char *hubUrl,
                                  char *position)
/* Write JSON for the info needed to populate the 'Find Position' section.
 * jw       - JSON writer that receives the fields
 * db       - assembly database name
 * taxId    - taxonomy id to report for db
 * hubUrl   - assembly hub URL, or NULL/empty (or a /gbdb path) for a non-hub db
 * position - position string to report
 * On failure (no genome for db, or hub can't be reached) only an "error" field is
 * written (after db/taxId/genome in the hub case) and the function returns early. */
{
char *genome = hGenome(db);
if (isEmpty(genome))
    {
    jsonWriteStringf(jw, "error", "No genome for db '%s'", db);
    return;
    }
jsonWriteString(jw, "db", db);
jsonWriteNumber(jw, "taxId", taxId);
jsonWriteString(jw, "genome", genome);
struct slPair *dbOptions = NULL;
char genomeLabel[PATH_LEN*4];
if (isNotEmpty(hubUrl) && !startsWith("/gbdb", hubUrl))
    {
    // Assembly hub: the db menu and label come from the hub itself.
    struct trackHub *hub = hubConnectGetHub(hubUrl);
    if (hub == NULL)
        {
        jsonWriteStringf(jw, "error", "Can't connect to hub at '%s'", hubUrl);
        return;
        }
    struct dbDb *dbDbList = trackHubGetDbDbs(hub->name);
    dbOptions = trackHubDbDbToValueLabel(dbDbList);
    safecpy(genomeLabel, sizeof(genomeLabel), hub->shortLabel);
    jsonWriteString(jw, "hubUrl", hubUrl);
    }
else
    {
    dbOptions = hGetDbOptionsForGenome(trackHubSkipHubName(genome));
    safecpy(genomeLabel, sizeof(genomeLabel), genome);
    }
jsonWriteValueLabelList(jw, "dbOptions", dbOptions);
jsonWriteString(jw, "genomeLabel", genomeLabel);
jsonWriteString(jw, "position", position);
char *suggestTrack = NULL;
if (! trackHubDatabase(db))
    {
    // The connection is only a probe that db can be reached; close it again instead
    // of leaking it as the bare sqlMayConnect(db) != NULL test did.
    struct sqlConnection *probe = sqlMayConnect(db);
    if (probe != NULL)
        {
        suggestTrack = assemblyGeneSuggestTrack(db);
        sqlDisconnect(&probe);
        }
    }
jsonWriteString(jw, "suggestTrack", suggestTrack);
char *description = maybeGetDescriptionText(db);
jsonWriteString(jw, "description", description);
listAssemblyHubs(jw);
}
static void setTaxId(struct cartJson *cj, struct hash *paramHash)
/* cartJson handler: set db and genome according to taxId (and/or db) and write the info
 * needed to fill in the findPosition section.
 * Params read from paramHash: "taxId" (required), "db" and "org" (optional).
 * On success the cart's db, position and (if given) org are updated. */
{
char *taxIdStr = cartJsonRequiredParam(paramHash, "taxId", cj->jw, "setTaxId");
// NOTE(review): cartJsonRequiredParam is handed the JSON writer and a context name,
// presumably so it can report a missing param itself -- confirm.  Either way, never
// pass NULL on to atoi().
if (taxIdStr == NULL)
    return;
char *db = cartJsonOptionalParam(paramHash, "db");
char *organism = cartJsonOptionalParam(paramHash, "org");
int taxId = atoi(taxIdStr);
if (isEmpty(db))
    db = hDbForTaxon(taxId);
if (isEmpty(db))
    {
    jsonWriteStringf(cj->jw, "error", "No db for taxId '%s'", taxIdStr);
    return;
    }
// Look the default position up once and use it for both the response and the cart.
char *defaultPos = hDefaultPos(db);
writeFindPositionInfo(cj->jw, db, taxId, NULL, defaultPos);
cartSetString(cart, "db", db);
if (!isEmpty(organism))
    cartSetString(cart, "org", organism);
cartSetString(cart, "position", defaultPos);
}
static void setHubDb(struct cartJson *cj, struct hash *paramHash)
/* cartJson handler for selecting an assembly hub db: given "hubUrl" (required) and
 * "taxId" (optional, reported as -1 when absent), write the findPosition info for
 * the db currently in the cart. */
{
char *hubUrl = cartJsonRequiredParam(paramHash, "hubUrl", cj->jw, "setHubDb");
char *taxIdStr = cartJsonOptionalParam(paramHash, "taxId");
int taxId = -1;
if (taxIdStr)
    taxId = atoi(taxIdStr);
// The hub CGI variables that accompany this command have already caused the cart's
// db to be set, so just read it back here.
char *db = cartString(cart, "db");
if (!isEmpty(db))
    writeFindPositionInfo(cj->jw, db, taxId, hubUrl, hDefaultPos(db));
else
    jsonWriteStringf(cj->jw, "error", "No db for hubUrl '%s'", hubUrl);
}
static void setDb(struct cartJson *cj, struct hash *paramHash)
/* cartJson handler: set taxId and genome according to db and write the info needed to
 * fill in the findPosition section.
 * Params read from paramHash: "db" (required), "hubUrl" and "position" (optional).
 * Updates the cart's db and position. */
{
char *db = cartJsonRequiredParam(paramHash, "db", cj->jw, "setDb");
// NOTE(review): cartJsonRequiredParam is handed the JSON writer and a context name,
// presumably so it can report a missing param itself -- confirm.  Without a db there
// is nothing sensible to look up, so stop here rather than pass NULL along.
if (db == NULL)
    return;
char *hubUrl = cartJsonOptionalParam(paramHash, "hubUrl");
// we want to go back to the most recent position on this db
// so push "lastDbPos" into the cart so cartGetPosition() can find it
char *maybePosition = cartJsonOptionalParam(paramHash, "position");
if (maybePosition)
    cartSetString(cart, "position", maybePosition);
int taxId = hTaxId(db);
// look up the old position the user was browsing (if they came here from
// a hub connection for instance) and start them there, otherwise use
// the assembly default position
char *maybeLastPos = cartGetPosition(cart, db, NULL);
char *pos = maybeLastPos != NULL ? maybeLastPos : hDefaultPos(db);
writeFindPositionInfo(cj->jw, db, taxId, hubUrl, pos);
cartSetString(cart, "db", db);
cartSetString(cart, "position", pos);
}
static void getUiState(struct cartJson *cj, struct hash *paramHash)
/* cartJson handler: write out JSON for hgGateway.js's uiState object using current cart
 * settings (db, position, and pix when the cart has one).  paramHash is not used. */
{
char *db = cartUsualString(cj->cart, "db", hDefaultDb());
char *position = cartUsualString(cart, "position", hDefaultPos(db));
char *hubUrl = NULL;
if (trackHubDatabase(db))
    {
    // Hub lookups can fail (writeFindPositionInfo checks hubConnectGetHub for NULL too),
    // so don't dereference blindly; with hubUrl left NULL the db is treated as non-hub.
    struct trackHub *hub = hubConnectGetHubForDb(db);
    if (hub != NULL)
        hubUrl = hub->url;
    }
writeFindPositionInfo(cj->jw, db, hTaxId(db), hubUrl, position);
// If cart already has a pix setting, pass that along; otherwise the JS will
// set pix according to web browser window width.
int pix = cartUsualInt(cj->cart, "pix", 0);
if (pix)
    jsonWriteNumber(cj->jw, "pix", pix);
}
static void doCartJson()
/* Handle a cart-based JSON request: register this CGI's command handlers with a new
 * cartJson object built on the global cart, then let cartJson execute the request. */
{
struct cartJson *dispatcher = cartJsonNew(cart);
cartJsonRegisterHandler(dispatcher, "setTaxId", setTaxId);
cartJsonRegisterHandler(dispatcher, "setDb", setDb);
cartJsonRegisterHandler(dispatcher, "setHubDb", setHubDb);
cartJsonRegisterHandler(dispatcher, "getUiState", getUiState);
cartJsonExecute(dispatcher);
}
static void printActiveGenomes(struct dyString *dy)
/* Append to dy the JSON for an object mapping each genome whose default db is active
 * (and exists on this server) to its taxId. */
{
struct jsonWrite *jw = jsonWriteNew();
jsonWriteObjectStart(jw, NULL);
struct sqlConnection *conn = hConnectCentral();
// Join with defaultDb because in rare cases, different taxIds (species vs. subspecies)
// may be used for different assemblies of the same species.  Using defaultDb means that
// we send a taxId consistent with the taxId of the assembly that we'll change to when
// the species is selected from the tree.
struct dyString *query = sqlDyStringCreate(
    "select dbDb.genome, taxId, dbDb.name from dbDb, defaultDb "
    "where defaultDb.name = dbDb.name and active = 1 "
    "and taxId > 1;"); // filter out experimental hgwdev-only stuff with invalid taxIds
struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query));
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *genome = row[0], *db = row[2];
    int taxId = atoi(row[1]);
    if (hDbExists(db))
        jsonWriteNumber(jw, genome, taxId);
    }
// Release the result set before handing the central connection back; the original
// code never freed it.
sqlFreeResult(&sr);
hDisconnectCentral(&conn);
jsonWriteObjectEnd(jw);
dyStringAppend(dy, jw->dy->string);
jsonWriteFree(&jw);
dyStringFree(&query);
}
static void doMainPage()
/* Send HTML with javascript to bootstrap the user interface.
 * Steps, in order: resolve db/genome/clade from the cart and store the current position
 * in the cart; start the page; optionally print the bot-delay, preview-site and
 * development-site warning boxes; print the static page body from hgGateway.html.h;
 * emit inline JS that sets window.<hgsid var>, window.activeGenomes and the survey
 * variables and calls hgGateway.init(); include the JS/CSS resources; optionally print
 * the timing box; finish the page.
 * NOTE(review): several string literals below (the *_BOX_START / *_BOX_END macros, the
 * MAIN_SITE link and the printf that receives dbDbTree but has no conversion in its
 * format) look as if their HTML markup was lost in transit -- confirm against the
 * repository copy before building from this text. */
{
// Start web page with new banner
char *db = NULL, *genome = NULL, *clade = NULL;
getDbGenomeClade(cart, &db, &genome, &clade, oldVars);
// If CGI has &lastDbPos=..., handle that here and save position to cart so it's in place for
// future cartJson calls.
char *position = cartGetPosition(cart, db, NULL);
cartSetString(cart, "position", position);
webStartJWest(cart, db, "Genome Browser Gateway");
if (cgiIsOnWeb())
checkForGeoMirrorRedirect(cart);
#define HOG_WARNING_BOX_START "
" \
"
"
#define HOG_WARNING_BOX_END "
"
if (issueBotWarning)
{
char *hogHost = getenv("REMOTE_ADDR");
char *delayMsg = botDelayWarningMsg(hogHost, botDelayMillis);
printf("%s%s%s\n", HOG_WARNING_BOX_START, delayMsg, HOG_WARNING_BOX_END);
}
#define WARNING_BOX_START "" \
"
"
#define UNDER_DEV "Data and tools on this site are under development, have not been reviewed " \
"for quality, and are subject to change at any time. "
#define MAIN_SITE "The high-quality, reviewed public site of the UCSC Genome Browser is " \
"available for use at
http://genome.ucsc.edu/."
#define WARNING_BOX_END "
"
if (hIsPreviewHost())
{
puts(WARNING_BOX_START
"WARNING: This is the UCSC Genome Browser preview site. "
"This website is a weekly mirror of our internal development server for public access. "
UNDER_DEV
"We provide this site for early access, with the warning that it is less available "
"and stable than our public site. "
MAIN_SITE
WARNING_BOX_END);
}
if (hIsPrivateHost() && !hHostHasPrefix("hgwdev-demo6"))
{
puts(WARNING_BOX_START
"WARNING: This is the UCSC Genome Browser development site. "
"This website is used for testing purposes only and is not intended for general public "
"use. "
UNDER_DEV
MAIN_SITE
WARNING_BOX_END);
}
// The visible page elements are all in ./hgGateway.html, which is transformed into a quoted .h
// file containing a string constant that we #include and print here (see makefile).
puts(
#include "hgGateway.html.h"
);
// Set global JS variables hgsid, activeGenomes, and survey* at page load time
// We can't just use "var hgsid = " or the other scripts won't see it -- it has to be
// "window.hgsid = ".
struct dyString *dy = dyStringNew(1024);
dyStringPrintf(dy, "window.%s = '%s';\n", cartSessionVarName(), cartSessionId(cart));
dyStringPrintf(dy, "window.activeGenomes =\n");
printActiveGenomes(dy);
dyStringPrintf(dy, "\n;\n");
char *surveyLink = cfgOption("survey");
if (isNotEmpty(surveyLink) && !sameWord(surveyLink, "off"))
{
dyStringPrintf(dy, "window.surveyLink=\"%s\";\n", jsonStringEscape(surveyLink));
char *surveyLabel = cfgOptionDefault("surveyLabel", "Please take our survey");
dyStringPrintf(dy, "window.surveyLabel=\"%s\";\n", jsonStringEscape(surveyLabel));
char *surveyLabelImage = cfgOption("surveyLabelImage");
if (isNotEmpty(surveyLabelImage))
dyStringPrintf(dy, "window.surveyLabelImage=\"%s\";\n", jsonStringEscape(surveyLabelImage));
else
dyStringPrintf(dy, "window.surveyLabelImage=null;\n");
}
else
{
dyStringPrintf(dy, "window.surveyLink=null;\n");
dyStringPrintf(dy, "window.surveyLabel=null;\n");
dyStringPrintf(dy, "window.surveyLabelImage=null;\n");
}
dyStringPrintf(dy, "hgGateway.init();\n");
jsInline(dy->string);
dyStringFree(&dy);
jsIncludeFile("es5-shim.4.0.3.min.js", NULL);
jsIncludeFile("es5-sham.4.0.3.min.js", NULL);
jsIncludeFile("lodash.3.10.0.compat.min.js", NULL);
jsIncludeFile("cart.js", NULL);
webIncludeResourceFile("jquery-ui.css");
jsIncludeFile("jquery-ui.js", NULL);
jsIncludeFile("jquery.watermarkinput.js", NULL);
jsIncludeFile("autocompleteCat.js",NULL);
jsIncludeFile("utils.js",NULL);
// Phylogenetic tree .js file, produced by dbDbTaxonomy.pl:
char *defaultDbDbTree = webTimeStampedLinkToResource("dbDbTaxonomy.js", FALSE);
char *dbDbTree = cfgOptionDefault("hgGateway.dbDbTaxonomy", defaultDbDbTree);
if (isNotEmpty(dbDbTree))
printf("\n", dbDbTree);
// Main JS for hgGateway:
jsIncludeFile("hgGateway.js", NULL);
#define TIMING_WARNING_BOX_START "" \
"
"
#define TIMING_WARNING_BOX_END "
"
if (measureTiming)
{
printf("%selapsed time %ld ms (%d ms bottleneck)%s\n",
TIMING_WARNING_BOX_START, clock1000() - enteredMainTime,
botDelayMillis, TIMING_WARNING_BOX_END);
}
webIncludeFile("inc/jWestFooter.html");
cartFlushHubWarnings();
webEndJWest();
}
void doMiddle(struct cart *theCart)
/* Depending on invocation, either perform a query and print out results
* or display the main page. */
{
cart = theCart;
if (cgiOptionalString(CARTJSON_COMMAND))
doCartJson();
else
doMainPage();
}
// We find matches from various fields of dbDb.  The numeric value of the type is used by
// dbDbMatchCmp as a tie-breaker, with lower values sorting first:
enum dbDbMatchType { ddmtDescription=0, ddmtGenome=1, ddmtDb=2, ddmtSciName=3 };
struct dbDbMatch
// Info about a match of a search term to some field of dbDb, including info that
// helps prioritize matches.
{
struct dbDbMatch *next;
struct dbDb *dbDb; // the row of dbDb in which a match was found (not owned by the match)
enum dbDbMatchType type; // which field the match was found in
int offset; // offset of the search term in the dbDb value; negative for an
            // abbreviated scientific name ("G. species") match
boolean isWord; // TRUE if the search term matches the word at offset
boolean isComplete; // TRUE if the search term matches the entire target string
};
static struct dbDbMatch *dbDbMatchNew(struct dbDb *dbDb, enum dbDbMatchType type, int offset,
                                      boolean isWord, boolean isComplete)
/* Build a new dbDbMatch from the given field values and return it.  The match only
 * borrows dbDb, so dbDb must outlive the match. */
{
struct dbDbMatch *fresh;
AllocVar(fresh);
fresh->isComplete = isComplete;
fresh->isWord = isWord;
fresh->offset = offset;
fresh->type = type;
fresh->dbDb = dbDb;
return fresh;
}
static int dbDbMatchCmp(const void *va, const void *vb)
/* Sort comparator for pointers to dbDbMatch.  Keys, in order of precedence:
 * complete matches first; whole-word matches first; for two whole-word matches the
 * smaller offset; the smaller match type value; the smaller dbDb orderKey; the smaller
 * offset; and finally the dbDb genome name by strcmp. */
{
const struct dbDbMatch *a = *((struct dbDbMatch **)va);
const struct dbDbMatch *b = *((struct dbDbMatch **)vb);
int diff = b->isComplete - a->isComplete;
if (diff != 0)
    return diff;
diff = b->isWord - a->isWord;
if (diff != 0)
    return diff;
if (a->isWord && b->isWord)
    {
    diff = a->offset - b->offset;
    if (diff != 0)
        return diff;
    }
// Compare the enum values as plain ints:
diff = (int)(a->type) - (int)(b->type);
if (diff != 0)
    return diff;
diff = a->dbDb->orderKey - b->dbDb->orderKey;
if (diff != 0)
    return diff;
diff = a->offset - b->offset;
if (diff != 0)
    return diff;
return strcmp(a->dbDb->genome, b->dbDb->genome);
}
INLINE void safeAddN(char **pDest, int *pSize, char *src, int len)
/* Append the first len bytes of src at *pDest (which has *pSize bytes of room), then
 * advance *pDest by len and shrink *pSize by len so the next piece can be appended. */
{
char *writeAt = *pDest;
int room = *pSize;
safencpy(writeAt, room, src, len);
*pDest = writeAt + len;
*pSize = room - len;
}
INLINE void safeAdd(char **pDest, int *pSize, char *src)
/* Append all of src at *pDest; *pDest advances and *pSize shrinks by strlen(src),
 * exactly as safeAddN does for an explicit length. */
{
int srcLen = strlen(src);
safeAddN(pDest, pSize, src, srcLen);
}
static char *boldTerm(char *target, char *term, int offset, enum dbDbMatchType type)
/* Return a newly allocated copy of target in which the part that matched term is
 * wrapped in <b>...</b>.  The bolded text is taken from target (so target's own
 * capitalization is kept); term only supplies the length.
 * offset - position of the match in target.  For ddmtDescription it is relative to the
 *          text after the first '(' and is adjusted here.
 * If offset is negative and type is ddmtSciName, treat term as an abbreviated species
 * name (term = "G. species" vs. target = "Genus species"): bold the first letter of the
 * genus and the matching portion of the species.
 * errAborts on an offset that does not fit target or a negative offset for any other
 * type.  Free the result with freeMem. */
{
// The markup literals had been reduced to "" so nothing was ever bolded and the
// "two bolded chunks" size accounting below was a no-op.
char *boldStart = "<b>";
char *boldEnd = "</b>";
int boldTagsLen = strlen(boldStart) + strlen(boldEnd);
int termLen = strlen(term);
int targetLen = strlen(target);
if (type == ddmtDescription)
    {
    // Search of dbDb->description skips the date that precedes the actual description which is
    // in parentheses.  Adjust offset accordingly.
    char *leftP = strchr(target, '(');
    if (leftP)
        offset += (leftP+1 - target);
    }
if (offset + termLen > targetLen)
    errAbort("boldTerm: invalid offset (%d) for term '%s' (length %d) in target '%s' (length %d)",
             offset, term, termLen, target, targetLen);
else if (offset < 0 && type != ddmtSciName)
    errAbort("boldTerm: negative offset (%d) given for type %d", offset, type);
// Allocate enough to have two bolded chunks:
int resultSize = targetLen + 2*boldTagsLen + 1;
char result[resultSize];
char *p = result;
int size = resultSize;
if (offset >= 0)
    {
    // The part of target before the term:
    safeAddN(&p, &size, target, offset);
    // The bolded term:
    safeAdd(&p, &size, boldStart);
    safeAddN(&p, &size, target+offset, termLen);
    safeAdd(&p, &size, boldEnd);
    // The rest of the target after the term:
    safeAdd(&p, &size, target+offset+termLen);
    // Accounting tweak -- we allocate enough for two bolded chunks, but use only one here:
    size -= boldTagsLen;
    }
else
    {
    // Term is abbreviated scientific name -- bold the first letter of the genus:
    safeAdd(&p, &size, boldStart);
    safeAddN(&p, &size, target, 1);
    safeAdd(&p, &size, boldEnd);
    // add the rest of the genus:
    char *targetSpecies = skipLeadingSpaces(skipToSpaces(target));
    int targetOffset = targetSpecies - target;
    safeAddN(&p, &size, target+1, targetOffset-1);
    // bold the matching portion of the species:
    char *termSpecies = skipLeadingSpaces(skipToSpaces(term));
    termLen = strlen(termSpecies);
    safeAdd(&p, &size, boldStart);
    safeAddN(&p, &size, targetSpecies, termLen);
    safeAdd(&p, &size, boldEnd);
    // add the rest of the species:
    safeAdd(&p, &size, targetSpecies+termLen);
    }
// Sanity check: exactly the terminator's byte should be left and p should sit on it.
if (*p != '\0' || size != 1)
    errAbort("boldTerm: bad arithmetic (size is %d, *p is '%c')", size, *p);
return cloneStringZ(result, resultSize);
}
static void writeDbDbMatch(struct jsonWrite *jw, struct dbDbMatch *match, char *term,
                           char *category)
/* Write one JSON object describing match.  Always written: genome, label, value, org
 * and taxId; "db" is added for db-name and description matches; "category" is added
 * when category is non-empty.  errAborts on an unrecognized match type. */
{
struct dbDb *hit = match->dbDb;
jsonWriteObjectStart(jw, NULL);
jsonWriteString(jw, "genome", hit->genome);
// label is what the menu shows, with the matched portion marked up by boldTerm();
// value is placed in the input box when user selects the item.
char label[PATH_LEN*4];
char value[PATH_LEN*4];
switch (match->type)
    {
    case ddmtSciName:
        {
        char *marked = boldTerm(hit->scientificName, term, match->offset, match->type);
        safef(value, sizeof(value), "%s (%s)", hit->scientificName, hit->genome);
        safef(label, sizeof(label), "%s (%s)", marked, hit->genome);
        freeMem(marked);
        break;
        }
    case ddmtGenome:
        {
        char *marked = boldTerm(hit->genome, term, match->offset, match->type);
        safecpy(value, sizeof(value), hit->genome);
        safecpy(label, sizeof(label), marked);
        freeMem(marked);
        break;
        }
    case ddmtDb:
        {
        char *marked = boldTerm(hit->name, term, match->offset, match->type);
        safecpy(value, sizeof(value), hit->name);
        safef(label, sizeof(label), "%s (%s %s)",
              marked, hit->genome, hit->description);
        freeMem(marked);
        jsonWriteString(jw, "db", hit->name);
        break;
        }
    case ddmtDescription:
        {
        char *marked = boldTerm(hit->description, term, match->offset, match->type);
        safef(value, sizeof(value), "%s (%s %s)",
              hit->name, hit->genome, hit->description);
        safef(label, sizeof(label), "%s (%s %s)",
              hit->name, hit->genome, marked);
        freeMem(marked);
        jsonWriteString(jw, "db", hit->name);
        break;
        }
    default:
        errAbort("writeDbDbMatch: unrecognized dbDbMatchType value %d (db %s, term %s)",
                 match->type, hit->name, term);
    }
jsonWriteString(jw, "label", label);
jsonWriteString(jw, "value", value);
jsonWriteString(jw, "org", hit->organism);
jsonWriteNumber(jw, "taxId", hit->taxId);
if (isNotEmpty(category))
    jsonWriteString(jw, "category", category);
jsonWriteObjectEnd(jw);
}
int wordMatchOffset(char *term, char *target)
/* If target itself or some later word of target starts with term, return the offset of
 * that word in target; otherwise return -1.  Leading non-alphanumeric characters of a
 * word (e.g. an opening parenthesis) are skipped before comparing.
 * NOTE(review): the comparison is done with startsWith(); checkTerm upper-cases the
 * target before calling, which is presumably where case insensitivity comes from. */
{
if (startsWith(term, target))
    return 0;
// nextWord() is given a scratch copy so the caller's target is left untouched.
int targetLen = strlen(target);
char targetClone[targetLen+1];
safecpy(targetClone, sizeof(targetClone), target);
char *p = targetClone;
while (nextWord(&p) && p != NULL)
    {
    // skip punctuation like parentheses.  The cast keeps isalnum() defined for bytes
    // with the high bit set (plain char may be signed).
    while (*p != '\0' && ! isalnum((unsigned char)*p))
        p++;
    if (startsWith(term, p))
        return p - targetClone;
    }
return -1;
}
static void addIfFirstMatch(struct dbDb *dbDb, enum dbDbMatchType type, int offset, char *target,
                            char *term, struct hash *matchHash, struct dbDbMatch **pMatchList)
/* If dbDb is active and target doesn't already have a match in matchHash, compute isWord
 * and isComplete, add the new match to the head of pMatchList and add target to matchHash.
 * offset - where term was found in target; a negative offset (abbreviated scientific
 *          name) is compared from the start of target. */
{
if (!dbDb->active || hashLookup(matchHash, target))
    return;
char *termInTarget = (offset >= 0) ? target+offset : target;
int matchLength = countSame(term, termInTarget);
// is the match complete up to a word boundary in termInTarget?
// (unsigned char cast: isspace() is undefined for negative plain-char values)
unsigned char afterMatch = (unsigned char)termInTarget[matchLength];
boolean isWord = ((size_t)matchLength == strlen(term) &&
                  (afterMatch == '\0' || isspace(afterMatch)));
boolean isComplete = sameString(term, target);
struct dbDbMatch *match = dbDbMatchNew(dbDb, type, offset, isWord, isComplete);
slAddHead(pMatchList, match);
hashStore(matchHash, target);
}
static void checkTerm(char *term, char *target, enum dbDbMatchType type, struct dbDb *dbDb,
                      struct hash *matchHash, struct dbDbMatch **pMatchList)
/* Test term against an upper-cased copy of target and record a match through
 * addIfFirstMatch when either (a) target or one of its words starts with term, or
 * (b) target is a scientific name and term is its abbreviated form ("G. species"). */
{
// Work on an upper-cased copy of target.
int targetLen = strlen(target);
char upper[targetLen + 1];
safencpy(upper, sizeof(upper), target, targetLen);
touppers(upper);
int offset = wordMatchOffset(term, upper);
if (offset >= 0)
    {
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
    return;
    }
// No plain word match.  Only scientific names that share term's first letter can
// still match, via the abbreviated form.
if (type != ddmtSciName || term[0] != upper[0])
    return;
// e.g. term 'P. trog' for target 'Pan troglodytes'
regmatch_t substrArr[3];
if (!regexMatchSubstrNoCase(term, "^[a-z](\\.| ) *([a-z]+)", substrArr, ArraySize(substrArr)))
    return;
char *termSpecies = term + substrArr[2].rm_so;
char *targetSpecies = skipLeadingSpaces(skipToSpaces(upper));
if (targetSpecies && startsWithNoCase(termSpecies, targetSpecies))
    {
    // The (negative) offset is passed through unchanged: the bolding for this case
    // is not one contiguous chunk of target.
    addIfFirstMatch(dbDb, type, offset, upper, term, matchHash, pMatchList);
    }
}
static struct dbDbMatch *searchDbDb(struct dbDb *dbDbList, char *term)
/* Search various fields of dbDb for matches to term and sort by relevance. */
{
struct dbDbMatch *matchList = NULL;
struct hash *matchHash = hashNew(0);
struct dbDb *dbDb;
for (dbDb = dbDbList; dbDb != NULL; dbDb = dbDb->next)
{
checkTerm(term, dbDb->name, ddmtDb, dbDb, matchHash, &matchList);
// Skip experimental stuff on hgwdev with bogus taxId unless the db name matches term.
if (dbDb->taxId >= 2)
{
checkTerm(term, dbDb->genome, ddmtGenome, dbDb, matchHash, &matchList);
checkTerm(term, dbDb->scientificName, ddmtSciName, dbDb, matchHash, &matchList);
}
// dbDb.description has dozens of matches for some institutions like Broad, so suppress
// it for search terms that would get too many probably unwanted matches.
if (! (startsWith(term, "BRO") || startsWith(term, "WU") || startsWith(term, "BAY") ||
startsWith(term, "AGE")))
{
// dbDb.description also starts with dates followed by actual description in parentheses,
// so search only the part in parentheses to avoid month prefix matches.
char *leftP = strchr(dbDb->description, '(');
char *toSearch = leftP ? leftP+1 : dbDb->description;
checkTerm(term, toSearch, ddmtDescription, dbDb, matchHash, &matchList);
}
}
slSort(&matchList, dbDbMatchCmp);
return matchList;
}
// Assembly hub match:
struct aHubMatch
// description of an assembly hub db; aHubMatchNew stores its own copy of each string
{
struct aHubMatch *next;
char *shortLabel; // hub shortLabel
char *hubUrl; // hub url
char *aDb; // assembly db hosted by hub
char *label; // label for this db (aHubMatchNew is called with NULL when none is known)
};
static struct aHubMatch *aHubMatchNew(char *shortLabel, char *hubUrl, char *aDb, char *label)
/* Create an aHubMatch holding its own copies (cloneString) of the four strings. */
{
struct aHubMatch *fresh;
AllocVar(fresh);
fresh->label = cloneString(label);
fresh->aDb = cloneString(aDb);
fresh->hubUrl = cloneString(hubUrl);
fresh->shortLabel = cloneString(shortLabel);
return fresh;
}
// Genark hub match:
struct gHubMatch
// description of a genark hub db; gHubMatchNew stores its own copy of each string
{
struct gHubMatch *next;
char *gcAccession; // written as "genome" by writeGenarkMatches
char *hubUrl; // hub url
char *asmName; // written as "db" and "value" by writeGenarkMatches
char *scientificName; // second half of the menu label
char *commonName; // first half of the menu label
int priority; // reserved for later ranking, currently unused
};
static struct gHubMatch *gHubMatchNew(char *acc, char *hubUrl, char *asmName, char *scientificName, char *commonName, int priority)
/* Create a gHubMatch (genark hub match) holding its own copies (cloneString) of the
 * string arguments, plus the given priority. */
{
struct gHubMatch *fresh;
AllocVar(fresh);
fresh->priority = priority;
fresh->commonName = cloneString(commonName);
fresh->scientificName = cloneString(scientificName);
fresh->asmName = cloneString(asmName);
fresh->hubUrl = cloneString(hubUrl);
fresh->gcAccession = cloneString(acc);
return fresh;
}
static struct hash *unpackHubDbUrlList(struct slName *hubDbUrlList, struct hash **labelHash)
/* Each name in hubDbUrlList looks like "db\tlabel\thubUrl".  Split the names in place
 * (the tabs are overwritten with string terminators) and return a hash that maps hubUrl
 * to an slName list of its dbs.  *labelHash is set to a hash that maps db to label.
 * Names without any tab are ignored; names with only one tab contribute to the label
 * hash only. */
{
struct hash *urlToDbs = hashNew(0);
struct hash *dbToLabel = hashNew(0);
struct slName *item = hubDbUrlList;
for ( ;  item != NULL;  item = item->next)
    {
    char *firstTab = strchr(item->name, '\t');
    if (firstTab == NULL)
        continue;
    char *db = item->name;
    *firstTab = '\0';
    char *label = firstTab + 1;
    char *secondTab = strchr(label, '\t');
    if (secondTab != NULL)
        {
        *secondTab = '\0';
        char *hubUrl = secondTab + 1;
        struct hashEl *hel = hashLookup(urlToDbs, hubUrl);
        struct slName *dbsForHub = (hel != NULL) ? hel->val : NULL;
        struct slName *dbNode = slNameNew(db);
        slAddHead(&dbsForHub, dbNode);
        // Store the new list head: either in the existing entry or in a new one.
        if (hel != NULL)
            hel->val = dbsForHub;
        else
            hashAdd(urlToDbs, hubUrl, dbsForHub);
        }
    hashAdd(dbToLabel, db, label);
    }
*labelHash = dbToLabel;
return urlToDbs;
}
static struct aHubMatch *filterHubSearchTextMatches(struct dbDb *dbDbList,
struct slName *hubDbUrlList)
/* Collect the assembly hub matches (not track hub matches) from a search in hubSearchText. */
{
if (hubDbUrlList == NULL)
return NULL;
struct aHubMatch *aHubMatchList = NULL;
// Make a hash of local dbs so we can tell which hub dbs must be assembly hubs
// not track hubs.
struct hash *localDbs = hashNew(0);
struct dbDb *dbDb;
for (dbDb = dbDbList; dbDb != NULL; dbDb = dbDb->next)
if (!sameString(dbDb->nibPath, "genark"))
hashStore(localDbs, dbDb->name);
struct hash *dbLabel = NULL;
struct hash *hubToDb = unpackHubDbUrlList(hubDbUrlList, &dbLabel);
// Build up a query to find shortLabel and dbList for each hubUrl.
struct dyString *query = sqlDyStringCreate("select shortLabel,hubUrl,dbList from %s "
"where hubUrl in (",
hubPublicTableName());
struct hashEl *hel;
struct hashCookie cookie = hashFirst(hubToDb);
boolean isFirst = TRUE;
while ((hel = hashNext(&cookie)) != NULL)
{
if (isFirst)
isFirst = FALSE;
else
sqlDyStringPrintf(query, ", ");
sqlDyStringPrintf(query, "'%s'", hel->name);
}
sqlDyStringPrintf(query, ")");
struct sqlConnection *conn = hConnectCentral();
struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query));
char **row;
while ((row = sqlNextRow(sr)) != NULL)
{
char *shortLabel = row[0];
char *hubUrl = row[1];
struct slName *dbName, *matchDbList = hashFindVal(hubToDb, hubUrl);
struct slName *hubDbList = slNameListFromComma(row[2]);
if (slCount(matchDbList) == 1 && isEmpty(matchDbList->name))
{
// top-level hub match, no specific db match; add all of hub's assembly dbs
for (dbName = hubDbList; dbName != NULL; dbName = dbName->next)
if (! hashLookup(localDbs, dbName->name))
slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name, NULL));
}
else
{
// Add matching assembly dbs that are found in hubDbList
for (dbName = matchDbList; dbName != NULL; dbName = dbName->next)
if (! hashLookup(localDbs, dbName->name) && slNameInList(hubDbList, dbName->name))
{
char *label = hashFindVal(dbLabel, dbName->name);
if (label)
slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name, label));
else
slAddHead(&aHubMatchList, aHubMatchNew(shortLabel, hubUrl, dbName->name, NULL));
}
}
}
slReverse(&aHubMatchList);
hDisconnectCentral(&conn);
dyStringFree(&query);
return aHubMatchList;
}
static void writeAssemblyHubMatches(struct jsonWrite *jw, struct aHubMatch *aHubMatchList)
/* Write one JSON object per entry of aHubMatchList, i.e. per matching assembly in an
 * assembly hub. */
{
struct aHubMatch *hit = aHubMatchList;
while (hit != NULL)
    {
    // Category label for the customized autocomplete-with-categories widget.
    char category[PATH_LEN*4];
    safef(category, sizeof(category), "Assembly Hub: %s", hit->shortLabel);
    jsonWriteObjectStart(jw, NULL);
    jsonWriteString(jw, "genome", hit->shortLabel);
    jsonWriteString(jw, "db", hit->aDb);
    jsonWriteString(jw, "hubUrl", hit->hubUrl);
    jsonWriteString(jw, "hubName", hubNameFromUrl(hit->hubUrl));
    jsonWriteString(jw, "category", category);
    jsonWriteString(jw, "value", hit->aDb);
    // The hub's shortLabel is already part of the category, so the label is just the
    // match's own label field.
    jsonWriteString(jw, "label", hit->label);
    jsonWriteObjectEnd(jw);
    hit = hit->next;
    }
}
static struct aHubMatch *searchPublicHubs(struct dbDb *dbDbList, char *term)
/* Look term up in the hub search text table (name from config option
 * "hubSearchTextTable", default "hubSearchText") and return the matches that are
 * assembly hubs, i.e. hubs that host an assembly rather than only providing tracks.
 * Returns NULL when the table does not exist or nothing matches. */
{
char *searchTable = cfgOptionDefault("hubSearchTextTable", "hubSearchText");
struct sqlConnection *conn = hConnectCentral();
if (!sqlTableExists(conn, searchTable))
    {
    hDisconnectCentral(&conn);
    return NULL;
    }
char query[1024];
// First pass: db prefix, label substring, or text prefix.
sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(concat('\t', label), concat('\t', hubUrl)))) from %s "
         "where track = '' and "
         "(db like '%s%%' or label like '%%%s%%' or text like '%s%%')",
         searchTable, term, term, term);
struct slName *rawHits = sqlQuickList(conn, query);
struct aHubMatch *matches = filterHubSearchTextMatches(dbDbList, rawHits);
if (matches == NULL)
    {
    // Second, looser pass: term at the start of any later word of text.
    sqlSafef(query, sizeof(query), "select distinct(concat(db, concat(concat('\t', label), concat('\t', hubUrl)))) from %s "
             "where track = '' and text like '%% %s%%'",
             searchTable, term);
    rawHits = sqlQuickList(conn, query);
    matches = filterHubSearchTextMatches(dbDbList, rawHits);
    }
hDisconnectCentral(&conn);
return matches;
}
static void writeGenarkMatches(struct jsonWrite *jw, struct gHubMatch *gHubMatchList)
/* Emit one JSON object to jw for every genark hub in gHubMatchList that matched the
 * user's term. */
{
struct gHubMatch *hit = gHubMatchList;
while (hit != NULL)
    {
    jsonWriteObjectStart(jw, NULL);
    jsonWriteString(jw, "genome", hit->gcAccession);
    jsonWriteString(jw, "db", hit->asmName);
    jsonWriteString(jw, "hubUrl", hit->hubUrl);
    jsonWriteString(jw, "scientificName", hit->scientificName);
    // Category label for the customized autocomplete-with-categories widget.
    jsonWriteString(jw, "category",
                    "UCSC GenArk - bulk-annotated assemblies from NCBI Genbank/RefSeq");
    jsonWriteString(jw, "value", hit->asmName);
    // The visible label is "<commonName> - <scientificName>".
    jsonWriteStringf(jw, "label", "%s - %s", hit->commonName, hit->scientificName);
    jsonWriteObjectEnd(jw);
    hit = hit->next;
    }
}
static struct gHubMatch *filterGenarkMatches(char *genarkHubUrl, struct genark *matchList)
/* Convert the genark rows in matchList into a list of struct gHubMatch.  Each row's
 * hubUrl is prefixed with genarkHubUrl and a '/'.  Returns NULL for an empty matchList. */
{
struct gHubMatch *hits = NULL;
struct genark *row = matchList;
while (row != NULL)
    {
    // Build "<genarkHubUrl>/<row hubUrl>" for this row.
    char fullUrl[PATH_LEN+1];
    safef(fullUrl, sizeof(fullUrl), "%s/%s", genarkHubUrl, row->hubUrl);
    // Final argument is -1 for every row (presumably an unset priority -- confirm
    // against gHubMatchNew).
    slAddHead(&hits, gHubMatchNew(row->gcAccession, fullUrl, row->asmName, row->scientificName, row->commonName, -1));
    row = row->next;
    }
// Nodes were prepended above, so flip the list once at the end.
if (hits != NULL)
    slReverse(&hits);
return hits;
}
static struct gHubMatch *searchGenark(char *term)
/* Search the genark table for rows whose gcAccession, scientificName, commonName or
 * asmName contains term, and return them as a gHubMatch list.
 * Returns NULL when hg.conf has no genarkHubPrefix setting or the genark table
 * does not exist. */
{
char *genarkPrefix = cfgOption("genarkHubPrefix");
if (genarkPrefix == NULL)
    return NULL;
struct gHubMatch *gHubMatchList = NULL;
char *genarkTbl = genarkTableName();
int colCount = genArkColumnCount();
struct sqlConnection *conn = hConnectCentral();
if (sqlTableExists(conn, genarkTbl))
    {
    char query[1024];
    if (colCount > 6)
        {
        // Tables with more than 6 columns are sorted by their priority column.
        sqlSafef(query, sizeof(query), "select * from %s where "
                 "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by priority",
                 genarkTbl, term, term, term, term);
        }
    else
        {
        // Narrower tables are sorted by taxId, then commonName descending.
        sqlSafef(query, sizeof(query), "select * from %s where "
                 "(gcAccession like '%%%s%%' or scientificName like '%%%s%%' or commonName like '%%%s%%' or asmName like '%%%s%%') order by taxId ASC, commonName DESC",
                 genarkTbl, term, term, term, term);
        }
    struct genark *matchList = genarkLoadByQuery(conn, query);
    gHubMatchList = filterGenarkMatches(genarkPrefix, matchList);
    }
hDisconnectCentral(&conn);
return gHubMatchList;
}
static char *getSearchTermUpperCase()
/* If we don't have the SEARCH_TERM cgi param (or it is empty after trimming), exit with
 * an HTTP Bad Request response.  Otherwise strip leading and trailing spaces, convert
 * the term to upper case for case-insensitive matching, and return it.
 * The CGI string is edited in place; no copy is made here. */
{
// While validating, route warnings and aborts through the Bad Request handler.
pushWarnHandler(htmlVaBadRequestAbort);
pushAbortHandler(htmlVaBadRequestAbort);
char *cgiTerm = cgiOptionalString(SEARCH_TERM);
char *term = NULL;
if (cgiTerm != NULL)
    {
    // Only touch the string when the parameter is present, so that a missing
    // parameter reaches the errAbort below instead of being dereferenced.
    term = skipLeadingSpaces(cgiTerm);
    eraseTrailingSpaces(term);
    touppers(term);
    }
if (isEmpty(term))
    errAbort("Missing required CGI parameter %s", SEARCH_TERM);
popWarnHandler();
popAbortHandler();
return term;
}
static void lookupTerm()
/* Search dbDb, the genark table and the public hub search table for the SEARCH_TERM
 * cgi param and print the matches to stdout as a JSON list for autocomplete. */
{
char *term = getSearchTermUpperCase();

// HTTP header for the JSON response.
puts("Content-Type:text/javascript\n");

// Initialize the udc cache location from hg.conf before accessing hubs.
setUdcCacheDir();

// Run the three searches.
struct dbDb *dbDbList = hDbDbList();
struct dbDbMatch *dbHits = searchDbDb(dbDbList, term);
struct gHubMatch *genarkHits = searchGenark(term);
struct aHubMatch *hubHits = searchPublicHubs(dbDbList, term);

struct jsonWrite *jw = jsonWriteNew();
jsonWriteListStart(jw, NULL);

// dbDb matches get a category label only when assembly hub matches were found too.
char *category = NULL;
if (hubHits)
    category = "UCSC Genome Browser assemblies - annotation tracks curated by UCSC";
struct dbDbMatch *hit = dbHits;
while (hit != NULL)
    {
    writeDbDbMatch(jw, hit, term, category);
    hit = hit->next;
    }

// Then genark matches, then assembly hub matches (each may be empty).
writeGenarkMatches(jw, genarkHits);
writeAssemblyHubMatches(jw, hubHits);

jsonWriteListEnd(jw);
puts(jw->dy->string);
jsonWriteFree(&jw);
}
int main(int argc, char *argv[])
/* Process CGI / command line.  Requests carrying SEARCH_TERM get the cart-free JSON
 * lookup; everything else goes through the cart and doMiddle. */
{
/* NULL-terminated list of CGI variables that must not be saved permanently. */
char *excludeVars[] = {SEARCH_TERM, CARTJSON_COMMAND, NULL,};
cgiSpoof(&argc, argv);
measureTiming = cgiOptionalInt("measureTiming", 0);
enteredMainTime = clock1000();
/* less bottleneck penalty for the search operation, same as hgTracks */
#define delayFraction 0.25
if (cgiOptionalString(SEARCH_TERM) == NULL)
    {
    /* No search term: standard default bottleneck penalty, html output. */
    issueBotWarning = earlyBotCheck(enteredMainTime, "hgGateway", 0.0, 0, 0, "html");
    oldVars = hashNew(10);
    cartEmptyShellNoContent(doMiddle, hUserCookie(), excludeVars, oldVars);
    cgiExitTime("hgGateway", enteredMainTime);
    }
else
    {
    issueBotWarning = earlyBotCheck(enteredMainTime, "hgGateway", delayFraction, 0, 0, "json");
    // Skip the cart for speedy searches
    lookupTerm();
    }
return 0;
}