0a456bff8d6200e3bda8bc9f73cfd5d75f135f73 max Tue Apr 30 15:23:37 2013 -0700 filters work on main table now if table is in new format, added publisher filter diff --git src/hg/hgTracks/pubsTracks.c src/hg/hgTracks/pubsTracks.c index 44ead49..ec05414 100644 --- src/hg/hgTracks/pubsTracks.c +++ src/hg/hgTracks/pubsTracks.c @@ -1,485 +1,520 @@ /* pubsTracks - code for the publications tracks */ #include "common.h" #include "hgTracks.h" #include "hgFind.h" #include "bedCart.h" static char* pubsArticleTable(struct track *tg) /* return the name of the pubs articleTable, either * the value from the trackDb statement 'articleTable' * or the default value: Article */ { char *articleTable = trackDbSettingClosestToHome(tg->tdb, "pubsArticleTable"); if (isEmpty(articleTable)) { char buf[256]; safef(buf, sizeof(buf), "%sArticle", tg->track); articleTable = cloneString(buf); } return articleTable; } static char *makeMysqlMatchStr(char *str) { // return a string with all words prefixed with a '+' to force a boolean AND query; // we also strip leading/trailing spaces. char *matchStr = needMem(strlen(str) * 2 + 1); int i = 0; for(;*str && isspace(*str);str++) ; while(*str) { matchStr[i++] = '+'; for(; *str && !isspace(*str);str++) matchStr[i++] = *str; for(;*str && isspace(*str);str++) ; } matchStr[i++] = 0; return matchStr; } struct pubsExtra /* additional info needed for publication blat linked features: author+year and title */ { char *label; // usually author+year char *mouseOver; // usually title of article char *class; // class of article, usually a curated database // color depends on cart settings, either based on topic, impact or year // support to ways to color: either by shade (year, impact) or directly with rgb values int shade; // year or impact are shades which we can't resolve to rgb easily struct rgbColor *color; }; /* assignment of pubs classes to colors */ static struct hash* pubsClassColors = NULL; static void pubsParseClassColors() /* parse class colors from hgFixed.pubsClassColors into the hash pubsClassColors */ { if (pubsClassColors!=NULL) return; pubsClassColors = hashNew(0); struct sqlConnection *conn = hAllocConn(database); if (!sqlTableExists(conn, "hgFixed.pubsClassColors")) { return; } char *query = "SELECT class, rgbColor FROM hgFixed.pubsClassColors"; struct sqlResult *sr = sqlGetResult(conn, query); char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { char *class = row[0]; char *colStr = row[1]; // copied from genePredItemClassColor - is there no function for this? // convert comma sep rgb string to array char *rgbVals[5]; chopString(colStr, ",", rgbVals, sizeof(rgbVals)); struct rgbColor *rgb; AllocVar(rgb); rgb->r = (sqlUnsigned(rgbVals[0])); rgb->g = (sqlUnsigned(rgbVals[1])); rgb->b = (sqlUnsigned(rgbVals[2])); //printf("Adding hash: %s -> %d,%d,%d", class, rgb->r, rgb->g, rgb->b); hashAdd(pubsClassColors, cloneString(class), rgb); } sqlFreeResult(&sr); } static char* pubsFeatureLabel(char* author, char* year) /* create label given authors and year strings */ { char* authorYear = NULL; if (isEmpty(author)) author = "NoAuthor"; if (isEmpty(year)) year = "NoYear"; authorYear = catTwoStrings(author, year); return authorYear; } static struct pubsExtra *pubsMakeExtra(struct track* tg, char* articleTable, struct sqlConnection* conn, struct linkedFeatures* lf) /* bad solution: a function that is called before the extra field is * accessed and that fills it from a sql query. Will need to redo this like gencode, * drawing from atom, variome and bedLoadN or bedDetails */ { char query[LARGEBUF]; struct sqlResult *sr = NULL; char **row = NULL; struct pubsExtra *extra = NULL; /* support two different storage places for article data: either the bed table directly * includes the title + author of the article or we have to look it up from the articles * table. Having a copy of the title in the bed table is faster */ bool newFormat = false; if (sqlColumnExists(conn, tg->table, "title")) { safef(query, sizeof(query), "SELECT firstAuthor, year, title, impact, classes FROM %s " "WHERE chrom = '%s' and chromStart = '%d' and name='%s'", tg->table, chromName, lf->start, lf->name); newFormat = true; } else { safef(query, sizeof(query), "SELECT firstAuthor, year, title FROM %s WHERE articleId = '%s'", articleTable, lf->name); newFormat = false; } sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { char* firstAuthor = row[0]; char* year = row[1]; char* title = row[2]; char* impact = NULL; char* classes = NULL; extra = needMem(sizeof(struct pubsExtra)); extra->label = pubsFeatureLabel(firstAuthor, year); if (isEmpty(title)) extra->mouseOver = extra->label; else extra->mouseOver = cloneString(title); extra->color = NULL; extra->shade = -1; if (newFormat) { impact = row[3]; classes = row[4]; if (!isEmpty(impact)) { char *colorBy = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsColorBy"); if ((colorBy==NULL) || strcmp(colorBy,"topic")==0) { char *classCopy = classes; char* mainClass = cloneNextWordByDelimiter(&classes, ','); classes = classCopy; if (mainClass!=NULL) { struct rgbColor *col = (struct rgbColor*) hashFindVal(pubsClassColors, mainClass); extra->color = col; // add class to mouseover text struct dyString *mo = dyStringNew(0); dyStringAppend(mo, extra->mouseOver); dyStringAppend(mo, " (categories: "); dyStringAppend(mo, classes); dyStringAppend(mo, ")"); freeMem(extra->mouseOver); extra->mouseOver = dyStringContents(mo); } } else { if (strcmp(colorBy,"impact")==0) { char impInt = atoi(impact); extra->shade = impInt/25; } if (strcmp(colorBy,"year")==0) { int relYear = (atoi(year)-1990); extra->shade = min(relYear/3, 10); //extra->color = shadesOfGray[yearShade]; } } } } } sqlFreeResult(&sr); return extra; } static void pubsAddExtra(struct track* tg, struct linkedFeatures* lf) /* add authorYear and title to linkedFeatures->extra */ { char *articleTable = trackDbSettingClosestToHome(tg->tdb, "pubsArticleTable"); if(isEmpty(articleTable)) return; if (lf->extra != NULL) { return; } struct sqlConnection *conn = hAllocConn(database); struct pubsExtra* extra = pubsMakeExtra(tg, articleTable, conn, lf); lf->extra = extra; hFreeConn(&conn); } +static void dyStringPrintfWithSep(struct dyString *ds, char* sep, char *format, ...) +/* Printf to end of dyString. Prefix with sep if dyString is not empty. */ +{ +if (ds->stringSize!=0) + dyStringAppend(ds, sep); +va_list args; +va_start(args, format); +dyStringVaPrintf(ds, format, args); +va_end(args); +} + static void pubsLoadKeywordYearItems(struct track *tg) /* load items that fulfill keyword and year filter */ { pubsParseClassColors(); struct sqlConnection *conn = hAllocConn(database); -char *keywords = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsKeywords"); -char *yearFilter = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsYear"); +char *keywords = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsFilterKeywords"); +char *yearFilter = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsFilterYear"); +char *publFilter = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsFilterPublisher"); char *articleTable = pubsArticleTable(tg); if(yearFilter == NULL || sameWord(yearFilter, "anytime")) yearFilter = NULL; if(isNotEmpty(keywords)) keywords = makeMysqlMatchStr(sqlEscapeString(keywords)); if(isEmpty(yearFilter) && isEmpty(keywords)) { loadGappedBed(tg); } else { char* oldLabel = tg->longLabel; tg->longLabel = catTwoStrings(oldLabel, " (filter activated)"); freeMem(oldLabel); - char extra[2048], yearWhere[256], keywordsWhere[1024], prefix[256]; + char yearWhere[256], keywordsWhere[1024], prefix[256]; char **row; struct linkedFeatures *lfList = NULL; struct trackDb *tdb = tg->tdb; int scoreMin = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMin", "0")); int scoreMax = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMax", "1000")); boolean useItemRgb = bedItemRgb(tdb); + char *extra; + struct dyString *extraDy = dyStringNew(0); + if (sqlColumnExists(conn, tg->table, "year")) + // new table schema: filter fields are on main bed table + { + if (isNotEmpty(keywords)) + dyStringPrintf(extraDy, "name IN (SELECT articleId FROM %s WHERE " + "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE))", + articleTable, keywords); + if (isNotEmpty(yearFilter)) + dyStringPrintfWithSep(extraDy, " AND ", " year >= '%s'", sqlEscapeString(yearFilter)); + if (isNotEmpty(publFilter)) + dyStringPrintfWithSep(extraDy, " AND ", " publisher = '%s'", sqlEscapeString(publFilter)); + extra = extraDy->string; + } + else + // old table schema, filter by doing a join on article table + { + char extraTmp[4096]; safef(prefix, sizeof(prefix), "name IN (SELECT articleId FROM %s WHERE", articleTable); if(isNotEmpty(keywords)) safef(keywordsWhere, sizeof(keywordsWhere), \ "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE)", keywords); if(isNotEmpty(yearFilter)) safef(yearWhere, sizeof(yearWhere), "year >= '%s'", sqlEscapeString(yearFilter)); if(isEmpty(keywords)) - safef(extra, sizeof(extra), "%s %s)", prefix, yearWhere); + safef(extraTmp, sizeof(extraTmp), "%s %s)", prefix, yearWhere); else if(isEmpty(yearFilter)) - safef(extra, sizeof(extra), "%s %s)", prefix, keywordsWhere); + safef(extraTmp, sizeof(extraTmp), "%s %s)", prefix, keywordsWhere); else - safef(extra, sizeof(extra), "%s %s AND %s)", prefix, yearWhere, keywordsWhere); + safef(extraTmp, sizeof(extraTmp), "%s %s AND %s)", prefix, yearWhere, keywordsWhere); + extra = extraTmp; + } int rowOffset = 0; struct sqlResult *sr = hExtendedRangeQuery(conn, tg->table, chromName, winStart, winEnd, extra, FALSE, NULL, &rowOffset); + freeDyString(&extraDy); + while ((row = sqlNextRow(sr)) != NULL) { struct bed *bed = bedLoad12(row+rowOffset); slAddHead(&lfList, bedMungToLinkedFeatures(&bed, tdb, 12, scoreMin, scoreMax, useItemRgb)); } sqlFreeResult(&sr); slReverse(&lfList); slSort(&lfList, linkedFeaturesCmp); tg->items = lfList; } hFreeConn(&conn); } #define PUBSFILTERNAME "pubsFilterArticleId" static void activatePslTrackIfCgi(struct track *tg) /* the publications hgc creates links back to the browser with * the cgi param pubsFilterArticleId to show only a single type * of feature for the pubsBlatPsl track. - * If the parameter was supplied, we save this parameter here + * If the parameter was supplied, we save it here * into the cart and activate the track. */ { char *articleId = cgiOptionalString(PUBSFILTERNAME); //if (articleId==NULL) //articleId = cartOptionalString(cart, PUBSFILTERNAME); if (articleId!=NULL) { cartSetString(cart, PUBSFILTERNAME, articleId); tdbSetCartVisibility(tg->tdb, cart, hCarefulTrackOpenVis(database, tg->track)); tg->visibility=tvPack; } } Color pubsItemColor(struct track *tg, void *item, struct hvGfx *hvg) /* get color from extra field */ { //pubsParseClassColors(); struct linkedFeatures *lf = item; pubsAddExtra(tg, lf); struct pubsExtra* extra = lf->extra; if (extra==NULL || (extra->color==NULL && extra->shade==-1)) return MG_BLACK; if (extra->shade != -1) return shadesOfBlue[extra->shade]; else { //printf("got item color %d", extra->color->r); return hvGfxFindRgb(hvg, extra->color); } } static char *pubsItemName(struct track *tg, void *item) /* get author/year from extra field */ { struct linkedFeatures *lf = item; pubsAddExtra(tg, lf); struct pubsExtra* extra = lf->extra; if (extra!=NULL) return extra->label; else return lf->name; } static void pubsMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* create mouse over with title for pubs blat features. */ { if (!theImgBox || tg->limitedVis != tvDense || !tdbIsCompositeChild(tg->tdb)) { struct linkedFeatures *lf = item; pubsAddExtra(tg, lf); struct pubsExtra* extra = lf->extra; char* mouseOver = NULL; if (extra != NULL) mouseOver = extra->mouseOver; else mouseOver = itemName; mapBoxHc(hvg, start, end, x, y, width, height, tg->track, mapItemName, mouseOver); } } static char *pubsMarkerItemName(struct track *tg, void *item) /* retrieve article count from score field and return.*/ { struct bed *bed = item; char newName[64]; safef(newName, sizeof(newName), "%d articles", (int) bed->score); return cloneString(newName); } static void pubsMarkerMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) { struct bed *bed = item; genericMapItem(tg, hvg, item, bed->name, bed->name, start, end, x, y, width, height); } static struct hash* pubsLookupSequences(struct track *tg, struct sqlConnection* conn, char* articleId, bool getSnippet) /* create a hash with a mapping annotId -> snippet or annotId -> shortSeq for an articleId*/ { char query[LARGEBUF]; char *sequenceTable = trackDbRequiredSetting(tg->tdb, "pubsSequenceTable"); char *selectValSql = NULL; if (getSnippet) selectValSql = "replace(replace(snippet, \"\", \"\\n>>> \"), \"\", \" <<<\\n\")"; else selectValSql = "concat(substr(sequence,1,4),\"...\",substr(sequence,-4))"; safef(query, sizeof(query), "SELECT annotId, %s FROM %s WHERE articleId='%s' ", selectValSql, sequenceTable, articleId); struct hash *seqIdHash = sqlQuickHash(conn, query); //freeMem(sequenceTable); // XX Why does this crash?? return seqIdHash; } static char *pubsArticleDispId(struct track *tg, struct sqlConnection *conn, char* articleId) /* given an articleId, lookup author and year and create label for it */ { char* dispLabel = NULL; char *articleTable = pubsArticleTable(tg); char query[LARGEBUF]; safef(query, sizeof(query), "SELECT firstAuthor, year FROM %s WHERE articleId = '%s'", articleTable, articleId); struct sqlResult *sr = sqlGetResult(conn, query); if (sr!=NULL) { char **row = NULL; row = sqlNextRow(sr); if (row != NULL) dispLabel = pubsFeatureLabel(row[0], row[1]); else dispLabel = articleId; } else dispLabel = articleId; sqlFreeResult(&sr); return dispLabel; } static void pubsPslLoadItems(struct track *tg) /* load only psl items from a single article */ { // get articleId to filter on char *articleId = cartOptionalString(cart, PUBSFILTERNAME); if (articleId==NULL) return; struct sqlConnection *conn = hAllocConn(database); char* dispLabel = pubsArticleDispId(tg, conn, articleId); struct hash *idToSnip = pubsLookupSequences(tg, conn, articleId, TRUE); struct hash *idToSeq = pubsLookupSequences(tg, conn, articleId, FALSE); // change track label char* oldLabel = tg->longLabel; tg->longLabel = catTwoStrings("Individual matches for article ", dispLabel); freeMem(oldLabel); // filter and load items for this articleId char where[256]; safef(where, sizeof(where), " articleId=%s ", articleId); int rowOffset = 0; struct sqlResult *sr = NULL; sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, where, &rowOffset); struct linkedFeatures *lfList = NULL; char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row+rowOffset); slAddHead(&lfList, lfFromPsl(psl, TRUE)); char* shortSeq = hashFindVal(idToSeq, lfList->name); char* snip = hashFindVal(idToSnip, lfList->name); struct pubsExtra *extra = needMem(sizeof(struct pubsExtra)); extra->mouseOver=snip; extra->label=shortSeq; lfList->extra = extra; } sqlFreeResult(&sr); slReverse(&lfList); slSort(&lfList, linkedFeaturesCmp); tg->items = lfList; hFreeConn(&conn); } void pubsBlatPslMethods(struct track *tg) /* a track that shows only the indiv matches for one single article */ { activatePslTrackIfCgi(tg); tg->loadItems = pubsPslLoadItems; tg->itemName = pubsItemName; tg->mapItem = pubsMapItem; } void pubsBlatMethods(struct track *tg) /* publication blat tracks are bed12+2 tracks of sequences in text, mapped with BLAT */ { //bedMethods(tg); tg->loadItems = pubsLoadKeywordYearItems; tg->itemName = pubsItemName; tg->itemColor = pubsItemColor; tg->mapItem = pubsMapItem; } void pubsMarkerMethods(struct track *tg) /* publication marker tracks are bed5 tracks of genome marker occurences like rsXXXX found in text*/ { tg->mapItem = pubsMarkerMapItem; tg->itemName = pubsMarkerItemName; }