32c0818813157d38ca7581b1e2eb6288c6ddfef7 max Tue Apr 30 11:31:44 2013 -0700 moving the publication track code into its own .h/.c files diff --git src/hg/hgTracks/pubsTracks.c src/hg/hgTracks/pubsTracks.c new file mode 100644 index 0000000..44ead49 --- /dev/null +++ src/hg/hgTracks/pubsTracks.c @@ -0,0 +1,485 @@ +/* pubsTracks - code for the publications tracks */ +#include "common.h" +#include "hgTracks.h" +#include "hgFind.h" +#include "bedCart.h" + +static char* pubsArticleTable(struct track *tg) +/* return the name of the pubs articleTable, either + * the value from the trackDb statement 'articleTable' + * or the default value: Article */ +{ +char *articleTable = trackDbSettingClosestToHome(tg->tdb, "pubsArticleTable"); +if (isEmpty(articleTable)) + { + char buf[256]; + safef(buf, sizeof(buf), "%sArticle", tg->track); + articleTable = cloneString(buf); + } +return articleTable; +} + +static char *makeMysqlMatchStr(char *str) +{ +// return a string with all words prefixed with a '+' to force a boolean AND query; +// we also strip leading/trailing spaces. +char *matchStr = needMem(strlen(str) * 2 + 1); +int i = 0; +for(;*str && isspace(*str);str++) + ; while(*str) + { + matchStr[i++] = '+'; + for(; *str && !isspace(*str);str++) + matchStr[i++] = *str; + for(;*str && isspace(*str);str++) + ; + } +matchStr[i++] = 0; +return matchStr; +} + +struct pubsExtra +/* additional info needed for publication blat linked features: author+year and title */ +{ + char *label; // usually author+year + char *mouseOver; // usually title of article + char *class; // class of article, usually a curated database + // color depends on cart settings, either based on topic, impact or year + // support to ways to color: either by shade (year, impact) or directly with rgb values + int shade; // year or impact are shades which we can't resolve to rgb easily + struct rgbColor *color; +}; + +/* assignment of pubs classes to colors */ +static struct hash* pubsClassColors = NULL; + +static void pubsParseClassColors() +/* parse class colors from hgFixed.pubsClassColors into the hash pubsClassColors */ +{ +if (pubsClassColors!=NULL) + return; + +pubsClassColors = hashNew(0); +struct sqlConnection *conn = hAllocConn(database); +if (!sqlTableExists(conn, "hgFixed.pubsClassColors")) + { + return; + } +char *query = "SELECT class, rgbColor FROM hgFixed.pubsClassColors"; +struct sqlResult *sr = sqlGetResult(conn, query); +char **row = NULL; +while ((row = sqlNextRow(sr)) != NULL) + { + char *class = row[0]; + char *colStr = row[1]; + // copied from genePredItemClassColor - is there no function for this? + // convert comma sep rgb string to array + char *rgbVals[5]; + chopString(colStr, ",", rgbVals, sizeof(rgbVals)); + struct rgbColor *rgb; + AllocVar(rgb); + rgb->r = (sqlUnsigned(rgbVals[0])); + rgb->g = (sqlUnsigned(rgbVals[1])); + rgb->b = (sqlUnsigned(rgbVals[2])); + //printf("Adding hash: %s -> %d,%d,%d", class, rgb->r, rgb->g, rgb->b); + hashAdd(pubsClassColors, cloneString(class), rgb); + } +sqlFreeResult(&sr); +} + +static char* pubsFeatureLabel(char* author, char* year) +/* create label given authors and year strings */ +{ +char* authorYear = NULL; + +if (isEmpty(author)) + author = "NoAuthor"; +if (isEmpty(year)) + year = "NoYear"; +authorYear = catTwoStrings(author, year); + +return authorYear; +} + +static struct pubsExtra *pubsMakeExtra(struct track* tg, char* articleTable, + struct sqlConnection* conn, struct linkedFeatures* lf) +/* bad solution: a function that is called before the extra field is + * accessed and that fills it from a sql query. Will need to redo this like gencode, + * drawing from atom, variome and bedLoadN or bedDetails */ +{ +char query[LARGEBUF]; +struct sqlResult *sr = NULL; +char **row = NULL; +struct pubsExtra *extra = NULL; + +/* support two different storage places for article data: either the bed table directly + * includes the title + author of the article or we have to look it up from the articles + * table. Having a copy of the title in the bed table is faster */ +bool newFormat = false; +if (sqlColumnExists(conn, tg->table, "title")) + { + safef(query, sizeof(query), "SELECT firstAuthor, year, title, impact, classes FROM %s " + "WHERE chrom = '%s' and chromStart = '%d' and name='%s'", tg->table, chromName, lf->start, lf->name); + newFormat = true; + } +else + { + safef(query, sizeof(query), "SELECT firstAuthor, year, title FROM %s WHERE articleId = '%s'", + articleTable, lf->name); + newFormat = false; + } + +sr = sqlGetResult(conn, query); +if ((row = sqlNextRow(sr)) != NULL) + { + char* firstAuthor = row[0]; + char* year = row[1]; + char* title = row[2]; + char* impact = NULL; + char* classes = NULL; + + + extra = needMem(sizeof(struct pubsExtra)); + extra->label = pubsFeatureLabel(firstAuthor, year); + if (isEmpty(title)) + extra->mouseOver = extra->label; + else + extra->mouseOver = cloneString(title); + extra->color = NULL; + extra->shade = -1; + + if (newFormat) + { + impact = row[3]; + classes = row[4]; + if (!isEmpty(impact)) + { + char *colorBy = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsColorBy"); + if ((colorBy==NULL) || strcmp(colorBy,"topic")==0) + { + char *classCopy = classes; + char* mainClass = cloneNextWordByDelimiter(&classes, ','); + classes = classCopy; + if (mainClass!=NULL) + { + struct rgbColor *col = (struct rgbColor*) hashFindVal(pubsClassColors, mainClass); + extra->color = col; + // add class to mouseover text + struct dyString *mo = dyStringNew(0); + dyStringAppend(mo, extra->mouseOver); + dyStringAppend(mo, " (categories: "); + dyStringAppend(mo, classes); + dyStringAppend(mo, ")"); + freeMem(extra->mouseOver); + extra->mouseOver = dyStringContents(mo); + } + } + else + { + if (strcmp(colorBy,"impact")==0) + { + char impInt = atoi(impact); + extra->shade = impInt/25; + } + if (strcmp(colorBy,"year")==0) + { + int relYear = (atoi(year)-1990); + extra->shade = min(relYear/3, 10); + //extra->color = shadesOfGray[yearShade]; + } + } + } + } + } + + +sqlFreeResult(&sr); +return extra; +} + +static void pubsAddExtra(struct track* tg, struct linkedFeatures* lf) +/* add authorYear and title to linkedFeatures->extra */ +{ +char *articleTable = trackDbSettingClosestToHome(tg->tdb, "pubsArticleTable"); +if(isEmpty(articleTable)) + return; +if (lf->extra != NULL) { + return; + } + +struct sqlConnection *conn = hAllocConn(database); +struct pubsExtra* extra = pubsMakeExtra(tg, articleTable, conn, lf); +lf->extra = extra; +hFreeConn(&conn); +} + +static void pubsLoadKeywordYearItems(struct track *tg) +/* load items that fulfill keyword and year filter */ +{ +pubsParseClassColors(); +struct sqlConnection *conn = hAllocConn(database); +char *keywords = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsKeywords"); +char *yearFilter = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "pubsYear"); +char *articleTable = pubsArticleTable(tg); + +if(yearFilter == NULL || sameWord(yearFilter, "anytime")) + yearFilter = NULL; + +if(isNotEmpty(keywords)) + keywords = makeMysqlMatchStr(sqlEscapeString(keywords)); + +if(isEmpty(yearFilter) && isEmpty(keywords)) +{ + loadGappedBed(tg); +} +else + { + char* oldLabel = tg->longLabel; + tg->longLabel = catTwoStrings(oldLabel, " (filter activated)"); + freeMem(oldLabel); + + char extra[2048], yearWhere[256], keywordsWhere[1024], prefix[256]; + char **row; + struct linkedFeatures *lfList = NULL; + struct trackDb *tdb = tg->tdb; + int scoreMin = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMin", "0")); + int scoreMax = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMax", "1000")); + boolean useItemRgb = bedItemRgb(tdb); + + safef(prefix, sizeof(prefix), "name IN (SELECT articleId FROM %s WHERE", articleTable); + if(isNotEmpty(keywords)) + safef(keywordsWhere, sizeof(keywordsWhere), \ + "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE)", keywords); + if(isNotEmpty(yearFilter)) + safef(yearWhere, sizeof(yearWhere), "year >= '%s'", sqlEscapeString(yearFilter)); + + if(isEmpty(keywords)) + safef(extra, sizeof(extra), "%s %s)", prefix, yearWhere); + else if(isEmpty(yearFilter)) + safef(extra, sizeof(extra), "%s %s)", prefix, keywordsWhere); + else + safef(extra, sizeof(extra), "%s %s AND %s)", prefix, yearWhere, keywordsWhere); + + int rowOffset = 0; + struct sqlResult *sr = hExtendedRangeQuery(conn, tg->table, chromName, winStart, winEnd, extra, + FALSE, NULL, &rowOffset); + while ((row = sqlNextRow(sr)) != NULL) + { + struct bed *bed = bedLoad12(row+rowOffset); + slAddHead(&lfList, bedMungToLinkedFeatures(&bed, tdb, 12, scoreMin, scoreMax, useItemRgb)); + } + sqlFreeResult(&sr); + slReverse(&lfList); + slSort(&lfList, linkedFeaturesCmp); + tg->items = lfList; + } +hFreeConn(&conn); +} + +#define PUBSFILTERNAME "pubsFilterArticleId" + +static void activatePslTrackIfCgi(struct track *tg) +/* the publications hgc creates links back to the browser with + * the cgi param pubsFilterArticleId to show only a single type + * of feature for the pubsBlatPsl track. + * If the parameter was supplied, we save this parameter here + * into the cart and activate the track. + */ +{ +char *articleId = cgiOptionalString(PUBSFILTERNAME); +//if (articleId==NULL) + //articleId = cartOptionalString(cart, PUBSFILTERNAME); + +if (articleId!=NULL) +{ + cartSetString(cart, PUBSFILTERNAME, articleId); + tdbSetCartVisibility(tg->tdb, cart, hCarefulTrackOpenVis(database, tg->track)); + tg->visibility=tvPack; +} +} + +Color pubsItemColor(struct track *tg, void *item, struct hvGfx *hvg) +/* get color from extra field */ +{ +//pubsParseClassColors(); +struct linkedFeatures *lf = item; +pubsAddExtra(tg, lf); + +struct pubsExtra* extra = lf->extra; +if (extra==NULL || (extra->color==NULL && extra->shade==-1)) + return MG_BLACK; + +if (extra->shade != -1) + return shadesOfBlue[extra->shade]; +else + { + //printf("got item color %d", extra->color->r); + return hvGfxFindRgb(hvg, extra->color); + } +} + +static char *pubsItemName(struct track *tg, void *item) +/* get author/year from extra field */ +{ +struct linkedFeatures *lf = item; +pubsAddExtra(tg, lf); + +struct pubsExtra* extra = lf->extra; +if (extra!=NULL) + return extra->label; +else + return lf->name; + +} + +static void pubsMapItem(struct track *tg, struct hvGfx *hvg, void *item, + char *itemName, char *mapItemName, int start, int end, + int x, int y, int width, int height) +/* create mouse over with title for pubs blat features. */ +{ +if (!theImgBox || tg->limitedVis != tvDense || !tdbIsCompositeChild(tg->tdb)) +{ + struct linkedFeatures *lf = item; + pubsAddExtra(tg, lf); + struct pubsExtra* extra = lf->extra; + char* mouseOver = NULL; + if (extra != NULL) + mouseOver = extra->mouseOver; + else + mouseOver = itemName; + + mapBoxHc(hvg, start, end, x, y, width, height, tg->track, mapItemName, mouseOver); +} +} + +static char *pubsMarkerItemName(struct track *tg, void *item) +/* retrieve article count from score field and return.*/ +{ +struct bed *bed = item; +char newName[64]; +safef(newName, sizeof(newName), "%d articles", (int) bed->score); +return cloneString(newName); +} + +static void pubsMarkerMapItem(struct track *tg, struct hvGfx *hvg, void *item, + char *itemName, char *mapItemName, int start, int end, + int x, int y, int width, int height) +{ +struct bed *bed = item; +genericMapItem(tg, hvg, item, bed->name, bed->name, start, end, x, y, width, height); +} + +static struct hash* pubsLookupSequences(struct track *tg, struct sqlConnection* conn, char* articleId, bool getSnippet) +/* create a hash with a mapping annotId -> snippet or annotId -> shortSeq for an articleId*/ +{ + char query[LARGEBUF]; + char *sequenceTable = trackDbRequiredSetting(tg->tdb, "pubsSequenceTable"); + char *selectValSql = NULL; + if (getSnippet) + selectValSql = "replace(replace(snippet, \"\", \"\\n>>> \"), \"\", \" <<<\\n\")"; + else + selectValSql = "concat(substr(sequence,1,4),\"...\",substr(sequence,-4))"; + + safef(query, sizeof(query), "SELECT annotId, %s FROM %s WHERE articleId='%s' ", + selectValSql, sequenceTable, articleId); + struct hash *seqIdHash = sqlQuickHash(conn, query); + //freeMem(sequenceTable); // XX Why does this crash?? + return seqIdHash; +} + +static char *pubsArticleDispId(struct track *tg, struct sqlConnection *conn, char* articleId) +/* given an articleId, lookup author and year and create label for it */ +{ +char* dispLabel = NULL; +char *articleTable = pubsArticleTable(tg); +char query[LARGEBUF]; +safef(query, sizeof(query), "SELECT firstAuthor, year FROM %s WHERE articleId = '%s'", + articleTable, articleId); +struct sqlResult *sr = sqlGetResult(conn, query); +if (sr!=NULL) + { + char **row = NULL; + row = sqlNextRow(sr); + if (row != NULL) + dispLabel = pubsFeatureLabel(row[0], row[1]); + else + dispLabel = articleId; + } +else + dispLabel = articleId; +sqlFreeResult(&sr); +return dispLabel; +} + +static void pubsPslLoadItems(struct track *tg) +/* load only psl items from a single article */ +{ +// get articleId to filter on +char *articleId = cartOptionalString(cart, PUBSFILTERNAME); +if (articleId==NULL) + return; + +struct sqlConnection *conn = hAllocConn(database); +char* dispLabel = pubsArticleDispId(tg, conn, articleId); +struct hash *idToSnip = pubsLookupSequences(tg, conn, articleId, TRUE); +struct hash *idToSeq = pubsLookupSequences(tg, conn, articleId, FALSE); + +// change track label +char* oldLabel = tg->longLabel; +tg->longLabel = catTwoStrings("Individual matches for article ", dispLabel); +freeMem(oldLabel); + +// filter and load items for this articleId +char where[256]; +safef(where, sizeof(where), " articleId=%s ", articleId); + +int rowOffset = 0; +struct sqlResult *sr = NULL; +sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, where, &rowOffset); + +struct linkedFeatures *lfList = NULL; +char **row = NULL; +while ((row = sqlNextRow(sr)) != NULL) + { + struct psl *psl = pslLoad(row+rowOffset); + slAddHead(&lfList, lfFromPsl(psl, TRUE)); + char* shortSeq = hashFindVal(idToSeq, lfList->name); + char* snip = hashFindVal(idToSnip, lfList->name); + struct pubsExtra *extra = needMem(sizeof(struct pubsExtra)); + extra->mouseOver=snip; + extra->label=shortSeq; + lfList->extra = extra; + } +sqlFreeResult(&sr); +slReverse(&lfList); +slSort(&lfList, linkedFeaturesCmp); +tg->items = lfList; +hFreeConn(&conn); +} + +void pubsBlatPslMethods(struct track *tg) +/* a track that shows only the indiv matches for one single article */ +{ +activatePslTrackIfCgi(tg); +tg->loadItems = pubsPslLoadItems; +tg->itemName = pubsItemName; +tg->mapItem = pubsMapItem; +} + +void pubsBlatMethods(struct track *tg) +/* publication blat tracks are bed12+2 tracks of sequences in text, mapped with BLAT */ +{ +//bedMethods(tg); +tg->loadItems = pubsLoadKeywordYearItems; +tg->itemName = pubsItemName; +tg->itemColor = pubsItemColor; +tg->mapItem = pubsMapItem; +} + +void pubsMarkerMethods(struct track *tg) +/* publication marker tracks are bed5 tracks of genome marker occurences like rsXXXX found in text*/ +{ +tg->mapItem = pubsMarkerMapItem; +tg->itemName = pubsMarkerItemName; +} +