d72c06628ce3c8a977c5eec289b6e8c7fc2cc5e3 galt Fri May 27 16:36:45 2022 -0700 oops minor NOSQLINJv2 fix for pubs.c, I missed proper initialization of artFilterSql should have been sqlSafef(artFilterSql, sizeof(artFilterSql), "%s", ""); Revamped to be better and simpler with dyString. refs #29274 diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c index a8668d6..77a4d6f 100644 --- src/hg/hgc/pubs.c +++ src/hg/hgc/pubs.c @@ -1,1219 +1,1221 @@ /* pubs.c - display details of publiations literature track (pubsxxx tables) */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "hgc.h" #include "hgColors.h" #include "trackDb.h" #include "web.h" #include "hash.h" #include "net.h" #include "obscure.h" #include "common.h" #include "string.h" #include "dystring.h" #include "dnautil.h" //#include "ctype.h" // cgi var to activate debug output static int pubsDebug = 0; // global var for printArticleInfo to indicate if article has suppl info // Most publishers have supp data. // If they don't have it, we can skip the fileType column in the table bool pubsHasSupp = TRUE; // global var for printArticleInfo to indicate if article is elsevier // If it's elsevier, we print the copyright line bool pubsIsElsevier = FALSE; // the article source is used to modify other parts of the page static char *articleSource; // we need the external article PMC Id for YIF links static char *extId = NULL; // section types in mysql table, for all annotations tables // we note where the hit is located in the document static char *pubsSecNames[] ={ "header", "abstract", "intro", "methods", "results", "discussion", "conclusions", "ack", "refs", "supplement", "unknown" }; // labels to show to user, have to correspond to pubsSecNames static char *secLabels[] ={ "Title", "Abstract", "Introduction", "Methods", "Results", "Discussion", "Conclusions", "Acknowledgements", "References", "Supplement", "Undetermined section (e.g. for a brief communication)" }; // whether a checkbox is checked by default, have to correspond to pubsSecNames static int pubsSecChecked[] ={ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; static char *pubsSequenceTable; /* ------ functions to replace HTML4 tables with HTML5 constructs */ /* Web wrappers incorporating tag, id, and class HTML attributes, to support * styling and test */ /* Suffix -S for "function accepts style parameter" * Suffix -C for "function accepts class parameter" * Suffix -CI for "function accepts class and id parameter" * * Some functions are commented out because they are not yet used. */ static void web2Start(char *tag) { printf("<%s>\n", tag); } static void web2End(char *tag) { printf("</%s>\n", tag); } static void web2StartS(char *style, char *tag) { printf("<%s style=\"%s\">\n", tag, style); } static void web2StartC(char *class, char *tag) { printf("<%s class=\"%s\">\n", tag, class); } static void web2StartCI(char *class, char *id, char *tag) { if ((id==NULL) && (class==NULL)) web2Start(tag); else if (id==NULL) web2StartC(class, tag); else printf("<%s class=\"%s\" id=\"%s\">\n", tag, class, id); } static void web2PrintS(char *style, char *tag, char *label) { printf("<%s style=\"%s\">%s</%s>\n", tag, style, label, tag); } //static void web2PrintC(char *class, char *tag, char *label) //{ //printf("<%s class=\"%s\">%s</%s>\n", tag, class, label, tag); //} //static void web2Print(char *tag, char *label) //{ //printf("<%s>%s</%s>\n", tag, label, tag); //} static void web2StartTableC(char *class) { web2StartC(class, "table"); } static void web2StartTheadC(char *class) { web2StartC(class, "thead"); } static void web2EndThead() { web2End("thead"); } static void web2StartTbodyS(char *style) { web2StartS(style, "tbody"); } static void web2StartCell() { web2Start("td"); } static void web2EndCell() { web2End("td"); } static void web2StartCellS(char *style) { web2StartS(style, "td"); } //static void web2PrintCell(char *label) { web2Print("td", label); } static void web2PrintCellS(char *style, char *label) { web2PrintS(style, "td", label); } static void web2StartRow() { web2Start("tr"); } static void web2EndRow() { web2End("tr"); } //static void web2StartTbody() { web2Start("tbody"); } static void web2EndTbody() { web2End("tbody"); } //static void web2StartTable() { web2Start("table"); } static void web2EndTable() { web2EndTbody(); web2End("table"); } static void web2StartDivCI (char *class, char *id) { web2StartCI(class, id, "div"); } static void web2StartDivC (char *class) { web2StartC(class, "div"); } static void web2EndDiv(char *comment) { printf("</div> <!-- %s -->\n", comment); } //static void web2Img(char *url, char *alt, int width, int hspace, int vspace) //{ //printf("<img src=\"%s\" alt=\"%s\" width=\"%d\" hspace=\"%d\" vspace=\"%d\">\n", url, alt, width, hspace, vspace); //} static void web2ImgLink(char *url, char *imgUrl, char *alt, int width, int hspace, int vspace) { printf("<a href=\"%s\"><img src=\"%s\" alt=\"%s\" width=\"%d\" hspace=\"%d\" vspace=\"%d\"></a>\n", url, imgUrl, alt, width, hspace, vspace); } static void web2PrintHeaderCell(char *label, int width) /* Print th heading cell with given width in percent */ { printf("<th width=\"%d%%\">", width); printf("%s</th>", label); } static void web2PrintCellF(char *format, ...) /* print a td with format */ { va_list args; va_start(args, format); web2StartCell(); vprintf(format, args); web2EndCell(); va_end(args); } static void web2StartSection(char *id, char *format, ...) /* create a new section on the web page */ { va_list args; va_start(args, format); puts("<!-- START NEW SECTION -->\n"); web2StartDivCI("section", id); web2StartDivC("subheadingBar windowSize"); vprintf(format, args); web2EndDiv("subheadingBar"); va_end(args); } static void web2EndSection() /* end section */ { web2EndDiv("section"); } /* ------ */ static void printDebug(char *text) { if (pubsDebug) printf("%s<BR>", text); } static char *mangleUrl(char *url) /* add publisher specific parameters to url and return new url*/ { if (!stringIn("sciencedirect.com", url)) return url; // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect char *sdAddParam = "?svAppaddApp=298535"; char *longUrl = catTwoStrings(url, sdAddParam); char *newUrl = replaceChars(longUrl, "article", "svapps"); return newUrl; } static void printPositionAndSize(int start, int end, bool showSize) { printf("<B>Position:</B> " "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", hgTracksPathAndSettings(), database, seqName, start+1, end); char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); long size = end - start; sprintLongWithCommas(startBuf, size); if (showSize) printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); } static void printFilterLink(char *pslTrack, char *articleId, char *articleTable) /* print a link to hgTracks with an additional cgi param to activate the single article filter */ { int start = cgiOptionalInt("o", -1); if (start==-1) return; int end = cgiInt("t"); char qBuf[1024]; struct sqlConnection *conn = hAllocConn(database); sqlSafef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId); char *dispId = sqlQuickString(conn, qBuf); printf(" <div class=\"subsection\">"); if (!containsStringNoCase(pslTrack, "Bing")) { printf( " <P><A HREF=\"%s&db=%s&position=%s%%3A%d-%d&pubsFilterArticleId=%s&%s=pack&hgFind.matches=%s\">", hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId); printf("Show these sequence matches individually on genome browser</A> (activates track \"" "Individual matches for article\")</P>"); } printPositionAndSize(start, end, 1); printf( " </div> <!-- class: subsection --> \n"); hFreeConn(&conn); } static char *makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(pubsSecNames)/sizeof(char *); struct dyString *dy = dyStringNew(1024); int i; int found = 0; for (i=0; i<secCount; i++) { // add ' around name and add to list char *secName = pubsSecNames[i]; if (cgiOptionalInt(secName, pubsSecChecked[i])) { if (i != 0) sqlDyStringPrintf(dy, ","); sqlDyStringPrintf(dy, "'%s'", secName); ++found; } } if (found==0) errAbort("You need to specify at least one article section."); return dyStringCannibalize(&dy); } static struct sqlResult *queryMarkerRows(struct sqlConnection *conn, char *markerTable, \ char *articleTable, char *item, int itemLimit, char *sectionList, char *artExtIdFilter) /* query marker rows from mysql, based on http parameters * optionally filter on sections or just a single article * */ { -char query[4000]; /* Mysql specific setting to make the group_concat function return longer strings */ //sqlSafef(query, sizeof query, "SET SESSION group_concat_max_len = 100000"); //sqlUpdate(conn, query); -char artFilterSql[4000]; -artFilterSql[0] = 0; -if (isNotEmpty(artExtIdFilter)) - sqlSafef(artFilterSql, sizeof(artFilterSql), " AND extId='%s' ", artExtIdFilter); +struct dyString *query = dyStringNew(4000); // no need to check for illegal characters in sectionList -sqlSafef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation, year, " +sqlDyStringPrintf(query, "SELECT distinct %s.articleId, url, title, authors, citation, year, " "pmid FROM %s " //"group_concat(snippet, concat(\" (section: \", section, \")\") SEPARATOR ' (...) ') FROM %s " "JOIN %s USING (articleId) " - "WHERE markerId='%s' AND section in (%-s) " - "%-s" + "WHERE markerId='%s' AND section in (%-s) ", + markerTable, markerTable, articleTable, item, sectionList); + +if (isNotEmpty(artExtIdFilter)) + sqlDyStringPrintf(query, " AND extId='%s' ", artExtIdFilter); + +sqlDyStringPrintf(query, //"GROUP by articleId " "ORDER BY year DESC " "LIMIT %d", - markerTable, markerTable, articleTable, item, sectionList, artFilterSql, itemLimit); + itemLimit); - printDebug(query); + printDebug(dyStringContents(query)); -struct sqlResult *sr = sqlGetResult(conn, query); +struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query)); +dyStringFree(&query); return sr; } static struct sqlResult *querySnippets(struct sqlConnection *conn, char *markerTable, \ char *articleId, char *markerId, char *sectionList) /* query marker snippet rows from mysql for an article, markerId combination */ { char query[4000]; sqlSafef(query, sizeof(query), "SELECT section, snippet FROM %s " "WHERE articleId=%s AND markerId='%s' AND section in (%-s) ", markerTable, articleId, markerId, sectionList); struct sqlResult *sr = sqlGetResult(conn, query); return sr; } static void printSectionCheckboxes() /* show a little form with checkboxes where user can select sections they want to show */ { int labelCount = sizeof(secLabels)/sizeof(char *); int i; printf("<P>\n"); printf("<B>Sections of article searched:</B><BR>\n"); printf("<FORM ACTION=\"hgc?%s&o=%s&t=%s&g=%s&i=%s\" METHOD=\"get\">\n", cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i")); for (i=0; i<labelCount; i++) { char *name = pubsSecNames[i]; // checkboxes default to 0 unless checked, see // http://stackoverflow.com/questions/2520952/how-come-checkbox-state-is-not-always-passed-along-to-php-script printf("<INPUT TYPE=\"hidden\" name=\"%s\" value=\"0\" />\n", pubsSecNames[i]); printf("<INPUT TYPE=\"checkbox\" name=\"%s\" ", name); int isChecked = cgiOptionalInt(name, pubsSecChecked[i]); if (isChecked) printf("value=\"1\" checked=\"yes\">%s</INPUT>\n", secLabels[i]); else printf("value=\"1\">%s</INPUT>\n", secLabels[i]); } printf("<INPUT TYPE=\"hidden\" name=\"o\" value=\"%s\" />\n", cgiString("o")); printf("<INPUT TYPE=\"hidden\" name=\"g\" value=\"%s\" />\n", cgiString("g")); printf("<INPUT TYPE=\"hidden\" name=\"t\" value=\"%s\" />\n", cgiString("t")); printf("<INPUT TYPE=\"hidden\" name=\"i\" value=\"%s\" />\n", cgiString("i")); printf("<INPUT TYPE=\"hidden\" name=\"hgsid\" value=\"%s\" />\n", cart->sessionId); printf("<BR>"); printf("<INPUT TYPE=\"submit\" VALUE=\"Submit\" />\n"); printf("</FORM><P>\n"); } static void printLimitWarning(struct sqlConnection *conn, char *markerTable, char *item, int itemLimit, char *sectionList) { char query[4000]; // no need to check for illegal characters in sectionList sqlSafef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%-s) ", markerTable, item, sectionList); if (sqlNeedQuickNum(conn, query) > itemLimit) { printf("<b>This marker is mentioned more than %d times</b><BR>\n", itemLimit); printf("The results would take too long to load in your browser and are " "therefore limited to the %d most recent articles.<P>\n", itemLimit); } } /* keep only uppercase letters in string*/ void eraseAllButUpper(char *s) { char *in, *out; char c; in = out = s; for (;;) { c = *in++; if (c == 0) break; if (isupper(c)) *out++ = c; } *out = 0; } static char* printShortArticleInfo(char **row) { /* print a two-line description of article */ char *articleId = row[0]; char *url = row[1]; char *title = row[2]; char *authors = row[3]; char *citation = row[4]; char *year = row[5]; char *pmid = row[6]; url = mangleUrl(url); printf("<A HREF=\"%s\">%s</A><BR> ", url, title); // cut author string at 40 chars, like scholar printf("<span style=\"color:gray\">"); if (strlen(authors)>40) { authors[60] = 0; printf("<SMALL>%s...</SMALL> ", authors); } else if (!isEmpty(authors)) printf("<SMALL>%s</SMALL> ", authors); // first word of citation is journal name char *words[10]; int wordCount = chopCommas(citation, words); char *journal = NULL; if (wordCount!=0) journal = words[0]; // optional: print the little gray line with author, journal, year info bool didPrint = FALSE; printf("<small>"); if (year!=NULL && differentWord(year, "0")) { printf("%s", year); didPrint = TRUE; } if (!isEmpty(journal)) { printf(" - %s ", journal); didPrint = TRUE; } if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 ) { printf(", <A HREF=\"https://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid); didPrint = TRUE; } printf("</small></span>\n"); if (didPrint) printf("<BR>\n"); if (pubsDebug) printf("articleId=%s", articleId); return articleId; } static void printSnippets(struct sqlResult *srSnip) { char **snipRow; struct hash *doneSnips = newHash(0); // avoid printing a sentence twice int snipCount = 0; struct slPair *secSnips = NULL; // add all pairs to the list, remove duplicated snippets (ignore all lowercase chars) while ((snipRow = sqlNextRow(srSnip)) != NULL) { char *section = cloneString(snipRow[0]); char *snippet = cloneString(snipRow[1]); char *snipHash = cloneString(snippet); eraseAllButUpper(snipHash); if (hashLookup(doneSnips, snipHash)!=NULL) { //printf("<b>already seen</b></br>"); continue; } slPairAdd(&secSnips, section, snippet); hashAdd(doneSnips, snipHash, 0); snipCount++; } hashFree(&doneSnips); // now iterate over list and print struct slPair *pair; printf("<DIV CLASS=\"snips\">\n"); int i = 0; for (pair = secSnips; pair != NULL; pair = pair->next) { char *section = pair->name; char *snippet = pair->val; // print snippet printf("<I>"); printAddWbr(snippet, 40); printf("<style=\"color:gray\">...</style>\n"); if (differentWord(section, "unknown")) printf(" <SPAN style=\"color:gray\">(%s)</SPAN>", section); printf("</I>"); printf("<BR>"); if (snipCount>2) { if (i==0) { // alternative to "more": <img src=\"../images/add_sm.gif\"> printf("<A id='snowSnips_more' class=\"showSnips\" href=\"#\" >more</A><BR><DIV class=\"hiddenSnips\" style=\"display:none\">"); jsOnEventById("click", "snowSnips_more", "$(this).nextUntil('.shownSnips').slideToggle(); return false;"); } if (i==snipCount-2) printf("</DIV><div class=\"shownSnips\"></div>"); } i++; } slPairFreeList(&secSnips); sqlFreeResult(&srSnip); printf("</DIV><P>"); } static void printMarkerSnippets(struct sqlConnection *conn, char *articleTable, char *markerTable, char *item) /* print page with article info and snippets from articles */ { /* do not show more snippets than this limit */ int itemLimit=100; char *artExtIdFilter = cgiOptionalString("pubsFilterExtId"); /* This will have to be activated with the move to new Elsevier identifiers, ~Oct 2013 */ //if (startsWith(artExtIdFilter)) //replaceInStr(artExtIdFilter, "ELS", "PII") char *sectionList = makeSqlMarkerList(); if (artExtIdFilter==NULL) { printSectionCheckboxes(); printLimitWarning(conn, markerTable, item, itemLimit, sectionList); printf("<H3>Snippets from Publications:</H3>"); } struct sqlResult *sr = queryMarkerRows(conn, markerTable, articleTable, item, \ itemLimit, sectionList, artExtIdFilter); // better readable if not across the whole screen printf("<DIV style=\"width:1024px; font-size:100%%\">\n"); char **row; // loop over articles and print out snippets for each while ((row = sqlNextRow(sr)) != NULL) { char *articleId = printShortArticleInfo(row); struct sqlConnection *snipConn = hAllocConn(database); struct sqlResult *srSnip = querySnippets(snipConn, markerTable, articleId, item, sectionList); printSnippets(srSnip); hFreeConn(&snipConn); printf("<HR>"); } printf("</DIV>\n"); freeMem(sectionList); sqlFreeResult(&sr); } static char *urlToLogoUrl(char *pubsArticleTable, char *articleId, char *urlOrig) /* return a string with relative path of logo for publisher given the url of * fulltext or a table/articleId, has to be freed */ { struct sqlConnection *conn = hAllocConn(database); char *pubCode = NULL; if (hHasField("hgFixed", pubsArticleTable, "publisher")) { char query[4000]; sqlSafef(query, sizeof(query), "SELECT publisher from %s where articleId=%s", pubsArticleTable, articleId); pubCode = sqlQuickString(conn, query); // if no publisher is specified, we use the source (e.g. bing, pmc, elsevier, etc) // to find the logo if (isEmpty(pubCode)) { sqlSafef(query, sizeof(query), "SELECT source from %s where articleId=%s", pubsArticleTable, articleId); pubCode = sqlQuickString(conn, query); } } else { // get top-level domain url if not publisher field char url[1024]; memcpy(url, urlOrig, sizeof(url)); char *slashParts[20]; // split http://www.sgi.com/test -> to [http:,www.sgi.com,test] int partCount = chopString(url, "/", slashParts, ArraySize(slashParts)); if (partCount<3) return NULL; // split www.sgi.com to [www,sgi,com] char *dotParts[20]; partCount = chopString(slashParts[1], ".", dotParts, ArraySize(dotParts)); if (partCount<3) return NULL; pubCode = dotParts[partCount-2]; } // construct path to image char *logoUrl = needMem(512); safef(logoUrl, 512, "../images/pubs_%s.png", pubCode); return logoUrl; } static char *printArticleInfo(struct sqlConnection *conn, char *item, char *pubsArticleTable) /* Header with information about paper, return documentId */ { char query[512]; sqlSafef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract, pmid, " "source, extId FROM %s WHERE articleId='%s'", pubsArticleTable, item); struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *articleId=NULL; if ((row = sqlNextRow(sr)) == NULL) { printf("Could not resolve articleId %s, this is an internal error.\n", item); printf("Please send an email to max@soe.ucsc.edu\n"); sqlFreeResult(&sr); return NULL; } articleId = cloneString(row[0]); char *url = row[1]; char *title = row[2]; char *authors = row[3]; char *cit = row[4]; char *abstract = row[5]; char *pmid = row[6]; articleSource = row[7]; extId = row[8]; url = mangleUrl(url); if (strlen(abstract)==0) abstract = "(No abstract available for this article. " "Please follow the link to the fulltext above by clicking on the titel or the fulltext image.)"; if (stringIn("sciencedirect.com", url)) { pubsHasSupp = FALSE; pubsIsElsevier = TRUE; } // authors title printf("<DIV style=\"width:1024px; font-size:100%%\">\n"); printf("<P>%s</P>\n", authors); // // logo of publisher char *logoUrl = urlToLogoUrl(pubsArticleTable, articleId, url); if (logoUrl) printf("<a href=\"%s\"><img align=\"right\" hspace=\"20\" src=\"%s\"></a>\n", url, logoUrl); freeMem(logoUrl); printf("<div style=\"width:800px\">"); printf("<A TARGET=\"_blank\" HREF=\"%s\"><B>%s</B></A></div>\n", url, title); printf("</DIV>\n"); printf("</DIV>\n"); printf("<P style=\"width:1024px; font-size:80%%\">%s", cit); if (strlen(pmid)!=0 && strcmp(pmid, "0")) printf(", <A HREF=\"https://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid); printf("</P>\n"); printf("<P style=\"width:1024px; font-size:100%%\">%s</P>\n", abstract); if (pubsIsElsevier) printf("<P><SMALL>Copyright 2012 Elsevier B.V. All rights reserved.</SMALL></P>"); sqlFreeResult(&sr); return articleId; } static struct hash *getSeqIdHash(struct sqlConnection *conn, char *trackTable, \ char *articleId, char *item, char *seqName, int start) /* return a hash with the sequence IDs for a given chain of BLAT matches */ { if (start==-1) return NULL; char query[512]; /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */ sqlSafef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable); char *seqIdPresent = sqlQuickString(conn, query); if (!seqIdPresent) { return NULL; } /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/ sqlSafef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " "and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start); printDebug(query); // split comma-sep list into parts char *seqIdCoordString = sqlQuickString(conn, query); char *seqIdCoords[1024]; if (isEmpty(seqIdCoordString)) return NULL; int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i<partCount; i++) { if (pubsDebug) printf("annotId %s<br>", seqIdCoords[i]); hashAdd(seqIdHash, seqIdCoords[i], NULL); } return seqIdHash; } static void printSeqHeaders(bool showDesc, bool isClickedSection) /* print table and headers */ { //style=\"margin: 10px auto; width: 98%%\"style=\"background-color: #fcecc0\" web2StartTableC("stdTbl centeredStdTbl"); web2StartTheadC("stdTblHead"); if (showDesc) web2PrintHeaderCell("Article file", 10); // yif sequences have no flanking text at M. Krauthammer's request if (stringIn("yif", articleSource)) web2PrintHeaderCell("Matching sequences", 60); else web2PrintHeaderCell("One row per sequence, with flanking text, sequence in bold", 60); if (pubsDebug) web2PrintHeaderCell("Identifiers", 30); if (!isClickedSection && !pubsDebug) web2PrintHeaderCell("Link to matching genomic location", 20); web2EndThead(); web2StartTbodyS("font-family: Arial, Helvetica, sans-serif; line-height: 1.5em; font-size: 0.9em;"); } void printHgTracksLink(char *db, char *chrom, int start, int end, char *linkText, char *optUrlStr) /* print link to hgTracks for db at pos */ { char buf[1024]; if (linkText==NULL) { char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); safef(buf, sizeof(buf), "%s:%s-%s (%s)", chrom, startBuf, endBuf, db); linkText = buf; } if (optUrlStr==NULL) optUrlStr = ""; printf("<A HREF=\"%s&db=%s&position=%s:%d-%d&%s\">%s</A>\n", hgTracksPathAndSettings(), db, chrom, start, end, optUrlStr, linkText); } void printGbLinks(struct slName *locs) /* print hash keys in format hg19/chr1:1-1000 as links */ { struct slName *el; for (el = locs; el != NULL; el = el->next) { char *locString = el->name; char *db = cloneNextWordByDelimiter(&locString, '/'); char *chrom = cloneNextWordByDelimiter(&locString, ':'); char *startStr = cloneNextWordByDelimiter(&locString, '-'); char *endStr = cloneString(locString); int start = atoi(startStr); int end = atoi(endStr); printHgTracksLink(db, chrom, start, end, NULL, NULL); printf("<br>"); freeMem(endStr); //XX why can't I free these? freeMem(chrom); freeMem(startStr); freeMem(db); } } void removeFlank (char *snippet) /* keep only the parts inside <b> to </b> of a string, modifies the string in place */ { char *startPtr = stringIn("<B>", snippet); char *endPtr = stringIn("</B>", snippet); if (startPtr!=0 && endPtr!=0 && startPtr<endPtr) { char *buf = stringBetween("<B>", "</B>", snippet); memcpy(snippet, buf, strlen(buf)+1); freeMem(buf); } } static void printYifSection(char *clickedFileUrl) /* print section with the image on yif and a link to it */ { // parse out yif file Id from yif url to generate link to yif page struct netParsedUrl npu; netParseUrl(clickedFileUrl, &npu); struct hash *params = NULL; struct cgiVar* paramList = NULL; char *paramStr = strchr(npu.file, '?'); cgiParseInput(paramStr, ¶ms, ¶mList); struct cgiVar *var = hashFindVal(params, "file"); char *figId = NULL; if (var!=NULL && var->val!=NULL) figId = var->val; char yifPageUrl[4096]; if (figId) { safef(yifPageUrl, sizeof(yifPageUrl), "http://krauthammerlab.med.yale.edu/imagefinder/Figure.external?sp=S%s%%2F%s", extId, figId); } else return; web2StartSection("section", "<A href=\"%s\">Yale Image Finder</a>: figure where sequences were found", yifPageUrl); web2ImgLink(yifPageUrl, clickedFileUrl, "Image from YIF", 600, 10, 10); web2EndSection(); } static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable) /* print a section with a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql // I support two different schemas: new and old. On old tables, there is no fileUrl yet on the annotations // that means that oldQuery just uses an empty string for the fileUrl field. char *oldQuery = "SELECT fileDesc, snippet, locations, annotId, sequence, \"\" FROM %s WHERE articleId='%s'"; char *newQuery = "SELECT fileDesc, snippet, locations, annotId, sequence, fileUrl FROM %s WHERE articleId='%s'"; char *queryTemplate = oldQuery; if (hHasField("hgFixed", pubsSequenceTable, "fileUrl")) queryTemplate = newQuery; char query[4096]; sqlSafef(query, sizeof(query), queryTemplate, pubsSequenceTable, articleId); printDebug(query); struct sqlResult *sr = sqlGetResult(conn, query); // construct title for section char *otherFormat = NULL; if (fasta) otherFormat = "table"; else otherFormat = "fasta"; char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(%s format)</SMALL></A>\n", title, cartSidUrlString(cart), cgiOptionalString("o"), cgiOptionalString("t"), cgiString("g"), cgiString("i"), !fasta, otherFormat); web2StartSection("pubsSection", "%s", fullTitle); // print filtering link at start of table & table headers if (isClickedSection) { printFilterLink(pslTable, articleId, articleTable); } if (!fasta) printSeqHeaders(showDesc, isClickedSection); // output rows char **row; // the URL of the file from the clicked sequences, for YIF char *clickedFileUrl = NULL; bool foundSkippedRows = FALSE; int rowId = 0; while ((row = sqlNextRow(sr)) != NULL) { rowId++; char *fileDesc = row[0]; char *snippet = row[1]; char *locString= row[2]; //char *artId = row[3]; //char *fileId = row[4]; //char *seqId = row[5]; char *annotId = row[3]; char *seq = row[4]; char *fileUrl = row[5]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation //char annotId[100]; // some debugging help //safef(annotId, 100, "%10d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); //if (pubsDebug) //printf("artId %s, file %s, annot %s -> annotId %s<br>", artId, fileId, seqId, annotId); // only display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } // if we're in the clicked section and the current sequence is one that matched here // then keep the current URL, as we might need it afterwards else clickedFileUrl = cloneString(fileUrl); // suppress non-matches if the sequences come from YIF as figures can // contain tons of non-matching sequences if (stringIn("yif", articleSource) && isEmpty(locString)) { foundSkippedRows = TRUE; continue; } if (fasta) { if (strlen(extId)!=0) printf("<tt><pre>>%s-%d\n", extId, rowId); else printf("<tt><pre>>seq%d\n", rowId); writeSeqWithBreaks(stdout, seq, strlen(seq), 80); printf("</pre></tt>\n"); } else { web2StartRow(); // column 1: type of file (main or supp) if (showDesc) { char linkStr[4096]; if (isEmpty(fileDesc)) fileDesc = "main text"; safef(linkStr, sizeof(linkStr), "<a href=\"%s\">%s</a>", fileUrl, fileDesc); web2PrintCellS("word-break:break-all", linkStr); } // column 2: snippet web2StartCellS("word-break:break-all"); if (stringIn("yif", articleSource)) removeFlank(snippet); printAddWbr(snippet, 40); web2EndCell(); // optional debug info column if (pubsDebug) //web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); web2PrintCellF("annotId %s", annotId); // column 3: print links to locations, only print this in the 2nd section if (!isClickedSection && !pubsDebug) { // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" //locs = charSepToSlNames(locString, ','); web2StartCell(); char *locArr[1024]; int partCount = chopString(locString, ",", locArr, ArraySize(locArr)); if (partCount==0) printf("No matches"); else { struct slName *locs; locs = slNameListFromStringArray(locArr, partCount); slUniqify(&locs, slNameCmp, slNameFree); printGbLinks(locs); printf("<br>"); slFreeList(&locs); } web2EndCell(); } web2EndRow(); } } if (!fasta) web2EndTable(); web2EndSection(); /* Yale Image finder files contain links to the image itself */ if (pubsDebug) printf("%s %s %d", articleSource, clickedFileUrl, isClickedSection); if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) printYifSection(clickedFileUrl); freeMem(clickedFileUrl); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection *conn, char *trackTable, char *pslTable, char *articleId, char *item, char *seqName, int start, bool fileDesc, bool fasta, char *articleTable) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) skippedRows = printSeqSection(articleId, "Sequences matching here", \ fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable); else skippedRows=1; if (skippedRows) { // the section title should change if the data comes from the yale image finder = a figure char *docType = "article"; if (stringIn("yif", articleSource)) docType = "figure"; char title[1024]; if (clickedSeqs) safef(title, sizeof(title), "Other Sequences in this %s", docType); // NO clicked seqs can happen if the hgc was called with no o or t parameters // from somewhere outside the browser, like elsevier or europmc else safef(title, sizeof(title), "Sequences in this %s", docType); printSeqSection(articleId, title, \ fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable); } freeHash(&clickedSeqs); } static void printTrackVersion(struct trackDb *tdb, struct sqlConnection *conn, char *item) { char versionString[256]; char dateReference[256]; char headerTitle[512]; /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { char query[256]; sqlSafef(query, sizeof(query), \ "SELECT version,dateReference FROM hgFixed.trackVersion " "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database); struct sqlResult *sr = sqlGetResult(conn, query); char **row; /* in case of NULL result from the table */ versionString[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { safef(versionString, sizeof(versionString), "version %s", row[0]); safef(dateReference, sizeof(dateReference), "%s", row[1]); } sqlFreeResult(&sr); } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) /* copied from otherOrgs.c */ { char query[512]; struct sqlResult *sr; char **row; bioSeq *seq = NULL; sqlSafef(query, sizeof(query), "select sequence from %s where annotId = '%s'", table, id); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { AllocVar(seq); seq->name = cloneString(id); seq->dna = cloneString(row[0]); seq->size = strlen(seq->dna); } sqlFreeResult(&sr); return seq; } void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item) /* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli */ { bioSeq *oSeq = NULL; char title[1024]; safef(title, sizeof title, "Literature Sequence vs Genomic"); htmlFramesetStart(title); struct psl *psl = getAlignments(conn, pslTable, item); if (psl == NULL) errAbort("Couldn't find alignment at %s:%s", pslTable, item); oSeq = getSeq(conn, seqTable, item); if (oSeq == NULL) errAbort("%s is in pslTable but not in sequence table. Internal error.", item); enum gfType qt; if (psl->qSize!=oSeq->size) { qt = gftProt; // trying to correct pslMap's changes to qSize/qStarts and blockSizes psl->strand[1]=psl->strand[0]; psl->strand[0]='+'; psl->strand[2]=0; psl->qSize = psl->qSize/3; psl->match = psl->match/3; // Take care of codons that go over block boundaries: // Convert a block with blockSizes=58,32 and qStarts=0,58, // to blockSizes=19,11 and qStarts=0,19 int i; int remaind = 0; for (i=0; i<psl->blockCount; i++) { psl->qStarts[i] = psl->qStarts[i]/3; int bs = psl->blockSizes[i]; remaind += (bs % 3); if (remaind>=3) { bs += 1; remaind -= 3; } psl->blockSizes[i] = bs/3; } } else qt = gftDna; showSomeAlignment(psl, oSeq, qt, 0, oSeq->size, NULL, 0, 0); } void doPubsDetails(struct trackDb *tdb, char *item) /* publications custom display */ { int start = cgiOptionalInt("o", -1); int end = cgiOptionalInt("t", -1); char *trackTable = cgiString("g"); char *aliTable = cgiOptionalString("aliTable"); int fasta = cgiOptionalInt("fasta", 0); pubsDebug = cgiOptionalInt("debug", 0); struct sqlConnection *conn = hAllocConn(database); char *articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable"); if (stringIn("Psl", trackTable)) { if (aliTable!=NULL) { pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); pubsAli(conn, trackTable, pubsSequenceTable, item); return; } else { genericHeader(tdb, item); struct psl *psl = getAlignments(conn, trackTable, item); printf("<H3>Genomic Alignment with sequence found in publication fulltext</H3>"); printAlignmentsSimple(psl, start, trackTable, trackTable, item); } } else { printTrackVersion(tdb, conn, item); if (stringIn("Marker", trackTable)) { char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable"); if (start!=-1) printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); char *articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); printSeqInfo(conn, trackTable, pslTable, articleId, item, \ seqName, start, pubsHasSupp, fasta, articleTable); } } } printTrackHtml(tdb); hFreeConn(&conn); }