7f33fb42a1fb58a02a3f798e56332e5beac65133 max Sat Mar 17 13:21:09 2012 -0700 renaming all publications tracks tables to pubsXXXX diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c new file mode 100644 index 0000000..c731e05 --- /dev/null +++ src/hg/hgc/pubs.c @@ -0,0 +1,509 @@ +/* pubs.c - display details of publiations literature track (pubsxxx tables) */ + +#include "common.h" +#include "jksql.h" +#include "hdb.h" +#include "hgc.h" +#include "hgColors.h" +#include "trackDb.h" +#include "web.h" +#include "hash.h" +#include "obscure.h" +//include "hgTrackUi.h" + +// cgi var to activate debug output +static int pubsDebug = 0; + +// internal section types in mysql table +static char* pubsSecNames[] ={ + "header", "abstract", + "intro", "methods", + "results", "discussion", + "conclusions", "ack", + "refs", "unknown" }; +// +// whether a checkbox is checked by default, have to correspond to pubsSecNames +static int pubsSecChecked[] ={ + 1, 1, + 1, 1, + 1, 1, + 1, 0, + 0, 1 }; + +static char* pubsSequenceTable; +static char* pubsArticleTable; + +static char* makeSqlMarkerList(void) +/* return list of sections from cgi vars, format like "'abstract','header'" */ +{ +int secCount = sizeof(pubsSecNames)/sizeof(char *); +struct slName* names = NULL; +int i; +for (i=0; i<secCount; i++) +{ + // add ' around name and add to list + char* secName = pubsSecNames[i]; + if (cgiOptionalInt(secName, pubsSecChecked[i])) + { + char nameBuf[100]; + safef(nameBuf, sizeof(nameBuf), "'%s'", secName); + slAddHead(&names, slNameNew(nameBuf)); + } +} + +if (names==0) + errAbort("You need to specify at least one article section."); + +char* nameListString = slNameListToString(names, ','); +slNameFree(names); +return nameListString; +} + + +static struct sqlResult* queryMarkerRows(struct sqlConnection* conn, char* markerTable, \ + char* articleTable, char* item, int itemLimit, char* sectionList) +/* query marker rows from mysql, based on http parameters */ +{ +char query[4000]; +/* Mysql specific setting to make the group_concat function return longer strings */ +sqlUpdate(conn, "SET SESSION group_concat_max_len = 100000"); + +safef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation," + "group_concat(snippet, section SEPARATOR ' (...) ') FROM %s " + "JOIN %s USING (articleId) " + "WHERE markerId='%s' AND section in (%s) " + "GROUP by articleId " + "ORDER BY year DESC " + "LIMIT %d", + markerTable, markerTable, articleTable, item, sectionList, itemLimit); + +if (pubsDebug) + printf("%s", query); + +struct sqlResult *sr = sqlGetResult(conn, query); + +return sr; +} + + +static void printSectionCheckboxes() +/* show a little form with checkboxes where user can select sections they want to show */ +{ +// labels to show to user, have to correspond to pubsSecNames +char *secLabels[] ={ + "Title", "Abstract", + "Introduction", "Methods", + "Results", "Discussion", + "Conclusions", "Acknowledgements", + "References", "Not determined" }; + +int labelCount = sizeof(secLabels)/sizeof(char *); + +int i; +printf("<P>\n"); +printf("<B>Sections of article shown:</B><BR>\n"); +printf("<FORM ACTION=\"hgc?%s&o=%s&t=%s&g=%s&i=%s\" METHOD=\"get\">\n", + cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i")); + +for (i=0; i<labelCount; i++) +{ + char* name = pubsSecNames[i]; + // checkboxes default to 0 unless checked, see + // http://stackoverflow.com/questions/2520952/how-come-checkbox-state-is-not-always-passed-along-to-php-script + printf("<INPUT TYPE=\"hidden\" name=\"%s\" value=\"0\" />\n", pubsSecNames[i]); + printf("<INPUT TYPE=\"checkbox\" name=\"%s\" ", name); + + int isChecked = cgiOptionalInt(name, pubsSecChecked[i]); + if (isChecked) + printf("value=\"1\" checked=\"yes\">%s</INPUT>\n", secLabels[i]); + else + printf("value=\"1\">%s</INPUT>\n", secLabels[i]); +} + +printf("<INPUT TYPE=\"hidden\" name=\"o\" value=\"%s\" />\n", cgiString("o")); +printf("<INPUT TYPE=\"hidden\" name=\"g\" value=\"%s\" />\n", cgiString("g")); +printf("<INPUT TYPE=\"hidden\" name=\"t\" value=\"%s\" />\n", cgiString("t")); +printf("<INPUT TYPE=\"hidden\" name=\"i\" value=\"%s\" />\n", cgiString("i")); +printf("<INPUT TYPE=\"hidden\" name=\"hgsid\" value=\"%d\" />\n", cart->sessionId); +printf("<BR>"); +printf("<INPUT TYPE=\"submit\" VALUE=\"Submit\" />\n"); +printf("</FORM><P>\n"); +} + +static void printLimitWarning(struct sqlConnection *conn, char* markerTable, + char* item, int itemLimit, char* sectionList) +{ +char query[4000]; +safef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%s) ", markerTable, item, sectionList); +if (sqlNeedQuickNum(conn, query) > itemLimit) +{ + printf("<b>This marker is mentioned more than %d times</b><BR>\n", itemLimit); + printf("The results would take too long to load in your browser and are " + "therefore limited to %d articles.<P>\n", itemLimit); +} +} + +static void printMarkerSnippets(struct sqlConnection *conn, char* articleTable, char* markerTable, char* item) +{ + +/* do not show more snippets than this limit */ +int itemLimit=1000; + +printSectionCheckboxes(); +char* sectionList = makeSqlMarkerList(); +printLimitWarning(conn, markerTable, item, itemLimit, sectionList); + +printf("<H3>Snippets from Publications:</H3>"); +struct sqlResult* sr = queryMarkerRows(conn, markerTable, articleTable, item, itemLimit, sectionList); + +char **row; +while ((row = sqlNextRow(sr)) != NULL) +{ + char* articleId = row[0]; + char* url = row[1]; + char* title = row[2]; + char* authors = row[3]; + char* citation = row[4]; + char* snippets = row[5]; + char* addParam = ""; + if (strstrNoCase(url, "sciencedirect.com")) + addParam = "?svAppaddApp=298535"; // add the "UCSC matches" sciverse application to article view + printf("<A HREF=\"%s%s\">%s</A> ", url, addParam, title); + printf("<SMALL>%s</SMALL>; ", authors); + printf("<SMALL>%s</SMALL><BR>", citation); + if (pubsDebug) + printf("articleId=%s", articleId); + printf("<I>%s</I><P>", snippets); + printf("<HR>"); +} + +freeMem(sectionList); +sqlFreeResult(&sr); +} + +static char* printArticleInfo(struct sqlConnection *conn, char* item) +/* Header with information about paper, return documentId */ +{ + char query[512]; + + safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract FROM %s WHERE displayId='%s'", pubsArticleTable, item); + + struct sqlResult *sr = sqlGetResult(conn, query); + char **row; + char *articleId=NULL; + if ((row = sqlNextRow(sr)) != NULL) + { + char* abstract = row[5]; + if (strlen(abstract)==0) + { + abstract = "(No abstract found for this article. Please use the link to the fulltext above.)"; + } + articleId = cloneString(row[0]); + printf("<P>%s</P>\n", row[3]); + printf("<A TARGET=\"_blank\" HREF=\"%s\"><B>%s</B></A>\n", row[1], row[2]); + printf("<P style=\"width:800px; font-size:80%%\">%s</P>\n", row[4]); + printf("<P style=\"width:800px; font-size:100%%\">%s</P>\n", abstract); + } + sqlFreeResult(&sr); + return articleId; +} + +static struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, \ + char* articleId, char *item, char* seqName, int start) +/* return a hash with the sequence IDs for a given chain of BLAT matches */ +{ + char query[512]; + /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */ + safef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable); + char* seqIdPresent = sqlQuickString(conn, query); + if (!seqIdPresent) { + return NULL; + } + + /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/ + safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " + "and chrom='%s' and chromStart=%d", trackTable, item, seqName, start); + if (pubsDebug) + puts(query); + + // split comma-sep list into parts + char* seqIdCoordString = sqlQuickString(conn, query); + char* seqIdCoords[1024]; + int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); + int i; + + // format of part <fileId:3><annotId:5>:<start>-<end>, e.g. 12312345:0-23 + struct hash *seqIdHash = NULL; + seqIdHash = newHash(0); + for (i=0; i<partCount; i++) + { + char* annotIdHitCoord[1024]; + chopString(seqIdCoords[i], ":", annotIdHitCoord, ArraySize(annotIdHitCoord)); + if (pubsDebug) + printf("%s, %s<br>", annotIdHitCoord[0], annotIdHitCoord[1]); + + // put articleId and annotId together to create annotation(=sequence) Id + char fullAnnotId[4096]; + safef(fullAnnotId, sizeof(fullAnnotId), "%s%s", articleId, annotIdHitCoord[0]); + hashAdd(seqIdHash, fullAnnotId, annotIdHitCoord[1]); + } + freeMem(seqIdCoordString); + return seqIdHash; +} + +static void printSeqHeaders(bool showDesc, bool isClickedSection) +{ + printf("<TABLE style=\"background-color: #%s\" WIDTH=\"100%%\" CELLPADDING=\"2\">\n", HG_COL_BORDER); + printf("<TR style=\"background-color: #%s; color: #FFFFFF\">\n", HG_COL_TABLE_LABEL); + if (showDesc) + puts(" <TH style=\"width: 10%\">Article file</TH>\n"); + puts(" <TH style=\"width: 70%\">One table row per sequence, with flanking text, sequence in bold</TH>\n"); + if (pubsDebug) + puts(" <TH style=\"width: 30%\">Identifiers</TH>\n"); + + if (!isClickedSection && !pubsDebug) + puts(" <TH style=\"width: 20%\">Feature that includes this match</TH>\n"); + puts("</TR>\n"); +} + +static void printAddWbr(char* text, int distance) +/* a crazy hack for firefox/mozilla that is unable to break long words in tables + * We need to add a <wbr> tag every x characters in the text to make text breakable. + */ +{ +int i; +i = 0; +char* c; +c = text; +while (*c != 0){ + { + if (i % distance == 0) + printf("<wbr>"); + printf("%c", *c); + c++; + i++; + } +} +} + +static bool printSeqSection(char* articleId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta) +/* print a table of sequences, show only sequences with IDs in hash, + * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs + * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs + * - other seqs (isClickedSection=False) -> show all other seqs + * + * */ +{ + // get data from mysql + char query[4096]; + safef(query, sizeof(query), + "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence " + "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId); + if (pubsDebug) + puts(query); + struct sqlResult *sr = sqlGetResult(conn, query); + + // construct title for section + char fullTitle[5000]; + safef(fullTitle, sizeof(fullTitle), + "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(switch fasta format)</SMALL></A>", + title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), + !fasta); + + webNewSection(fullTitle); + + if (!fasta) + printSeqHeaders(showDesc, isClickedSection); + + char **row; + bool foundSkippedRows = FALSE; + while ((row = sqlNextRow(sr)) != NULL) + { + char* fileDesc = row[0]; + char* snippet = row[1]; + char* locList = row[2]; + char* artId = row[3]; + char* fileId = row[4]; + char* seqId = row[5]; + char* seq = row[6]; + + // annotation (=sequence) ID is a 64 bit int with 10 digits for + // article, 3 digits for file, 5 for annotation + char annotId[100]; + safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); + + // only display this sequence if we're in the right section + if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { + foundSkippedRows = TRUE; + continue; + } + + if (fasta) + { + printf("<TT>>%s<BR>%s<BR></TT>", annotId, seq); + } + else + { + printf("<TR style=\"background-color: #%s\">\n", HG_COL_LOCAL_TABLE); + if (showDesc) + printf("<TD style=\"word-break:break-all\">%s\n", fileDesc); + //printf("<TD><I>%s</I></TD>\n", snippet); + printf("<TD style=\"word-break:break-all;\"><I>"); + printAddWbr(snippet, 40); + printf("</I></TD>\n"); + if (pubsDebug) + { + printf("<TD>article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); + } + + // print links to locations + if (!isClickedSection && !pubsDebug) { + struct slName *locs; + // format: hg19/chr1:300-400,mm9/chr1:60006-23234 + // split on "," then split on "/" + locs = charSepToSlNames(locList, ','); + printf("<TD>"); + if (locs==NULL) + printf("No matches"); + for ( ; locs!=NULL; locs = locs->next) + { + char* locString = locs->name; + char* parts[2]; + int partCount; + partCount = chopString(locString, "/", parts, ArraySize(parts)); + assert(partCount==2); + char* db = parts[0]; + char* pos = parts[1]; + printf("<A HREF=\"../cgi-bin/hgTracks?%s&db=%s&position=%s\">%s (%s)</A>", cartSidUrlString(cart), db, pos, pos, db); + printf("<BR>"); + } + printf("</TD>\n"); + } + printf("</TR>\n"); + } + } + printf("</TR>\n"); + webEndSectionTables(); + sqlFreeResult(&sr); + return foundSkippedRows; +} + +static void printSeqInfo(struct sqlConnection* conn, char* trackTable, + char* articleId, char* item, char* seqName, int start, bool fileDesc, bool fasta) + /* print sequences, split into two sections + * two sections: one for sequences that were clicked, one for all others*/ +{ + struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); + + bool skippedRows; + if (clickedSeqs) + skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \ + fileDesc, conn, clickedSeqs, 1, fasta); + else + skippedRows=1; + + if (skippedRows) + printSeqSection(articleId, "Other Sequences in this article", \ + fileDesc, conn, clickedSeqs, 0, fasta); + printf("<P><SMALL>Copyright 2011 Elsevier B.V. All rights reserved.</SMALL><P>"); + freeHash(&clickedSeqs); + +} + +static void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) +{ + char versionString[256]; + char dateReference[256]; + char headerTitle[512]; + /* see if hgFixed.trackVersion exists */ + boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); + + if (trackVersionExists) + { + char query[256]; + safef(query, sizeof(query), \ + "SELECT version,dateReference FROM hgFixed.trackVersion " + "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database); + struct sqlResult *sr = sqlGetResult(conn, query); + char **row; + + /* in case of NULL result from the table */ + versionString[0] = 0; + while ((row = sqlNextRow(sr)) != NULL) + { + safef(versionString, sizeof(versionString), "version %s", + row[0]); + safef(dateReference, sizeof(dateReference), "%s", + row[1]); + } + sqlFreeResult(&sr); + } + else + { + versionString[0] = 0; + dateReference[0] = 0; + } + + if (versionString[0]) + safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); + else + safef(headerTitle, sizeof(headerTitle), "%s", item); + + genericHeader(tdb, headerTitle); +} + +static void printPositionAndSize(int start, int end, bool showSize) +{ + printf("<B>Position:</B> " + "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", + hgTracksPathAndSettings(), database, seqName, start+1, end); + char startBuf[64], endBuf[64]; + sprintLongWithCommas(startBuf, start + 1); + sprintLongWithCommas(endBuf, end); + printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); + long size = end - start; + sprintLongWithCommas(startBuf, size); + if (showSize) + printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); +} + +void doPubsDetails(struct trackDb *tdb, char *item) +/* publications custom display */ +{ + +int start = cgiInt("o"); +int end = cgiInt("t"); +char* trackTable = cgiString("g"); +int fasta = cgiOptionalInt("fasta", 0); + +pubsDebug = cgiOptionalInt("debug", 0); + +struct sqlConnection *conn = hAllocConn(database); +printTrackVersion(tdb, conn, item); + +if (startsWith("pubsMarker", trackTable)) +{ + char* markerTable = hashMustFindVal(tdb->settingsHash, "pubsMarkerTable"); + char* articleTable = hashMustFindVal(tdb->settingsHash, "pubsArticleTable"); + printPositionAndSize(start, end, 0); + printMarkerSnippets(conn, articleTable, markerTable, item); +} +else +{ + printPositionAndSize(start, end, 1); + pubsSequenceTable = hashMustFindVal(tdb->settingsHash, "pubsSequenceTable"); + pubsArticleTable = hashMustFindVal(tdb->settingsHash, "pubsArticleTable"); + + char* articleId = printArticleInfo(conn, item); + if (articleId!=NULL) + { + bool showDesc; + showDesc = (! endsWith(trackTable, "Elsevier")); + // avoid clutter: Elsevier has only main text + printSeqInfo(conn, trackTable, articleId, item, seqName, start, showDesc, fasta); + } +} + +printTrackHtml(tdb); +hFreeConn(&conn); +}