e359dec8e2174a7d00c0d37153e59c81756208a9 max Mon Feb 27 11:07:32 2012 -0800 lowered maximum for t2g article snippet view diff --git src/hg/hgc/t2g.c src/hg/hgc/t2g.c index 129d548..8e596d3 100644 --- src/hg/hgc/t2g.c +++ src/hg/hgc/t2g.c @@ -1,488 +1,488 @@ /* t2g.c - display details of text2genome literature track (t2gxxx tables) */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "hgc.h" #include "hgColors.h" #include "trackDb.h" #include "web.h" #include "hash.h" #include "obscure.h" //include "hgTrackUi.h" // cgi var to activate debug output static int t2gDebug = 0; // internal section types in mysql table static char* t2gSecNames[] ={ "header", "abstract", "intro", "methods", "results", "discussion", "conclusions", "ack", "refs", "unknown" }; // // whether a checkbox is checked by default, have to correspond to t2gSecNames static int t2gSecChecked[] ={ 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 }; static char* t2gSequenceTable; static char* t2gArticleTable; static char* makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(t2gSecNames)/sizeof(char *); struct slName* names = NULL; int i; for (i=0; i<secCount; i++) { // add ' around name and add to list char* secName = t2gSecNames[i]; if (cgiOptionalInt(secName, t2gSecChecked[i])) { char nameBuf[100]; safef(nameBuf, sizeof(nameBuf), "'%s'", secName); slAddHead(&names, slNameNew(nameBuf)); } } if (names==0) errAbort("You need to specify at least one article section."); char* nameListString = slNameListToString(names, ','); slNameFree(names); return nameListString; } static struct sqlResult* queryMarkerRows(struct sqlConnection* conn, char* markerTable, char* articleTable, char* item, int itemLimit, char* sectionList) /* query marker rows from mysql, based on http parameters */ { char query[4000]; /* Mysql specific setting to make the group_concat function return longer strings */ sqlUpdate(conn, "SET SESSION group_concat_max_len = 40000"); safef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation," "group_concat(snippet, section SEPARATOR ' (...) ') FROM %s " "JOIN %s USING (articleId) " "WHERE markerId='%s' AND section in (%s) " "GROUP by articleId LIMIT %d;", markerTable, markerTable, articleTable, item, sectionList, itemLimit); if (t2gDebug) printf("%s", query); struct sqlResult *sr = sqlGetResult(conn, query); return sr; } static void printSectionCheckboxes() /* show a little form with checkboxes where user can select sections they want to show */ { // labels to show to user, have to correspond to t2gSecNames char *secLabels[] ={ "Title", "Abstract", "Introduction", "Methods", "Results", "Discussion", "Conclusions", "Acknowledgements", "References", "Not determined" }; int labelCount = sizeof(secLabels)/sizeof(char *); int i; printf("<P>\n"); printf("<B>Sections of article shown:</B><BR>\n"); printf("<FORM ACTION=\"hgc?%s&o=%s&t=%s&g=%s&i=%s\" METHOD=\"get\">\n", cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i")); for (i=0; i<labelCount; i++) { char* name = t2gSecNames[i]; // checkboxes default to 0 unless checked, see // http://stackoverflow.com/questions/2520952/how-come-checkbox-state-is-not-always-passed-along-to-php-script printf("<INPUT TYPE=\"hidden\" name=\"%s\" value=\"0\" />\n", t2gSecNames[i]); printf("<INPUT TYPE=\"checkbox\" name=\"%s\" ", name); int isChecked = cgiOptionalInt(name, t2gSecChecked[i]); if (isChecked) printf("value=\"1\" checked=\"yes\">%s</INPUT>\n", secLabels[i]); else printf("value=\"1\">%s</INPUT>\n", secLabels[i]); } printf("<INPUT TYPE=\"hidden\" name=\"o\" value=\"%s\" />\n", cgiString("o")); printf("<INPUT TYPE=\"hidden\" name=\"g\" value=\"%s\" />\n", cgiString("g")); printf("<INPUT TYPE=\"hidden\" name=\"t\" value=\"%s\" />\n", cgiString("t")); printf("<INPUT TYPE=\"hidden\" name=\"i\" value=\"%s\" />\n", cgiString("i")); printf("<INPUT TYPE=\"hidden\" name=\"hgsid\" value=\"%s\" />\n", cgiString("hgsid")); printf("<BR>"); printf("<INPUT TYPE=\"submit\" VALUE=\"Submit\" />\n"); printf("</FORM><P>\n"); } static void printLimitWarning(struct sqlConnection *conn, char* markerTable, char* item, int itemLimit, char* sectionList) { char query[4000]; safef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%s) ", markerTable, item, sectionList); if (sqlNeedQuickNum(conn, query) > itemLimit) { printf("<b>This marker is mentioned more than %d times</b><BR>\n", itemLimit); printf("The results would take too long to load in your browser and are " "therefore limited to %d articles.<P>\n", itemLimit); } } static void printMarkerSnippets(struct sqlConnection *conn, char* articleTable, char* markerTable, char* item) { /* do not show more snippets than this limit */ -int itemLimit=4000; +int itemLimit=1000; printSectionCheckboxes(); char* sectionList = makeSqlMarkerList(); printLimitWarning(conn, markerTable, item, itemLimit, sectionList); printf("<H3>Snippets from Publications:</H3>"); struct sqlResult* sr = queryMarkerRows(conn, markerTable, articleTable, item, itemLimit, sectionList); char **row; while ((row = sqlNextRow(sr)) != NULL) { char* articleId = row[0]; char* url = row[1]; char* title = row[2]; char* authors = row[3]; char* citation = row[4]; char* snippets = row[5]; printf("<A HREF=\"%s\">%s</A> ", url, title); printf("<SMALL>%s</SMALL>; ", authors); printf("<SMALL>%s</SMALL><BR>", citation); if (t2gDebug) printf("articleId=%s", articleId); printf("<I>%s</I><P>", snippets); printf("<HR>"); } freeMem(sectionList); } static char* printArticleInfo(struct sqlConnection *conn, char* item) /* Header with information about paper, return documentId */ { char query[512]; safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract FROM %s WHERE displayId='%s'", t2gArticleTable, item); struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *docId=0; if ((row = sqlNextRow(sr)) != NULL) { char* abstract = row[5]; if (strlen(abstract)==0) { abstract = "(No abstract found for this article. Please use the link to the fulltext above.)"; } docId = cloneString(row[0]); printf("<P>%s</P>\n", row[3]); printf("<A TARGET=\"_blank\" HREF=\"%s\"><B>%s</B></A>\n", row[1], row[2]); printf("<P style=\"width:800px; font-size:80%%\">%s</P>\n", row[4]); printf("<P style=\"width:800px; font-size:100%%\">%s</P>\n", abstract); } sqlFreeResult(&sr); return docId; } static struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, char* docId, char *item, char* seqName, int start) { char query[512]; /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */ safef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable); char* seqIdPresent = sqlQuickString(conn, query); if (!seqIdPresent) { return NULL; } /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash */ safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " "and chrom='%s' and chromStart=%d", trackTable, item, seqName, start); if (t2gDebug) puts(query); char* seqIdCoordString = sqlQuickString(conn, query); char* seqIdCoords[1024]; int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i<partCount; i++) { char* seqId[1024]; chopString(seqIdCoords[i], ":", seqId, ArraySize(seqId)); if (t2gDebug) printf("%s, %s<br>", seqId[0], seqId[1]); hashAdd(seqIdHash, seqId[0], seqId[1]); } freeMem(seqIdCoordString); return seqIdHash; } static void printSeqHeaders(bool showDesc, bool isClickedSection) { printf("<TABLE style=\"background-color: #%s\" WIDTH=\"100%%\" CELLPADDING=\"2\">\n", HG_COL_BORDER); printf("<TR style=\"background-color: #%s; color: #FFFFFF\">\n", HG_COL_TABLE_LABEL); if (showDesc) puts(" <TH style=\"width: 10%\">Article file</TH>\n"); puts(" <TH style=\"width: 70%\">One table row per sequence, with flanking text, sequence in bold</TH>\n"); if (t2gDebug) puts(" <TH style=\"width: 30%\">Identifiers</TH>\n"); if (!isClickedSection && !t2gDebug) puts(" <TH style=\"width: 20%\">Feature that includes this match</TH>\n"); puts("</TR>\n"); } static void printAddWbr(char* text, int distance) /* a crazy hack for firefox/mozilla that is unable to break long words in tables * We need to add a <wbr> tag every x characters in the text to make text breakable. */ { int i; i = 0; char* c; c = text; while (*c != 0){ { if (i % distance == 0) printf("<wbr>"); printf("%c", *c); c++; i++; } } } static bool printSeqSection(char* docId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta) /* print a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql char query[4096]; safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId,fileId, seqId, sequence FROM %s WHERE articleId='%s';", t2gSequenceTable, docId); if (t2gDebug) puts(query); struct sqlResult *sr = sqlGetResult(conn, query); // construct title for section char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(switch fasta format)</SMALL></A>", title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), !fasta); webNewSection(fullTitle); if (!fasta) printSeqHeaders(showDesc, isClickedSection); char **row; bool foundSkippedRows = FALSE; while ((row = sqlNextRow(sr)) != NULL) { char* fileDesc = row[0]; char* snippet = row[1]; char* locList = row[2]; char* artId = row[3]; char* fileId = row[4]; char* seqId = row[5]; char* seq = row[6]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation char annotId[100]; safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); // only display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } if (fasta) { printf("<TT>>%s<BR>%s<BR></TT>", annotId, seq); } else { printf("<TR style=\"background-color: #%s\">\n", HG_COL_LOCAL_TABLE); if (showDesc) printf("<TD style=\"word-break:break-all\">%s\n", fileDesc); //printf("<TD><I>%s</I></TD>\n", snippet); printf("<TD style=\"word-break:break-all;\"><I>"); printAddWbr(snippet, 40); printf("</I></TD>\n"); if (t2gDebug) { printf("<TD>article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); } // print links to locations if (!isClickedSection && !t2gDebug) { struct slName *locs; // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" locs = charSepToSlNames(locList, ','); printf("<TD>"); if (locs==NULL) printf("No matches"); for ( ; locs!=NULL; locs = locs->next) { char* locString = locs->name; char* parts[2]; int partCount; partCount = chopString(locString, "/", parts, ArraySize(parts)); assert(partCount==2); char* db = parts[0]; char* pos = parts[1]; printf("<A HREF=\"../cgi-bin/hgTracks?%s&db=%s&position=%s\">%s (%s)</A>", cartSidUrlString(cart), db, pos, pos, db); printf("<BR>"); } printf("</TD>\n"); } printf("</TR>\n"); } } printf("</TR>\n"); webEndSectionTables(); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection* conn, char* trackTable, char* docId, char* item, char* seqName, int start, bool fileDesc, bool fasta) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, docId, item, seqName, start); bool skippedRows; if (clickedSeqs) skippedRows = printSeqSection(docId, "Sequences used to construct this feature", fileDesc, conn, clickedSeqs, 1, fasta); else skippedRows=1; if (skippedRows) printSeqSection(docId, "Other Sequences in this article", fileDesc, conn, clickedSeqs, 0, fasta); //else //printf("<P>No more sequences<P>"); if (endsWith(trackTable, "Elsevier")) printf("<P><SMALL>Article information and excerpts are Copyright 2011 Elsevier B.V. All rights reserved.</SMALL><P>"); freeHash(&clickedSeqs); } static void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) { char versionString[256]; char dateReference[256]; char headerTitle[512]; /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { char query[256]; safef(query, sizeof(query), "select version,dateReference from hgFixed.trackVersion where db = '%s' AND name = 't2g' order by updateTime DESC limit 1", database); struct sqlResult *sr = sqlGetResult(conn, query); char **row; /* in case of NULL result from the table */ versionString[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { safef(versionString, sizeof(versionString), "version %s", row[0]); safef(dateReference, sizeof(dateReference), "%s", row[1]); } sqlFreeResult(&sr); } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } static void printPositionAndSize(int start, int end, bool showSize) { printf("<B>Position:</B> " "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", hgTracksPathAndSettings(), database, seqName, start+1, end); char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); long size = end - start; sprintLongWithCommas(startBuf, size); if (showSize) printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); } void doT2gDetails(struct trackDb *tdb, char *item) /* text2genome.org custom display */ { int start = cgiInt("o"); int end = cgiInt("t"); char* trackTable = cgiString("g"); int fasta = cgiOptionalInt("fasta", 0); t2gDebug = cgiOptionalInt("debug", 0); struct sqlConnection *conn = hAllocConn(database); printTrackVersion(tdb, conn, item); if (startsWith("t2gMarker", trackTable)) { char* markerTable = hashMustFindVal(tdb->settingsHash, "t2gMarkerTable"); char* articleTable = hashMustFindVal(tdb->settingsHash, "t2gArticleTable"); printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { printPositionAndSize(start, end, 1); t2gSequenceTable = hashMustFindVal(tdb->settingsHash, "t2gSequenceTable"); t2gArticleTable = hashMustFindVal(tdb->settingsHash, "t2gArticleTable"); char* docId = printArticleInfo(conn, item); if (docId!=NULL) { bool showDesc; showDesc = (! endsWith(trackTable, "Elsevier")); // avoid clutter: Elsevier has only main text printSeqInfo(conn, trackTable, docId, item, seqName, start, showDesc, fasta); } } printTrackHtml(tdb); hFreeConn(&conn); }