847161d7b92ce779e0d8caee3efc1e4711749748 max Mon Mar 11 12:48:00 2013 -0700 many changes after review by krauthammer lab to adapt display to yif OCR images diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c index 267ff80..3c53c8f 100644 --- src/hg/hgc/pubs.c +++ src/hg/hgc/pubs.c @@ -178,48 +178,65 @@ static char *mangleUrl(char *url) /* add publisher specific parameters to url and return new url*/ { if (!stringIn("sciencedirect.com", url)) return url; // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect char *sdAddParam = "?svAppaddApp=298535"; char *longUrl = catTwoStrings(url, sdAddParam); char *newUrl = replaceChars(longUrl, "article", "svapps"); return newUrl; } +static void printPositionAndSize(int start, int end, bool showSize) +{ +printf("<B>Position:</B> " + "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", + hgTracksPathAndSettings(), database, seqName, start+1, end); +char startBuf[64], endBuf[64]; +sprintLongWithCommas(startBuf, start + 1); +sprintLongWithCommas(endBuf, end); +printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); +long size = end - start; +sprintLongWithCommas(startBuf, size); +if (showSize) + printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); +} + static void printFilterLink(char *pslTrack, char *articleId, char *articleTable) /* print a link to hgTracks with an additional cgi param to activate the single article filter */ { int start = cgiInt("o"); int end = cgiInt("t"); char qBuf[1024]; struct sqlConnection *conn = hAllocConn(database); safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId); char *dispId = sqlQuickString(conn, qBuf); printf( " <div class=\"subsection\">"); printf( " <P><A HREF=\"%s&db=%s&position=%s%%3A%d-%d&pubsFilterArticleId=%s&%s=pack&hgFind.matches=%s\">", hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId); printf("Show these sequence matches individually on genome browser</A> (activates track \"" "Individual matches for article\")</P>"); + + printPositionAndSize(start, end, 1); printf( " </div> <!-- class: subsection --> \n"); hFreeConn(&conn); } static char *makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(pubsSecNames)/sizeof(char *); struct slName *names = NULL; int i; for (i=0; i<secCount; i++) { // add ' around name and add to list @@ -469,31 +486,37 @@ { hashAdd(seqIdHash, seqIdCoords[i], NULL); } return seqIdHash; } static void printSeqHeaders(bool showDesc, bool isClickedSection) /* print table and headers */ { //style=\"margin: 10px auto; width: 98%%\"style=\"background-color: #fcecc0\" web2StartTableC("stdTbl centeredStdTbl"); web2StartTheadC("stdTblHead"); if (showDesc) web2PrintHeaderCell("Article file", 10); + +// yif sequences have no flanking text at M. Krauthammer's request +if (stringIn("yif", articleSource)) + web2PrintHeaderCell("Matching sequences", 60); +else web2PrintHeaderCell("One row per sequence, with flanking text, sequence in bold", 60); + if (pubsDebug) web2PrintHeaderCell("Identifiers", 30); if (!isClickedSection && !pubsDebug) web2PrintHeaderCell("Chained matches with this sequence", 20); web2EndThead(); web2StartTbodyS("font-family: Arial, Helvetica, sans-serif; line-height: 1.5em; font-size: 0.9em;"); } static void printAddWbr(char *text, int distance) /* a crazy hack for firefox/mozilla that is unable to break long words in tables * We need to add a <wbr> tag every x characters in the text to make text breakable. */ { int i; @@ -546,31 +569,41 @@ char *chrom = cloneNextWordByDelimiter(&locString, ':'); char *startStr = cloneNextWordByDelimiter(&locString, '-'); char *endStr = cloneString(locString); int start = atoi(startStr); int end = atoi(endStr); printHgTracksLink(db, chrom, start, end, NULL, NULL); printf("<br>"); freeMem(endStr); //XX why can't I free these? freeMem(chrom); freeMem(startStr); freeMem(db); } } - +void removeFlank (char *snippet) +/* keep only the parts inside <b> to </b> of a string, modifies the string in place */ +{ +char* startPtr = stringIn("<B>", snippet); +char* endPtr = stringIn("</B>", snippet); +if (startPtr!=0 && endPtr!=0 && startPtr<endPtr) { + char* buf = stringBetween("<B>", "</B>", snippet); + memcpy(snippet, buf, strlen(buf)+1); + freeMem(buf); + } +} static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable) /* print a section with a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql char query[4096]; safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence, fileUrl " "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId); @@ -582,76 +615,99 @@ char *otherFormat = NULL; if (fasta) otherFormat = "table"; else otherFormat = "fasta"; char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(%s format)</SMALL></A>\n", title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), !fasta, otherFormat); web2StartSection("pubsSection", "%s", fullTitle); // print filtering link at start of table & table headers -if (isClickedSection) +if (isClickedSection) { printFilterLink(pslTable, articleId, articleTable); + } if (!fasta) printSeqHeaders(showDesc, isClickedSection); // output rows char **row; -char *fileUrl = NULL; // we might need this after the loop for yif articles + +// the URL of the file from the clicked sequences, for YIF +char *clickedFileUrl = NULL; + bool foundSkippedRows = FALSE; while ((row = sqlNextRow(sr)) != NULL) { char *fileDesc = row[0]; char *snippet = row[1]; char *locString= row[2]; char *artId = row[3]; char *fileId = row[4]; char *seqId = row[5]; char *seq = row[6]; - fileUrl = row[7]; + char *fileUrl = row[7]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation char annotId[100]; + + // some debugging help safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); if (pubsDebug) printf("%s", annotId); // only display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } + // if we're in the clicked section and the current sequence is one that matched here + // then keep the current URL, as we might need it afterwards + else + clickedFileUrl = cloneString(fileUrl); + + // suppress non-matches if the sequences come from YIF as figures can + // contain tons of non-matching sequences + if (stringIn("yif", articleSource) && isEmpty(locString)) { + foundSkippedRows = TRUE; + continue; + } if (fasta) printf(">%s<br>%s<br>", annotId, seq); else { web2StartRow(); // column 1: type of file (main or supp) if (showDesc) - web2PrintCellS("word-break:break-all", fileDesc); + { + char linkStr[4096]; + safef(linkStr, sizeof(linkStr), "<a href=\"%s\">%s</a>", fileUrl, fileDesc); + web2PrintCellS("word-break:break-all", linkStr); + } // column 2: snippet web2StartCellS("word-break:break-all"); + if (stringIn("yif", articleSource)) + removeFlank(snippet); printAddWbr(snippet, 40); web2EndCell(); // optional debug info column if (pubsDebug) web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); // column 3: print links to locations, only print this in the 2nd section if (!isClickedSection && !pubsDebug) { // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" //locs = charSepToSlNames(locString, ','); web2StartCell(); @@ -667,52 +723,53 @@ printGbLinks(locs); printf("<br>"); slFreeList(&locs); } web2EndCell(); } web2EndRow(); } } if (!fasta) web2EndTable(); web2EndSection(); /* Yale Image finder files contain links to the image itself */ -if (stringIn("yif", articleSource) && (fileUrl!=NULL) && isClickedSection) { - char* imgTitle = "Sequences were found in text obtained with optical character recognition from this figure:\n"; +if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) { + char* imgTitle = "<A href=\"http://krauthammerlab.med.yale.edu/imagefinder/\">Yale Image Finder</a>: figure where sequences were found"; web2StartSection("section", "%s", imgTitle); - web2Img(fileUrl, "Image from YIF", 600, 10, 10); + web2Img(clickedFileUrl, "Image from YIF", 600, 10, 10); web2EndSection(); } +freeMem(clickedFileUrl); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection *conn, char *trackTable, char *pslTable, char *articleId, char *item, char *seqName, int start, bool fileDesc, bool fasta, char *articleTable) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) - skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \ + skippedRows = printSeqSection(articleId, "Sequences matching here", \ fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable); else skippedRows=1; if (skippedRows) { // the section title should change if the data comes from the yale image finder = a figure char* docType = "article"; if (stringIn("yif", articleSource)) docType = "figure"; char title[1024]; safef(title, sizeof(title), "Other Sequences in this %s", docType); printSeqSection(articleId, title, \ fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable); @@ -750,45 +807,30 @@ } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } -static void printPositionAndSize(int start, int end, bool showSize) -{ -printf("<B>Position:</B> " - "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", - hgTracksPathAndSettings(), database, seqName, start+1, end); -char startBuf[64], endBuf[64]; -sprintLongWithCommas(startBuf, start + 1); -sprintLongWithCommas(endBuf, end); -printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); -long size = end - start; -sprintLongWithCommas(startBuf, size); -if (showSize) - printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); -} - static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) /* copied from otherOrgs.c */ { char query[512]; struct sqlResult *sr; char **row; bioSeq *seq = NULL; safef(query, sizeof(query), "select sequence from %s where annotId = '%s'", table, id); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { AllocVar(seq); seq->name = cloneString(id); seq->dna = cloneString(row[0]); @@ -883,29 +925,28 @@ printf("<H3>Genomic Alignment with sequence found in publication fulltext</H3>"); printAlignmentsSimple(psl, start, trackTable, trackTable, item); } } else { printTrackVersion(tdb, conn, item); if (stringIn("Marker", trackTable)) { char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable"); printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { - printPositionAndSize(start, end, 1); pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); char *articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable); } } } printTrackHtml(tdb); hFreeConn(&conn); }