847161d7b92ce779e0d8caee3efc1e4711749748 max Mon Mar 11 12:48:00 2013 -0700 many changes after review by krauthammer lab to adapt display to yif OCR images diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c index 267ff80..3c53c8f 100644 --- src/hg/hgc/pubs.c +++ src/hg/hgc/pubs.c @@ -178,48 +178,65 @@ static char *mangleUrl(char *url) /* add publisher specific parameters to url and return new url*/ { if (!stringIn("sciencedirect.com", url)) return url; // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect char *sdAddParam = "?svAppaddApp=298535"; char *longUrl = catTwoStrings(url, sdAddParam); char *newUrl = replaceChars(longUrl, "article", "svapps"); return newUrl; } +static void printPositionAndSize(int start, int end, bool showSize) +{ +printf("Position: " + "", + hgTracksPathAndSettings(), database, seqName, start+1, end); +char startBuf[64], endBuf[64]; +sprintLongWithCommas(startBuf, start + 1); +sprintLongWithCommas(endBuf, end); +printf("%s:%s-%s
\n", seqName, startBuf, endBuf); +long size = end - start; +sprintLongWithCommas(startBuf, size); +if (showSize) + printf("Genomic Size: %s
\n", startBuf); +} + static void printFilterLink(char *pslTrack, char *articleId, char *articleTable) /* print a link to hgTracks with an additional cgi param to activate the single article filter */ { int start = cgiInt("o"); int end = cgiInt("t"); char qBuf[1024]; struct sqlConnection *conn = hAllocConn(database); safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId); char *dispId = sqlQuickString(conn, qBuf); printf( "
"); printf( "

", hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId); printf("Show these sequence matches individually on genome browser (activates track \"" "Individual matches for article\")

"); + + printPositionAndSize(start, end, 1); printf( "
\n"); hFreeConn(&conn); } static char *makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(pubsSecNames)/sizeof(char *); struct slName *names = NULL; int i; for (i=0; i tag every x characters in the text to make text breakable. */ { int i; @@ -546,31 +569,41 @@ char *chrom = cloneNextWordByDelimiter(&locString, ':'); char *startStr = cloneNextWordByDelimiter(&locString, '-'); char *endStr = cloneString(locString); int start = atoi(startStr); int end = atoi(endStr); printHgTracksLink(db, chrom, start, end, NULL, NULL); printf("
"); freeMem(endStr); //XX why can't I free these? freeMem(chrom); freeMem(startStr); freeMem(db); } } - +void removeFlank (char *snippet) +/* keep only the parts inside to of a string, modifies the string in place */ +{ +char* startPtr = stringIn("", snippet); +char* endPtr = stringIn("", snippet); +if (startPtr!=0 && endPtr!=0 && startPtr", "", snippet); + memcpy(snippet, buf, strlen(buf)+1); + freeMem(buf); + } +} static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable) /* print a section with a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql char query[4096]; safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence, fileUrl " "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId); @@ -582,76 +615,99 @@ char *otherFormat = NULL; if (fasta) otherFormat = "table"; else otherFormat = "fasta"; char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), "%s (%s format)\n", title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), !fasta, otherFormat); web2StartSection("pubsSection", "%s", fullTitle); // print filtering link at start of table & table headers -if (isClickedSection) +if (isClickedSection) { printFilterLink(pslTable, articleId, articleTable); + } if (!fasta) printSeqHeaders(showDesc, isClickedSection); // output rows char **row; -char *fileUrl = NULL; // we might need this after the loop for yif articles + +// the URL of the file from the clicked sequences, for YIF +char *clickedFileUrl = NULL; + bool foundSkippedRows = FALSE; while ((row = sqlNextRow(sr)) != NULL) { char *fileDesc = row[0]; char *snippet = row[1]; char *locString= row[2]; char *artId = row[3]; char *fileId = row[4]; char *seqId = row[5]; char *seq = row[6]; - fileUrl = row[7]; + char *fileUrl = row[7]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation char annotId[100]; + + // some debugging help safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); if (pubsDebug) printf("%s", annotId); // only display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } + // if we're in the clicked section and the current sequence is one that matched here + // then keep the current URL, as we might need it afterwards + else + clickedFileUrl = cloneString(fileUrl); + + // suppress non-matches if the sequences come from YIF as figures can + // contain tons of non-matching sequences + if (stringIn("yif", articleSource) && isEmpty(locString)) { + foundSkippedRows = TRUE; + continue; + } if (fasta) printf(">%s
%s
", annotId, seq); else { web2StartRow(); // column 1: type of file (main or supp) if (showDesc) - web2PrintCellS("word-break:break-all", fileDesc); + { + char linkStr[4096]; + safef(linkStr, sizeof(linkStr), "%s", fileUrl, fileDesc); + web2PrintCellS("word-break:break-all", linkStr); + } // column 2: snippet web2StartCellS("word-break:break-all"); + if (stringIn("yif", articleSource)) + removeFlank(snippet); printAddWbr(snippet, 40); web2EndCell(); // optional debug info column if (pubsDebug) web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); // column 3: print links to locations, only print this in the 2nd section if (!isClickedSection && !pubsDebug) { // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" //locs = charSepToSlNames(locString, ','); web2StartCell(); @@ -667,52 +723,53 @@ printGbLinks(locs); printf("
"); slFreeList(&locs); } web2EndCell(); } web2EndRow(); } } if (!fasta) web2EndTable(); web2EndSection(); /* Yale Image finder files contain links to the image itself */ -if (stringIn("yif", articleSource) && (fileUrl!=NULL) && isClickedSection) { - char* imgTitle = "Sequences were found in text obtained with optical character recognition from this figure:\n"; +if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) { + char* imgTitle = "Yale Image Finder: figure where sequences were found"; web2StartSection("section", "%s", imgTitle); - web2Img(fileUrl, "Image from YIF", 600, 10, 10); + web2Img(clickedFileUrl, "Image from YIF", 600, 10, 10); web2EndSection(); } +freeMem(clickedFileUrl); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection *conn, char *trackTable, char *pslTable, char *articleId, char *item, char *seqName, int start, bool fileDesc, bool fasta, char *articleTable) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) - skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \ + skippedRows = printSeqSection(articleId, "Sequences matching here", \ fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable); else skippedRows=1; if (skippedRows) { // the section title should change if the data comes from the yale image finder = a figure char* docType = "article"; if (stringIn("yif", articleSource)) docType = "figure"; char title[1024]; safef(title, sizeof(title), "Other Sequences in this %s", docType); printSeqSection(articleId, title, \ fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable); @@ -750,45 +807,30 @@ } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } -static void printPositionAndSize(int start, int end, bool showSize) -{ -printf("Position: " - "", - hgTracksPathAndSettings(), database, seqName, start+1, end); -char startBuf[64], endBuf[64]; -sprintLongWithCommas(startBuf, start + 1); -sprintLongWithCommas(endBuf, end); -printf("%s:%s-%s
\n", seqName, startBuf, endBuf); -long size = end - start; -sprintLongWithCommas(startBuf, size); -if (showSize) - printf("Genomic Size: %s
\n", startBuf); -} - static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) /* copied from otherOrgs.c */ { char query[512]; struct sqlResult *sr; char **row; bioSeq *seq = NULL; safef(query, sizeof(query), "select sequence from %s where annotId = '%s'", table, id); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { AllocVar(seq); seq->name = cloneString(id); seq->dna = cloneString(row[0]); @@ -883,29 +925,28 @@ printf("

Genomic Alignment with sequence found in publication fulltext

"); printAlignmentsSimple(psl, start, trackTable, trackTable, item); } } else { printTrackVersion(tdb, conn, item); if (stringIn("Marker", trackTable)) { char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable"); printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { - printPositionAndSize(start, end, 1); pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); char *articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable); } } } printTrackHtml(tdb); hFreeConn(&conn); }