847161d7b92ce779e0d8caee3efc1e4711749748
max
Mon Mar 11 12:48:00 2013 -0700
Many changes after review by the Krauthammer lab to adapt the display to YIF (Yale Image Finder) OCR images
diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index 267ff80..3c53c8f 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -178,48 +178,65 @@
static char *mangleUrl(char *url)
/* Add publisher-specific parameters to url and return the resulting url.
 * Urls that do not contain "sciencedirect.com" are returned unchanged (the
 * very same pointer, not a copy). For sciencedirect urls a cgi parameter is
 * appended and "article" is replaced by "svapps"; in that case the returned
 * string comes from replaceChars(). */
{
if (!stringIn("sciencedirect.com", url))
    return url;

// cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect
char *sdAddParam = "?svAppaddApp=298535";
char *longUrl = catTwoStrings(url, sdAddParam);
char *newUrl = replaceChars(longUrl, "article", "svapps");

// longUrl is only an intermediate copy. replaceChars() hands back its own
// allocation (per its contract in common.h), so release the intermediate
// string here instead of leaking it.
freeMem(longUrl);
return newUrl;
}
+static void printPositionAndSize(int start, int end, bool showSize)
+{
+printf("Position: "
+ "",
+ hgTracksPathAndSettings(), database, seqName, start+1, end);
+char startBuf[64], endBuf[64];
+sprintLongWithCommas(startBuf, start + 1);
+sprintLongWithCommas(endBuf, end);
+printf("%s:%s-%s
\n", seqName, startBuf, endBuf);
+long size = end - start;
+sprintLongWithCommas(startBuf, size);
+if (showSize)
+ printf("Genomic Size: %s
\n", startBuf);
+}
+
static void printFilterLink(char *pslTrack, char *articleId, char *articleTable)
/* print a link to hgTracks with an additional cgi param to activate the single article filter */
{
int start = cgiInt("o");
int end = cgiInt("t");
char qBuf[1024];
struct sqlConnection *conn = hAllocConn(database);
safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId);
char *dispId = sqlQuickString(conn, qBuf);
printf(
"
");
printf(
"
",
hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId);
printf("Show these sequence matches individually on genome browser (activates track \""
"Individual matches for article\")
");
+
+ printPositionAndSize(start, end, 1);
printf(
"
\n");
hFreeConn(&conn);
}
static char *makeSqlMarkerList(void)
/* return list of sections from cgi vars, format like "'abstract','header'" */
{
int secCount = sizeof(pubsSecNames)/sizeof(char *);
struct slName *names = NULL;
int i;
for (i=0; i tag every x characters in the text to make text breakable.
*/
{
int i;
@@ -546,31 +569,41 @@
char *chrom = cloneNextWordByDelimiter(&locString, ':');
char *startStr = cloneNextWordByDelimiter(&locString, '-');
char *endStr = cloneString(locString);
int start = atoi(startStr);
int end = atoi(endStr);
printHgTracksLink(db, chrom, start, end, NULL, NULL);
printf("
");
freeMem(endStr); //XX why can't I free these?
freeMem(chrom);
freeMem(startStr);
freeMem(db);
}
}
-
+void removeFlank (char *snippet)
+/* keep only the part of the string between the opening and the closing tag; modifies the string in place */
+{
+char* startPtr = stringIn("", snippet);
+char* endPtr = stringIn("", snippet);
+if (startPtr!=0 && endPtr!=0 && startPtr", "", snippet);
+ memcpy(snippet, buf, strlen(buf)+1);
+ freeMem(buf);
+ }
+}
static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable)
/* Print a section with a table of sequences. Which sequences are shown depends
 * on isClickedSection and on the IDs in the clickedSeqs hash. There are two sections:
 * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs
 * - other seqs (isClickedSection=False) -> show all other seqs
 * Returns TRUE if any rows were skipped.
 * */
{
// get data from mysql
char query[4096];
safef(query, sizeof(query),
"SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence, fileUrl "
"FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId);
@@ -582,76 +615,99 @@
char *otherFormat = NULL;
if (fasta)
otherFormat = "table";
else
otherFormat = "fasta";
char fullTitle[5000];
safef(fullTitle, sizeof(fullTitle),
"%s (%s format)\n",
title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"),
!fasta, otherFormat);
web2StartSection("pubsSection", "%s", fullTitle);
// print filtering link at start of table & table headers
-if (isClickedSection)
+if (isClickedSection) {
printFilterLink(pslTable, articleId, articleTable);
+ }
if (!fasta)
printSeqHeaders(showDesc, isClickedSection);
// output rows
char **row;
-char *fileUrl = NULL; // we might need this after the loop for yif articles
+
+// the URL of the file from the clicked sequences, for YIF
+char *clickedFileUrl = NULL;
+
bool foundSkippedRows = FALSE;
while ((row = sqlNextRow(sr)) != NULL)
{
char *fileDesc = row[0];
char *snippet = row[1];
char *locString= row[2];
char *artId = row[3];
char *fileId = row[4];
char *seqId = row[5];
char *seq = row[6];
- fileUrl = row[7];
+ char *fileUrl = row[7];
// annotation (=sequence) ID is a 64 bit int with 10 digits for
// article, 3 digits for file, 5 for annotation
char annotId[100];
+
+ // some debugging help
safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId));
if (pubsDebug)
printf("%s", annotId);
// only display this sequence if we're in the right section
if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) {
foundSkippedRows = TRUE;
continue;
}
+ // if we're in the clicked section and the current sequence is one that matched here
+ // then keep the current URL, as we might need it afterwards
+ else
+ clickedFileUrl = cloneString(fileUrl);
+
+ // suppress non-matches if the sequences come from YIF as figures can
+ // contain tons of non-matching sequences
+ if (stringIn("yif", articleSource) && isEmpty(locString)) {
+ foundSkippedRows = TRUE;
+ continue;
+ }
if (fasta)
printf(">%s
%s
", annotId, seq);
else
{
web2StartRow();
// column 1: type of file (main or supp)
if (showDesc)
- web2PrintCellS("word-break:break-all", fileDesc);
+ {
+ char linkStr[4096];
+ safef(linkStr, sizeof(linkStr), "%s", fileUrl, fileDesc);
+ web2PrintCellS("word-break:break-all", linkStr);
+ }
// column 2: snippet
web2StartCellS("word-break:break-all");
+ if (stringIn("yif", articleSource))
+ removeFlank(snippet);
printAddWbr(snippet, 40);
web2EndCell();
// optional debug info column
if (pubsDebug)
web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId);
// column 3: print links to locations, only print this in the 2nd section
if (!isClickedSection && !pubsDebug)
{
// format: hg19/chr1:300-400,mm9/chr1:60006-23234
// split on "," then split on "/"
//locs = charSepToSlNames(locString, ',');
web2StartCell();
@@ -667,52 +723,53 @@
printGbLinks(locs);
printf("
");
slFreeList(&locs);
}
web2EndCell();
}
web2EndRow();
}
}
if (!fasta)
web2EndTable();
web2EndSection();
/* Yale Image finder files contain links to the image itself */
-if (stringIn("yif", articleSource) && (fileUrl!=NULL) && isClickedSection) {
- char* imgTitle = "Sequences were found in text obtained with optical character recognition from this figure:\n";
+if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) {
+ char* imgTitle = "Yale Image Finder: figure where sequences were found";
web2StartSection("section", "%s", imgTitle);
- web2Img(fileUrl, "Image from YIF", 600, 10, 10);
+ web2Img(clickedFileUrl, "Image from YIF", 600, 10, 10);
web2EndSection();
}
+freeMem(clickedFileUrl);
sqlFreeResult(&sr);
return foundSkippedRows;
}
static void printSeqInfo(struct sqlConnection *conn, char *trackTable,
char *pslTable, char *articleId, char *item, char *seqName, int start,
bool fileDesc, bool fasta, char *articleTable)
/* print sequences, split into two sections:
 * one for sequences that were clicked, one for all others */
{
struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start);
bool skippedRows;
if (clickedSeqs)
- skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \
+ skippedRows = printSeqSection(articleId, "Sequences matching here", \
fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable);
else
skippedRows=1;
if (skippedRows)
{
// the section title should change if the data comes from the yale image finder = a figure
char* docType = "article";
if (stringIn("yif", articleSource))
docType = "figure";
char title[1024];
safef(title, sizeof(title), "Other Sequences in this %s", docType);
printSeqSection(articleId, title, \
fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable);
@@ -750,45 +807,30 @@
}
else
{
versionString[0] = 0;
dateReference[0] = 0;
}
if (versionString[0])
safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString);
else
safef(headerTitle, sizeof(headerTitle), "%s", item);
genericHeader(tdb, headerTitle);
}
-static void printPositionAndSize(int start, int end, bool showSize)
-{
-printf("Position: "
- "",
- hgTracksPathAndSettings(), database, seqName, start+1, end);
-char startBuf[64], endBuf[64];
-sprintLongWithCommas(startBuf, start + 1);
-sprintLongWithCommas(endBuf, end);
-printf("%s:%s-%s
\n", seqName, startBuf, endBuf);
-long size = end - start;
-sprintLongWithCommas(startBuf, size);
-if (showSize)
- printf("Genomic Size: %s
\n", startBuf);
-}
-
static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id)
/* copied from otherOrgs.c */
{
char query[512];
struct sqlResult *sr;
char **row;
bioSeq *seq = NULL;
safef(query, sizeof(query),
"select sequence from %s where annotId = '%s'", table, id);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
{
AllocVar(seq);
seq->name = cloneString(id);
seq->dna = cloneString(row[0]);
@@ -883,29 +925,28 @@
printf("Genomic Alignment with sequence found in publication fulltext
");
printAlignmentsSimple(psl, start, trackTable, trackTable, item);
}
}
else
{
printTrackVersion(tdb, conn, item);
if (stringIn("Marker", trackTable))
{
char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable");
printPositionAndSize(start, end, 0);
printMarkerSnippets(conn, articleTable, markerTable, item);
}
else
{
- printPositionAndSize(start, end, 1);
pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
char *articleId = printArticleInfo(conn, item, articleTable);
if (articleId!=NULL)
{
char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack");
printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable);
}
}
}
printTrackHtml(tdb);
hFreeConn(&conn);
}