847161d7b92ce779e0d8caee3efc1e4711749748
max
  Mon Mar 11 12:48:00 2013 -0700
Many changes after review by the Krauthammer lab to adapt the display to YIF (Yale Image Finder) OCR images
diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index 267ff80..3c53c8f 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -178,48 +178,65 @@
 
 
 static char *mangleUrl(char *url) 
 /* add publisher specific parameters to url and return new url*/
 {
 if (!stringIn("sciencedirect.com", url))
     return url;
     
 // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect
 char *sdAddParam = "?svAppaddApp=298535"; 
 char *longUrl = catTwoStrings(url, sdAddParam);
 char *newUrl = replaceChars(longUrl, "article", "svapps");
 return newUrl;
 }
 
+static void printPositionAndSize(int start, int end, bool showSize)
+/* print position start+1..end as a link to hgTracks; also print end-start as size if showSize */
+{
+char startStr[64], endStr[64], sizeStr[64];
+sprintLongWithCommas(startStr, start + 1);
+sprintLongWithCommas(endStr, end);
+sprintLongWithCommas(sizeStr, (long)(end - start));
+printf("<B>Position:</B>&nbsp;"
+           "<A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d\">",
+       hgTracksPathAndSettings(), database, seqName, start+1, end);
+printf("%s:%s-%s</A><BR>\n", seqName, startStr, endStr);
+if (showSize)
+    printf("<B>Genomic Size:</B>&nbsp;%s<BR>\n", sizeStr);
+}
+
 static void printFilterLink(char *pslTrack, char *articleId, char *articleTable)
 /* print a link to hgTracks with an additional cgi param to activate the single article filter */
 {
     int start = cgiInt("o");
     int end = cgiInt("t");
     char qBuf[1024];
     struct sqlConnection *conn = hAllocConn(database);
     safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId);
     char *dispId = sqlQuickString(conn, qBuf);
 
     printf(
         "      <div class=\"subsection\">");
     printf(
         "      <P><A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d&amp;pubsFilterArticleId=%s&amp;%s=pack&amp;hgFind.matches=%s\">",
         hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId);
 
     printf("Show these sequence matches individually on genome browser</A> (activates track \""
         "Individual matches for article\")</P>");
+    // also print the position and the genomic size (showSize=1) inside this subsection
+    printPositionAndSize(start, end, 1);
     printf(
         "      </div> <!-- class: subsection --> \n");
     hFreeConn(&conn);
     
 }
 
 static char *makeSqlMarkerList(void)
 /* return list of sections from cgi vars, format like "'abstract','header'" */
 {
 int secCount = sizeof(pubsSecNames)/sizeof(char *);
 struct slName *names = NULL;
 int i;
 for (i=0; i<secCount; i++) 
 {
     // add ' around name and add to list
@@ -469,31 +486,37 @@
     {
     hashAdd(seqIdHash, seqIdCoords[i], NULL);
     }
 return seqIdHash;
 }
 
 
 static void printSeqHeaders(bool showDesc, bool isClickedSection) 
 /* print table and headers */
 {
 //style=\"margin: 10px auto; width: 98%%\"style=\"background-color: #fcecc0\"
 web2StartTableC("stdTbl centeredStdTbl");
 web2StartTheadC("stdTblHead");
 if (showDesc)
     web2PrintHeaderCell("Article file", 10);
+// header of the sequence column; the un-indented call right after "else" is the else-body
+// yif sequences have no flanking text at M. Krauthammer's request
+if (stringIn("yif", articleSource))
+    web2PrintHeaderCell("Matching sequences", 60);
+else
 web2PrintHeaderCell("One row per sequence, with flanking text, sequence in bold", 60);
+
 if (pubsDebug)
     web2PrintHeaderCell("Identifiers", 30);
 
 if (!isClickedSection && !pubsDebug)
     web2PrintHeaderCell("Chained matches with this sequence", 20);
 web2EndThead();
 web2StartTbodyS("font-family: Arial, Helvetica, sans-serif; line-height: 1.5em; font-size: 0.9em;");
 }
 
 static void printAddWbr(char *text, int distance) 
 /* a crazy hack for firefox/mozilla that is unable to break long words in tables
  * We need to add a <wbr> tag every x characters in the text to make text breakable.
  */
 {
 int i;
@@ -546,31 +569,41 @@
     char *chrom    = cloneNextWordByDelimiter(&locString, ':');
     char *startStr = cloneNextWordByDelimiter(&locString, '-');
     char *endStr   = cloneString(locString);
 
     int start = atoi(startStr);
     int end = atoi(endStr);
     printHgTracksLink(db, chrom, start, end, NULL, NULL);
     printf("<br>");
     freeMem(endStr); //XX why can't I free these?
     freeMem(chrom);
     freeMem(startStr);
     freeMem(db);
     }
 }
 
-
+void removeFlank (char *snippet) 
+/* keep only the text between <B> and </B> of snippet, in place; no-op on NULL or missing tags */
+{
+char *startPtr = (snippet != NULL) ? stringIn("<B>", snippet) : NULL;
+char *endPtr = (startPtr != NULL) ? stringIn("</B>", snippet) : NULL;
+char *inner = (endPtr != NULL && startPtr < endPtr) ? stringBetween("<B>", "</B>", snippet) : NULL;
+if (inner == NULL)
+    return;
+memcpy(snippet, inner, strlen(inner)+1);
+freeMem(inner);
+}
 
 
 static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable)
 /* print a section with a table of sequences, show only sequences with IDs in hash,
  * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs 
  *   - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs
  *   - other seqs (isClickedSection=False) -> show all other seqs
  * 
  * */
 {
 // get data from mysql
 char query[4096];
 safef(query, sizeof(query), 
 "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence, fileUrl "
 "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId);
@@ -582,76 +615,99 @@
 char *otherFormat = NULL;
 if (fasta)
     otherFormat = "table";
 else
     otherFormat = "fasta";
 
 char fullTitle[5000];
 safef(fullTitle, sizeof(fullTitle), 
 "%s&nbsp;<A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(%s format)</SMALL></A>\n", 
 title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), 
 !fasta, otherFormat);
 
 web2StartSection("pubsSection", "%s", fullTitle);
 
 // print filtering link at start of table & table headers
-if (isClickedSection)
+if (isClickedSection) {
     printFilterLink(pslTable, articleId, articleTable);
+    }
 
 if (!fasta) 
     printSeqHeaders(showDesc, isClickedSection);
 
 // output rows
 char **row;
-char *fileUrl = NULL; // we might need this after the loop for yif articles
+
+// the URL of the file from the clicked sequences, for YIF
+char *clickedFileUrl = NULL; 
+
 bool foundSkippedRows = FALSE;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *fileDesc = row[0];
     char *snippet  = row[1];
     char *locString= row[2];
     char *artId    = row[3];
     char *fileId   = row[4];
     char *seqId    = row[5];
     char *seq      = row[6];
-    fileUrl  = row[7];
+    char *fileUrl  = row[7];
 
     // annotation (=sequence) ID is a 64 bit int with 10 digits for 
     // article, 3 digits for file, 5 for annotation
     char annotId[100];
+    
+    // some debugging help
     safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId));
     if (pubsDebug)
         printf("%s", annotId);
 
     // only display this sequence if we're in the right section
     if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) {
         foundSkippedRows = TRUE;
         continue;
     }
+    // if we're in the clicked section and the current sequence is one that matched here
+    // then keep the current URL, as we might need it afterwards
+    else
+        clickedFileUrl = cloneString(fileUrl);
+
+    // suppress non-matches if the sequences come from YIF as figures can 
+    // contain tons of non-matching sequences
+    if (stringIn("yif", articleSource) && isEmpty(locString)) {
+        foundSkippedRows = TRUE;
+        continue;
+    }
 
     if (fasta)
         printf(">%s<br>%s<br>", annotId, seq);
     else
         {
         web2StartRow();
 
         // column 1: type of file (main or supp)
         if (showDesc)
-            web2PrintCellS("word-break:break-all", fileDesc);
+            {
+            char linkStr[4096];
+            safef(linkStr, sizeof(linkStr), "<a href=\"%s\">%s</a>", fileUrl, fileDesc);
+            web2PrintCellS("word-break:break-all", linkStr);
+            }
         
         // column 2: snippet
         web2StartCellS("word-break:break-all");
+        if (stringIn("yif", articleSource))
+            removeFlank(snippet);
         printAddWbr(snippet, 40);
         web2EndCell();
 
         // optional debug info column
         if (pubsDebug) 
             web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId);
 
         // column 3: print links to locations, only print this in the 2nd section
         if (!isClickedSection && !pubsDebug) 
             {
             // format: hg19/chr1:300-400,mm9/chr1:60006-23234
             // split on "," then split on "/"
             //locs = charSepToSlNames(locString, ',');
 
             web2StartCell();
@@ -667,52 +723,53 @@
                 printGbLinks(locs);
                 printf("<br>");
                 slFreeList(&locs);
                 }
             web2EndCell();
             }
         web2EndRow();
         }
     }
 
 if (!fasta)
     web2EndTable();
 
 web2EndSection();
 /* Yale Image finder files contain links to the image itself */
-if (stringIn("yif", articleSource) && (fileUrl!=NULL) && isClickedSection) {
-    char* imgTitle = "Sequences were found in text obtained with optical character recognition from this figure:\n";
+if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) {
+    char* imgTitle = "<A href=\"http://krauthammerlab.med.yale.edu/imagefinder/\">Yale Image Finder</a>: figure where sequences were found";
     web2StartSection("section", "%s", imgTitle);
-    web2Img(fileUrl, "Image from YIF", 600, 10, 10); 
+    web2Img(clickedFileUrl, "Image from YIF", 600, 10, 10); 
     web2EndSection();
 }
+freeMem(clickedFileUrl);
 
 sqlFreeResult(&sr);
 return foundSkippedRows;
 }
 
 static void printSeqInfo(struct sqlConnection *conn, char *trackTable,
     char *pslTable, char *articleId, char *item, char *seqName, int start, 
     bool fileDesc, bool fasta, char *articleTable)
     /* print sequences, split into two sections 
      * two sections: one for sequences that were clicked, one for all others*/
 {
 struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start);
 
 bool skippedRows;
 if (clickedSeqs) 
-    skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \
+    skippedRows = printSeqSection(articleId, "Sequences matching here", \
         fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable);
 else 
     skippedRows=1;
 
 if (skippedRows) 
     {
     // the section title should change if the data comes from the yale image finder = a figure
     char* docType = "article";
     if (stringIn("yif", articleSource))
         docType = "figure";
     char title[1024];
     safef(title, sizeof(title), "Other Sequences in this %s", docType);
 
     printSeqSection(articleId, title, \
         fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable);
@@ -750,45 +807,30 @@
     }
 else
     {
     versionString[0] = 0;
     dateReference[0] = 0;
     }
 
 if (versionString[0])
     safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString);
 else
     safef(headerTitle, sizeof(headerTitle), "%s", item);
 
 genericHeader(tdb, headerTitle);
 }
 
-static void printPositionAndSize(int start, int end, bool showSize)
-{
-printf("<B>Position:</B>&nbsp;"
-           "<A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d\">",
-                  hgTracksPathAndSettings(), database, seqName, start+1, end);
-char startBuf[64], endBuf[64];
-sprintLongWithCommas(startBuf, start + 1);
-sprintLongWithCommas(endBuf, end);
-printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf);
-long size = end - start;
-sprintLongWithCommas(startBuf, size);
-if (showSize)
-    printf("<B>Genomic Size:</B>&nbsp;%s<BR>\n", startBuf);
-}
-
 static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id)
 /* copied from otherOrgs.c */
 {
 char query[512];
 struct sqlResult *sr;
 char **row;
 bioSeq *seq = NULL;
 safef(query, sizeof(query), 
     "select sequence from %s where annotId = '%s'", table, id);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) != NULL)
     {
     AllocVar(seq);
     seq->name = cloneString(id);
     seq->dna = cloneString(row[0]);
@@ -883,29 +925,28 @@
         printf("<H3>Genomic Alignment with sequence found in publication fulltext</H3>");
         printAlignmentsSimple(psl, start, trackTable, trackTable, item);
         }
     }
 else
     {
     printTrackVersion(tdb, conn, item);
     if (stringIn("Marker", trackTable))
         {
         char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable");
         printPositionAndSize(start, end, 0);
         printMarkerSnippets(conn, articleTable, markerTable, item);
         }
     else
         {
-        printPositionAndSize(start, end, 1);
         pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
         char *articleId = printArticleInfo(conn, item, articleTable);
         if (articleId!=NULL) 
             {
             char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack");
             printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable);
             }
     }
 }
 
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }