df08d88ccff94a899f7aca37c01955c89fcffea9
max
  Thu Aug 22 14:00:56 2013 -0700
revamping the pubs marker page integrating all of Brooke's comments that I can remember, adding javascript for "more" links, allowing additional filters for future ensembl incoming links, #6833
diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index dbbeb44..d5abca1 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -1,30 +1,31 @@
 /* pubs.c - display details of publiations literature track (pubsxxx tables) */
 
 #include "common.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hgc.h"
 #include "hgColors.h"
 #include "trackDb.h"
 #include "web.h"
 #include "hash.h"
 #include "net.h"
 #include "obscure.h"
 #include "common.h"
 #include "string.h"
 #include "dystring.h"
+//#include "ctype.h"
 
 // cgi var to activate debug output
 static int pubsDebug = 0;
 
 // global var for printArticleInfo to indicate if article has suppl info 
 // Most publishers have supp data.
 // If they don't have it, we can skip the fileType column in the table
 bool pubsHasSupp = TRUE; 
 
 // global var for printArticleInfo to indicate if article is elsevier
 // If it's elsevier, we print the copyright line
 bool pubsIsElsevier = FALSE; 
 
 // the article source is used to modify other parts of the page
 static char *articleSource;
@@ -270,57 +271,68 @@
 
 char *nameListString = slNameListToString(names, ',');
 slNameFree(names);
 return nameListString;
 }
 
 
 static struct sqlResult *queryMarkerRows(struct sqlConnection *conn, char *markerTable, \
     char *articleTable, char *item, int itemLimit, char *sectionList, char *artExtIdFilter)
 /* query marker rows from mysql, based on http parameters  
  * optionally filter on sections or just a single article
  * */
 {
 char query[4000];
 /* Mysql specific setting to make the group_concat function return longer strings */
-sqlUpdate(conn, "NOSQLINJ SET SESSION group_concat_max_len = 100000");
+// (not set any more: the query below does not call group_concat)
  
 char artFilterSql[4000];
 artFilterSql[0] = 0;
 if (isNotEmpty(artExtIdFilter))
     safef(artFilterSql, sizeof(artFilterSql), " AND extId='%s' ", artExtIdFilter);
 
 // no need to check for illegal characters in sectionList
-sqlSafef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation, "  
-    "pmid, extId, "
-    "group_concat(snippet, concat(\" (section: \", section, \")\") SEPARATOR ' (...) ') FROM %s "
+sqlSafef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation, year, "  
+    "pmid FROM %s "
+    // snippets are not selected here; querySnippets() looks them up per article
     "JOIN %s USING (articleId) "
     "WHERE markerId='%s' AND section in (%-s) "
     "%-s"
-    "GROUP by articleId "
+    // no GROUP BY: presumably SELECT distinct is what collapses repeated article rows
     "ORDER BY year DESC "
     "LIMIT %d",
     markerTable, markerTable, articleTable, item, sectionList, artFilterSql, itemLimit);
 
 if (pubsDebug)
     printf("%s", query);
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 
 return sr;
 }
 
+static struct sqlResult *querySnippets(struct sqlConnection *conn, char *markerTable,
+    char *articleId, char *markerId, char *sectionList)
+/* Run the snippet lookup for one article/marker pair, limited to the sections in
+ * sectionList, and hand back the open result of (section, snippet) rows. */
+{
+char snippetSql[4000];
+sqlSafef(snippetSql, sizeof(snippetSql),
+    "SELECT section, snippet FROM %s WHERE articleId=%s AND markerId='%s' AND section in (%-s) ",
+    markerTable, articleId, markerId, sectionList);
+return sqlGetResult(conn, snippetSql);
+}
 
 static void printSectionCheckboxes()
 /* show a little form with checkboxes where user can select sections they want to show */
 {
 // labels to show to user, have to correspond to pubsSecNames
 char *secLabels[] ={
       "Title", "Abstract",
       "Introduction", "Methods",
       "Results", "Discussion",
       "Conclusions", "Acknowledgements",
       "References", "Undetermined section (e.g. for a brief communication)" };
 
 int labelCount = sizeof(secLabels)/sizeof(char *);
 
 int i;
@@ -381,74 +393,182 @@
 while (*c != 0) 
     {
     if ((*c=='&') || (*c=='<'))
        doNotBreak = TRUE;
     if (*c==';' || (*c =='>'))
        doNotBreak = FALSE;
 
     printf("%c", *c);
     if (i % distance == 0 && ! doNotBreak) 
         printf("<wbr>");
     c++;
     i++;
     }
 }
 
-static void printMarkerSnippets(struct sqlConnection *conn, char *articleTable, char *markerTable, char *item)
+/* Compact s in place so that only its uppercase letters remain, in their original
+ * order; s must be a writable NUL-terminated string (it can only get shorter). */
+void eraseAllButUpper(char *s)
+{
+char *in = s, *out = s;
+char c;
+
+while ((c = *in++) != 0)
+    {
+    // <ctype.h> functions are only defined for unsigned char values and EOF; the
+    // cast keeps bytes >= 0x80 (e.g. UTF-8 text) from being passed as negatives
+    if (isupper((unsigned char)c))
+        *out++ = c;
+    }
+// terminate the compacted string
+*out = 0;
+}
+
+char* printShortArticleInfo(char **row) {
+/* Print a short HTML description of one article: linked title, then a gray line with
+ * authors, year, journal and PubMed link. row holds articleId, url, title, authors,
+ * citation, year, pmid (in that order); authors may be truncated in place.
+ * Returns row[0], the articleId, which still points into row. */
+char *articleId = row[0];
+char *url       = mangleUrl(row[1]);
+char *title     = row[2];
+char *authors   = row[3];
+char *citation  = row[4];
+char *year      = row[5];
+char *pmid      = row[6];
+printf("<A HREF=\"%s\">%s</A><BR> ", url, title);
+printf("<span style=\"color:gray\">");
+// cut author string at 40 chars, like scholar; the cut position has to match the
+// length test, otherwise the 0 is written behind the end of a 41-59 char string
+if (strlen(authors)>40)
+    {
+    authors[40] = 0;
+    printf("<SMALL>%s...</SMALL> - ", authors);
+    }
+else
+    printf("<SMALL>%s</SMALL> - ", authors);
+
+// first comma-separated field of citation is the journal name; an empty citation
+// yields no fields at all, so do not read words[0] in that case
+char *words[10];
+char *journal = (chopCommas(citation, words) > 0) ? words[0] : "";
+printf("<SMALL>%s - %s ", year, journal);
+if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 )
+    printf(", <A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
+printf("</SMALL>\n");
+printf("</span>\n");
+printf("<BR>\n");
+if (pubsDebug)
+    printf("articleId=%s", articleId);
+return articleId;
+}
+
+void printSnippets(struct sqlResult *srSnip)
+/* Print the snippets of one article as HTML, skipping repeats; with more than two
+ * snippets the middle ones go into a collapsed DIV behind a "more" link.
+ * Frees srSnip, so the caller must not use or free it afterwards. */
+{
+char **snipRow;
+struct hash *doneSnips = newHash(0); // avoid printing a sentence twice
+int snipCount = 0;
+struct slPair *secSnips = NULL;
+// collect (section, snippet) pairs; two snippets count as the same sentence when
+// their uppercase letters agree, all other characters are ignored
+while ((snipRow = sqlNextRow(srSnip)) != NULL)
+    {
+    char *snipHash = cloneString(snipRow[1]);
+    eraseAllButUpper(snipHash);
+    if (hashLookup(doneSnips, snipHash)!=NULL)
+        {
+        freeMem(snipHash); // duplicate: nothing gets stored, so release the key
+        continue;
+        }
+    char *section  = cloneString(snipRow[0]);
+    char *snippet  = cloneString(snipRow[1]);
+    slPairAdd(&secSnips, section, snippet);
+    hashAdd(doneSnips, snipHash, 0);
+    snipCount++;
+    }
+hashFree(&doneSnips);
+// now iterate over list and print
+struct slPair *pair;
+printf("<DIV CLASS=\"snips\">\n");
+int i = 0;
+for (pair = secSnips; pair != NULL; pair = pair->next)
+    {
+    char *section = pair->name;
+    char *snippet = pair->val;
+    printf("<I>");
+    printAddWbr(snippet, 40);
+    // a SPAN, not a "style" tag: <style=...> is not an HTML element
+    printf("<SPAN style=\"color:gray\">...</SPAN>\n");
+    if (differentWord(section, "unknown"))
+        printf(" <SPAN style=\"color:gray\">(%s)</SPAN>", section);
+    printf("</I>");
+    printf("<BR>");
+    if (snipCount>2)
         {
+        if (i==0)
+            // alternative to "more": <img src=\"../images/add_sm.gif\">
+            printf("<A class=\"showSnips\" href=\"#\" onclick=\"$(this).nextUntil('.shownSnips').slideToggle(); return false;\">more</A><BR><DIV class=\"hiddenSnips\" style=\"display:none\">");
+        if (i==snipCount-2)
+            printf("</DIV><div class=\"shownSnips\"></div>");
+        }
+    i++;
+    }
+slPairFreeList(&secSnips);
+sqlFreeResult(&srSnip);
+printf("</DIV><P>");
+}
 
+static void printMarkerSnippets(struct sqlConnection *conn, char *articleTable, 
+    char *markerTable, char *item)
+/* print the section form and limit warning (unless filtering on one article), then each article with its snippets */
+{
 /* do not show more snippets than this limit */
 int itemLimit=100;
 
 char *artExtIdFilter = cgiOptionalString("pubsFilterExtId");
+/* TODO: with the move to new Elsevier identifiers (~Oct 2013) the filter value will
+ * have to be rewritten here before it is used, along the lines of
+ * replaceInStr(artExtIdFilter, "ELS", "PII") - not active yet */
 
 char *sectionList = makeSqlMarkerList();
 if (artExtIdFilter==NULL)
     {
     printSectionCheckboxes();
     printLimitWarning(conn, markerTable, item, itemLimit, sectionList);
     printf("<H3>Snippets from Publications:</H3>");
     }
 
-struct sqlResult *sr = queryMarkerRows(conn, markerTable, articleTable, item, itemLimit, sectionList, artExtIdFilter);
+struct sqlResult *sr = queryMarkerRows(conn, markerTable, articleTable, item, \
+    itemLimit, sectionList, artExtIdFilter);
 
+// fixed-width container: text is easier to read when it does not span the whole screen
 printf("<DIV style=\"width:1024px; font-size:100%%\">\n");
 char **row;
+
+// one pass per article; snippets use their own connection, presumably as sr is still open on conn
 while ((row = sqlNextRow(sr)) != NULL)
     {
-    char *articleId = row[0];
-    char *url       = row[1];
-    char *title     = row[2];
-    char *authors   = row[3];
-    char *citation  = row[4];
-    char *pmid      = row[5];
-    char *snippets  = row[7];
-    url = mangleUrl(url);
-    printf("<A HREF=\"%s\">%s</A><BR> ", url, title);
-    printf("<SMALL>%s</SMALL>; ", authors);
-    printf("<SMALL>%s ", citation);
-    if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 )
-        printf(", <A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
-    printf("</SMALL><BR>\n");
-    if (pubsDebug)
-        printf("articleId=%s", articleId);
-    printf("<I>\n");
-    printAddWbr(snippets, 40);
-    printf("</I><P>");
+    char *articleId = printShortArticleInfo(row);
+    struct sqlConnection *snipConn = hAllocConn(database);
+    struct sqlResult *srSnip = querySnippets(snipConn, markerTable, articleId, item, sectionList);
+    printSnippets(srSnip);
+    hFreeConn(&snipConn);
     printf("<HR>");
     }
-
 printf("</DIV>\n");
 freeMem(sectionList);
 sqlFreeResult(&sr);
 }
 
 static char *urlToLogoUrl(char *pubsArticleTable, char *articleId, char *urlOrig)
 /* return a string with relative path of logo for publisher given the url of
  * fulltext or a table/articleId, has to be freed 
 */
 {
 struct sqlConnection *conn = hAllocConn(database);
 char *pubCode = NULL;
 if (hHasField("hgFixed", pubsArticleTable, "publisher"))
     {
     char query[4000];