c20579fc7c537c1736650125c5d264357d2cfa78
angie
  Mon Sep 18 13:15:16 2017 -0700
Big search & replace: use https instead of http for NCBI URLs.  refs #17793

diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index e9f576e..0371475 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -1,1217 +1,1217 @@
 /* pubs.c - display details of publiations literature track (pubsxxx tables) */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hgc.h"
 #include "hgColors.h"
 #include "trackDb.h"
 #include "web.h"
 #include "hash.h"
 #include "net.h"
 #include "obscure.h"
 #include "common.h"
 #include "string.h"
 #include "dystring.h"
 #include "dnautil.h"
 //#include "ctype.h"
 
 // cgi var to activate debug output
 static int pubsDebug = 0;
 
 // global var for printArticleInfo to indicate if article has suppl info 
 // Most publishers have supp data.
 // If they don't have it, we can skip the fileType column in the table
 bool pubsHasSupp = TRUE; 
 
 // global var for printArticleInfo to indicate if article is elsevier
 // If it's elsevier, we print the copyright line
 bool pubsIsElsevier = FALSE; 
 
 // the article source is used to modify other parts of the page
 static char *articleSource;
 
 // we need the external article PMC Id for YIF links
 static char *extId = NULL;
 
 // section types in mysql table, for all annotations tables
 // we note where the hit is located in the document
 static char *pubsSecNames[] ={
       "header", "abstract",
       "intro", "methods",
       "results", "discussion",
       "conclusions", "ack",
       "refs", "supplement", "unknown" };
 
 // labels to show to user, have to correspond to pubsSecNames
 static char *secLabels[] ={
       "Title", "Abstract",
       "Introduction", "Methods",
       "Results", "Discussion",
       "Conclusions", "Acknowledgements",
       "References", "Supplement", "Undetermined section (e.g. for a brief communication)" };
 
 // whether a checkbox is checked by default, have to correspond to pubsSecNames
 static int pubsSecChecked[] ={
       1, 1,
       1, 1,
       1, 1,
       1, 1,
       1, 1, 1 };
 
 static char *pubsSequenceTable;
 
 
 /* ------ functions to replace HTML4 tables with HTML5 constructs */
 /* Web wrappers incorporating tag, id, and class HTML attributes, to support
  * styling and test */
 
 /* Suffix -S for  "function accepts style parameter"
  * Suffix -C for  "function accepts class parameter"
  * Suffix -CI for "function accepts class and id parameter"
  *
  * Some functions are commented out because they are not yet used.
  */
 
 static void web2Start(char *tag)
 {
 printf("<%s>\n", tag);
 }
 
 static void web2End(char *tag)
 {
 printf("</%s>\n", tag);
 }
 
 static void web2StartS(char *style, char *tag)
 {
 printf("<%s style=\"%s\">\n", tag, style);
 }
 
 static void web2StartC(char *class, char *tag)
 {
 printf("<%s class=\"%s\">\n", tag, class);
 }
 
 static void web2StartCI(char *class, char *id, char *tag)
 {
 if ((id==NULL) && (class==NULL))
     web2Start(tag);
 else if (id==NULL)
     web2StartC(class, tag);
 else
     printf("<%s class=\"%s\" id=\"%s\">\n", tag, class, id);
 }
 
 static void web2PrintS(char *style, char *tag, char *label)
 {
 printf("<%s style=\"%s\">%s</%s>\n", tag, style, label, tag);
 }
 
 //static void web2PrintC(char *class, char *tag, char *label)
 //{
 //printf("<%s class=\"%s\">%s</%s>\n", tag, class, label, tag);
 //}
 
 //static void web2Print(char *tag, char *label)
 //{
 //printf("<%s>%s</%s>\n", tag, label, tag);
 //}
 
 static void web2StartTableC(char *class)             { web2StartC(class, "table"); }
 
 static void web2StartTheadC(char *class)             { web2StartC(class, "thead"); }
 static void web2EndThead()                           { web2End("thead"); }
 
 static void web2StartTbodyS(char *style)             { web2StartS(style, "tbody"); }
 
 static void web2StartCell()                          { web2Start("td"); }
 static void web2EndCell()                            { web2End("td"); }
 static void web2StartCellS(char *style)              { web2StartS(style, "td"); }
 //static void web2PrintCell(char *label)               { web2Print("td", label); }
 static void web2PrintCellS(char *style, char *label) { web2PrintS(style, "td", label); }
 
 static void web2StartRow()                           { web2Start("tr"); }
 static void web2EndRow()                             { web2End("tr"); }
 
 //static void web2StartTbody()                         { web2Start("tbody"); }
 static void web2EndTbody()                           { web2End("tbody"); }
 
 //static void web2StartTable()                         { web2Start("table"); }
 static void web2EndTable()                           { web2EndTbody(); web2End("table"); }
 
 static void web2StartDivCI (char *class, char *id)    { web2StartCI(class, id, "div"); }
 static void web2StartDivC (char *class)               { web2StartC(class, "div"); }
 
 static void web2EndDiv(char *comment) 
 {
 printf("</div> <!-- %s -->\n", comment);
 }
 
 //static void web2Img(char *url, char *alt, int width, int hspace, int vspace) 
 //{
 //printf("<img src=\"%s\" alt=\"%s\" width=\"%d\" hspace=\"%d\" vspace=\"%d\">\n", url, alt, width, hspace, vspace);
 //}
 
 static void web2ImgLink(char *url, char *imgUrl, char *alt, int width, int hspace, int vspace) 
 {
 printf("<a href=\"%s\"><img src=\"%s\" alt=\"%s\" width=\"%d\" hspace=\"%d\" vspace=\"%d\"></a>\n", url, imgUrl, alt, width, hspace, vspace);
 }
 
 static void web2PrintHeaderCell(char *label, int width)
 /* Print th heading cell with given width in percent */
 {
 printf("<th width=\"%d%%\">", width);
 printf("%s</th>", label);
 }
 
 static void web2PrintCellF(char *format, ...)
 /* print a td with format */
 {
 va_list args;
 va_start(args, format);
 
 web2StartCell();
 vprintf(format, args);
 web2EndCell();
 va_end(args);
 }
 
 static void web2StartSection(char *id, char *format, ...)
 /* create a new section on the web page */
 {
 va_list args;
 va_start(args, format);
 
 puts("<!-- START NEW SECTION -->\n");
 web2StartDivCI("section", id);
 web2StartDivC("subheadingBar windowSize");
 vprintf(format, args);
 web2EndDiv("subheadingBar");
 va_end(args);
 }
 
 static void web2EndSection()
 /* end section */
 {
 web2EndDiv("section");
 }
 
 /* ------  */
 
 static void printDebug(char *text) 
 {
 if (pubsDebug)
     printf("%s<BR>", text);
 }
 
 static char *mangleUrl(char *url) 
 /* add publisher specific parameters to url and return new url*/
 {
 if (!stringIn("sciencedirect.com", url))
     return url;
     
 // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect
 char *sdAddParam = "?svAppaddApp=298535"; 
 char *longUrl = catTwoStrings(url, sdAddParam);
 char *newUrl = replaceChars(longUrl, "article", "svapps");
 return newUrl;
 }
 
 static void printPositionAndSize(int start, int end, bool showSize)
 {
 printf("<B>Position:</B>&nbsp;"
            "<A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d\">",
                   hgTracksPathAndSettings(), database, seqName, start+1, end);
 char startBuf[64], endBuf[64];
 sprintLongWithCommas(startBuf, start + 1);
 sprintLongWithCommas(endBuf, end);
 printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf);
 long size = end - start;
 sprintLongWithCommas(startBuf, size);
 if (showSize)
     printf("<B>Genomic Size:</B>&nbsp;%s<BR>\n", startBuf);
 }
 
 static void printFilterLink(char *pslTrack, char *articleId, char *articleTable)
 /* print a link to hgTracks with an additional cgi param to activate the single article filter */
 {
     int start = cgiOptionalInt("o", -1);
     if (start==-1)
         return;
     int end = cgiInt("t");
     char qBuf[1024];
     struct sqlConnection *conn = hAllocConn(database);
     sqlSafef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId);
     char *dispId = sqlQuickString(conn, qBuf);
 
     printf("      <div class=\"subsection\">");
 
     if (!containsStringNoCase(pslTrack, "Bing"))
         {
         printf(
         "      <P><A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d&amp;pubsFilterArticleId=%s&amp;%s=pack&amp;hgFind.matches=%s\">",
             hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId);
 
         printf("Show these sequence matches individually on genome browser</A> (activates track \""
             "Individual matches for article\")</P>");
         }
 
     printPositionAndSize(start, end, 1);
     printf(
         "      </div> <!-- class: subsection --> \n");
     hFreeConn(&conn);
     
 }
 
 static char *makeSqlMarkerList(void)
 /* return list of sections from cgi vars, format like "'abstract','header'" */
 {
 int secCount = sizeof(pubsSecNames)/sizeof(char *);
 struct slName *names = NULL;
 int i;
 for (i=0; i<secCount; i++) 
 {
     // add ' around name and add to list
     char *secName = pubsSecNames[i];
     if (cgiOptionalInt(secName, pubsSecChecked[i]))
     {
         char nameBuf[100];
         safef(nameBuf, sizeof(nameBuf), "'%s'", secName);
         slAddHead(&names, slNameNew(nameBuf));
     }
 }
 
 if (names==0)
     errAbort("You need to specify at least one article section.");
 
 char *nameListString = slNameListToString(names, ',');
 slNameFree(names);
 return nameListString;
 }
 
 
 static struct sqlResult *queryMarkerRows(struct sqlConnection *conn, char *markerTable, \
     char *articleTable, char *item, int itemLimit, char *sectionList, char *artExtIdFilter)
 /* query marker rows from mysql, based on http parameters  
  * optionally filter on sections or just a single article
  * */
 {
 char query[4000];
 /* Mysql specific setting to make the group_concat function return longer strings */
 //sqlUpdate(conn, NOSQLINJ "SET SESSION group_concat_max_len = 100000");
  
 char artFilterSql[4000];
 artFilterSql[0] = 0;
 if (isNotEmpty(artExtIdFilter))
     safef(artFilterSql, sizeof(artFilterSql), " AND extId='%s' ", artExtIdFilter);
 
 // no need to check for illegal characters in sectionList
 sqlSafef(query, sizeof(query), "SELECT distinct %s.articleId, url, title, authors, citation, year, "  
     "pmid FROM %s "
     //"group_concat(snippet, concat(\" (section: \", section, \")\") SEPARATOR ' (...) ') FROM %s "
     "JOIN %s USING (articleId) "
     "WHERE markerId='%s' AND section in (%-s) "
     "%-s"
     //"GROUP by articleId "
     "ORDER BY year DESC "
     "LIMIT %d",
     markerTable, markerTable, articleTable, item, sectionList, artFilterSql, itemLimit);
 
     printDebug(query);
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 
 return sr;
 }
 
 static struct sqlResult *querySnippets(struct sqlConnection *conn, char *markerTable, \
     char *articleId, char *markerId, char *sectionList)
 /* query marker snippet rows from mysql for an article, markerId combination */
 {
 char query[4000];
 sqlSafef(query, sizeof(query), "SELECT section, snippet FROM %s "  
     "WHERE articleId=%s AND markerId='%s' AND section in (%-s) ", 
     markerTable, articleId, markerId, sectionList);
 struct sqlResult *sr = sqlGetResult(conn, query);
 return sr;
 }
 
 static void printSectionCheckboxes()
 /* show a little form with checkboxes where user can select sections they want to show */
 {
 int labelCount = sizeof(secLabels)/sizeof(char *);
 
 int i;
 printf("<P>\n");
 printf("<B>Sections of article searched:</B><BR>\n");
 printf("<FORM ACTION=\"hgc?%s&o=%s&t=%s&g=%s&i=%s\" METHOD=\"get\">\n",
     cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"));
 
 for (i=0; i<labelCount; i++) 
 {
     char *name = pubsSecNames[i];
     // checkboxes default to 0 unless checked, see 
     // http://stackoverflow.com/questions/2520952/how-come-checkbox-state-is-not-always-passed-along-to-php-script
     printf("<INPUT TYPE=\"hidden\" name=\"%s\" value=\"0\" />\n", pubsSecNames[i]);
     printf("<INPUT TYPE=\"checkbox\" name=\"%s\" ", name);
 
     int isChecked = cgiOptionalInt(name, pubsSecChecked[i]);
     if (isChecked)
         printf("value=\"1\" checked=\"yes\">%s</INPUT>\n", secLabels[i]);
     else
         printf("value=\"1\">%s</INPUT>\n", secLabels[i]);
 }
 
 printf("<INPUT TYPE=\"hidden\" name=\"o\" value=\"%s\" />\n", cgiString("o"));
 printf("<INPUT TYPE=\"hidden\" name=\"g\" value=\"%s\" />\n", cgiString("g"));
 printf("<INPUT TYPE=\"hidden\" name=\"t\" value=\"%s\" />\n", cgiString("t"));
 printf("<INPUT TYPE=\"hidden\" name=\"i\" value=\"%s\" />\n", cgiString("i"));
 printf("<INPUT TYPE=\"hidden\" name=\"hgsid\" value=\"%s\" />\n", cart->sessionId);
 printf("<BR>");
 printf("<INPUT TYPE=\"submit\" VALUE=\"Submit\" />\n");
 printf("</FORM><P>\n");
 }
 
 static void printLimitWarning(struct sqlConnection *conn, char *markerTable, 
     char *item, int itemLimit, char *sectionList)
 {
 char query[4000];
 // no need to check for illegal characters in sectionList
 sqlSafef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%-s) ", markerTable, item, sectionList);
 if (sqlNeedQuickNum(conn, query) > itemLimit) 
     {
     printf("<b>This marker is mentioned more than %d times</b><BR>\n", itemLimit);
     printf("The results would take too long to load in your browser and are "
     "therefore limited to the %d most recent articles.<P>\n", itemLimit);
     }
 }
 
 /* keep only uppercase letters in string*/
 void eraseAllButUpper(char *s)
 {
 char *in, *out;
 char c;
 
 in = out = s;
 for (;;)
     {
     c = *in++;
     if (c == 0)
         break;
     if (isupper(c))
         *out++ = c;
     }
 *out = 0;
 }
 
 static char* printShortArticleInfo(char **row) {
 /* print a two-line description of article */
 char *articleId = row[0];
 char *url       = row[1];
 char *title     = row[2];
 char *authors   = row[3];
 char *citation  = row[4];
 char *year      = row[5];
 char *pmid      = row[6];
 url = mangleUrl(url);
 printf("<A HREF=\"%s\">%s</A><BR> ", url, title);
 // cut author string at 40 chars, like scholar
 printf("<span style=\"color:gray\">");
 if (strlen(authors)>40)
     {
     authors[60] = 0;
     printf("<SMALL>%s...</SMALL> ", authors);
     }
 else if (!isEmpty(authors))
     printf("<SMALL>%s</SMALL> ", authors);
 
 // first word of citation is journal name
 char *words[10];
 int wordCount = chopCommas(citation, words);
 char *journal = NULL;
 if (wordCount!=0)
     journal = words[0];
 
 // optional: print the little gray line with author, journal, year info
 bool didPrint = FALSE;
 printf("<small>");
 if (year!=NULL && differentWord(year, "0"))
     {
     printf("%s", year);
     didPrint = TRUE;
     }
 if (!isEmpty(journal))
     {
     printf(" - %s ", journal);
     didPrint = TRUE;
     }
 if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 )
     {
-    printf(", <A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
+    printf(", <A HREF=\"https://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
     didPrint = TRUE;
     }
 printf("</small></span>\n");
 if (didPrint)
     printf("<BR>\n");
 
 if (pubsDebug)
     printf("articleId=%s", articleId);
 
 return articleId;
 }
 
 static void printSnippets(struct sqlResult *srSnip) 
 {
 char **snipRow;
 struct hash *doneSnips = newHash(0); // avoid printing a sentence twice
 int snipCount = 0;
 struct slPair *secSnips = NULL;
 
 // add all pairs to the list, remove duplicated snippets (ignore all lowercase chars)
 while ((snipRow = sqlNextRow(srSnip)) != NULL)
     {
     char *section  = cloneString(snipRow[0]);
     char *snippet  = cloneString(snipRow[1]);
     char *snipHash = cloneString(snippet);
     eraseAllButUpper(snipHash);
     if (hashLookup(doneSnips, snipHash)!=NULL)
         {
         //printf("<b>already seen</b></br>");
         continue;
         }
     slPairAdd(&secSnips, section, snippet);
     hashAdd(doneSnips, snipHash, 0);
     snipCount++;
     }
 hashFree(&doneSnips);
 
 // now iterate over list and print
 struct slPair *pair;
 printf("<DIV CLASS=\"snips\">\n");
 int i = 0;
 for (pair = secSnips; pair != NULL; pair = pair->next)
     {
     char *section = pair->name;
     char *snippet = pair->val;
     
     // print snippet
     printf("<I>");
     printAddWbr(snippet, 40);
     printf("<style=\"color:gray\">...</style>\n");
     if (differentWord(section, "unknown"))
         printf(" <SPAN style=\"color:gray\">(%s)</SPAN>", section);
     printf("</I>");
     printf("<BR>");
 
     if (snipCount>2) 
         {
         if (i==0)
 	    {
             // alternative to "more": <img src=\"../images/add_sm.gif\">
             printf("<A id='snowSnips_more' class=\"showSnips\" href=\"#\" >more</A><BR><DIV class=\"hiddenSnips\" style=\"display:none\">");
 	    jsOnEventById("click", "snowSnips_more", "$(this).nextUntil('.shownSnips').slideToggle(); return false;");
 	    }
         if (i==snipCount-2)
             printf("</DIV><div class=\"shownSnips\"></div>");
         }
     i++;
     }
 slPairFreeList(&secSnips);
 sqlFreeResult(&srSnip); 
 printf("</DIV><P>");
 }
 
 static void printMarkerSnippets(struct sqlConnection *conn, char *articleTable, 
     char *markerTable, char *item)
 /* print page with article info and snippets from articles */
 {
 /* do not show more snippets than this limit */
 int itemLimit=100;
 
 char *artExtIdFilter = cgiOptionalString("pubsFilterExtId");
 /* This will have to be activated with the move to new Elsevier identifiers, ~Oct 2013 */
 //if (startsWith(artExtIdFilter))
     //replaceInStr(artExtIdFilter, "ELS", "PII")
 
 char *sectionList = makeSqlMarkerList();
 if (artExtIdFilter==NULL)
     {
     printSectionCheckboxes();
     printLimitWarning(conn, markerTable, item, itemLimit, sectionList);
     printf("<H3>Snippets from Publications:</H3>");
     }
 
 struct sqlResult *sr = queryMarkerRows(conn, markerTable, articleTable, item, \
     itemLimit, sectionList, artExtIdFilter);
 
 // better readable if not across the whole screen
 printf("<DIV style=\"width:1024px; font-size:100%%\">\n");
 char **row;
 
 // loop over articles and print out snippets for each
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *articleId = printShortArticleInfo(row);
     struct sqlConnection *snipConn = hAllocConn(database);
     struct sqlResult *srSnip = querySnippets(snipConn, markerTable, articleId, item, sectionList);
     printSnippets(srSnip);
     hFreeConn(&snipConn);
     printf("<HR>");
     }
 printf("</DIV>\n");
 
 freeMem(sectionList);
 sqlFreeResult(&sr);
 }
 
 static char *urlToLogoUrl(char *pubsArticleTable, char *articleId, char *urlOrig)
 /* return a string with relative path of logo for publisher given the url of
  * fulltext or a table/articleId, has to be freed 
 */
 {
 struct sqlConnection *conn = hAllocConn(database);
 char *pubCode = NULL;
 if (hHasField("hgFixed", pubsArticleTable, "publisher"))
     {
     char query[4000];
     sqlSafef(query, sizeof(query), "SELECT publisher from %s where articleId=%s", 
         pubsArticleTable, articleId);
     pubCode = sqlQuickString(conn, query);
     // if no publisher is specified, we use the source (e.g. bing, pmc, elsevier, etc)
     // to find the logo
     if (isEmpty(pubCode))
         {
         sqlSafef(query, sizeof(query), "SELECT source from %s where articleId=%s", 
             pubsArticleTable, articleId);
         pubCode = sqlQuickString(conn, query);
         }
     }
 else 
     {
     // get top-level domain url if not publisher field
     char url[1024];
     memcpy(url, urlOrig, sizeof(url));
     char *slashParts[20];
     // split http://www.sgi.com/test -> to [http:,www.sgi.com,test]
     int partCount = chopString(url, "/", slashParts, ArraySize(slashParts));
     if (partCount<3)
         return NULL;
     // split www.sgi.com to [www,sgi,com]
     char *dotParts[20];
     partCount = chopString(slashParts[1], ".", dotParts, ArraySize(dotParts));
     if (partCount<3)
         return NULL;
     pubCode = dotParts[partCount-2];
     }
     
 // construct path to image
 char *logoUrl = needMem(512);
 safef(logoUrl, 512, "../images/pubs_%s.png", pubCode);
 return logoUrl;
 }
 
 static char *printArticleInfo(struct sqlConnection *conn, char *item, char *pubsArticleTable)
 /* Header with information about paper, return documentId */
 {
 char query[512];
 
 sqlSafef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract, pmid, "
     "source, extId FROM %s WHERE articleId='%s'", pubsArticleTable, item);
 
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 char *articleId=NULL;
 if ((row = sqlNextRow(sr)) == NULL)
     {
     printf("Could not resolve articleId %s, this is an internal error.\n", item);
     printf("Please send an email to max@soe.ucsc.edu\n");
     sqlFreeResult(&sr);
     return NULL;
     }
 
 articleId = cloneString(row[0]);
 char *url      = row[1];
 char *title    = row[2];
 char *authors  = row[3];
 char *cit      = row[4];
 char *abstract = row[5];
 char *pmid     = row[6];
 articleSource  = row[7];
 extId          = row[8];
 
 url = mangleUrl(url);
 if (strlen(abstract)==0) 
         abstract = "(No abstract available for this article. "
             "Please follow the link to the fulltext above by clicking on the titel or the fulltext image.)";
 
 if (stringIn("sciencedirect.com", url)) 
     {
     pubsHasSupp = FALSE;
     pubsIsElsevier = TRUE;
     }
 
 // authors  title
 printf("<DIV style=\"width:1024px; font-size:100%%\">\n");
 printf("<P>%s</P>\n", authors);
 //
 // logo of publisher
 char *logoUrl = urlToLogoUrl(pubsArticleTable, articleId, url);
 if (logoUrl)
     printf("<a href=\"%s\"><img align=\"right\" hspace=\"20\" src=\"%s\"></a>\n", url, logoUrl);
 freeMem(logoUrl);
 
 printf("<div style=\"width:800px\">");
 printf("<A TARGET=\"_blank\" HREF=\"%s\"><B>%s</B></A></div>\n", url, title);
 printf("</DIV>\n");
 printf("</DIV>\n");
 
 printf("<P style=\"width:1024px; font-size:80%%\">%s", cit);
 if (strlen(pmid)!=0 && strcmp(pmid, "0"))
-    printf(", <A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
+    printf(", <A HREF=\"https://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid);
 printf("</P>\n");
 printf("<P style=\"width:1024px; font-size:100%%\">%s</P>\n", abstract);
 
 if (pubsIsElsevier)
     printf("<P><SMALL>Copyright 2012 Elsevier B.V. All rights reserved.</SMALL></P>");
 
 sqlFreeResult(&sr);
 return articleId;
 }
 
 static struct hash *getSeqIdHash(struct sqlConnection *conn, char *trackTable, \
     char *articleId, char *item, char *seqName, int start)
 /* return a hash with the sequence IDs for a given chain of BLAT matches */
 {
 if (start==-1)
     return NULL;
 char query[512];
 /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */
 sqlSafef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable);
 char *seqIdPresent = sqlQuickString(conn, query);
 if (!seqIdPresent) {
     return NULL;
 }
 
 /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/
 sqlSafef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' "
     "and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start);
     printDebug(query);
 
 // split comma-sep list into parts
 char *seqIdCoordString = sqlQuickString(conn, query);
 char *seqIdCoords[1024];
 if (isEmpty(seqIdCoordString))
     return NULL;
 int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords));
 int i;
 
 struct hash *seqIdHash = NULL;
 seqIdHash = newHash(0);
 for (i=0; i<partCount; i++) 
     {
     if (pubsDebug)
         printf("annotId %s<br>", seqIdCoords[i]);
     hashAdd(seqIdHash, seqIdCoords[i], NULL);
     }
 return seqIdHash;
 }
 
 
 static void printSeqHeaders(bool showDesc, bool isClickedSection) 
 /* print table and headers */
 {
 //style=\"margin: 10px auto; width: 98%%\"style=\"background-color: #fcecc0\"
 web2StartTableC("stdTbl centeredStdTbl");
 web2StartTheadC("stdTblHead");
 if (showDesc)
     web2PrintHeaderCell("Article file", 10);
 
 // yif sequences have no flanking text at M. Krauthammer's request
 if (stringIn("yif", articleSource))
     web2PrintHeaderCell("Matching sequences", 60);
 else
     web2PrintHeaderCell("One row per sequence, with flanking text, sequence in bold", 60);
 
 if (pubsDebug)
     web2PrintHeaderCell("Identifiers", 30);
 
 if (!isClickedSection && !pubsDebug)
     web2PrintHeaderCell("Link to matching genomic location", 20);
 web2EndThead();
 web2StartTbodyS("font-family: Arial, Helvetica, sans-serif; line-height: 1.5em; font-size: 0.9em;");
 }
 
 void printHgTracksLink(char *db, char *chrom, int start, int end, char *linkText, char *optUrlStr)
 /* print link to hgTracks for db at pos */
 {
 char buf[1024];
 if (linkText==NULL) 
     {
     char startBuf[64], endBuf[64];
     sprintLongWithCommas(startBuf, start + 1);
     sprintLongWithCommas(endBuf, end);
     safef(buf, sizeof(buf), "%s:%s-%s (%s)", chrom, startBuf, endBuf, db);
     linkText = buf;
     }
 
 if (optUrlStr==NULL)
     optUrlStr = "";
     
 printf("<A HREF=\"%s&amp;db=%s&amp;position=%s:%d-%d&amp;%s\">%s</A>\n", hgTracksPathAndSettings(), db, chrom, start, end, optUrlStr, linkText);
 }
 
 void printGbLinks(struct slName *locs) 
 /* print hash keys in format hg19/chr1:1-1000 as links */
 {
 struct slName *el;
 for (el = locs; el != NULL; el = el->next) 
     {
     char *locString = el->name;
     char *db       = cloneNextWordByDelimiter(&locString, '/');
     char *chrom    = cloneNextWordByDelimiter(&locString, ':');
     char *startStr = cloneNextWordByDelimiter(&locString, '-');
     char *endStr   = cloneString(locString);
 
     int start = atoi(startStr);
     int end = atoi(endStr);
     printHgTracksLink(db, chrom, start, end, NULL, NULL);
     printf("<br>");
     freeMem(endStr); //XX why can't I free these?
     freeMem(chrom);
     freeMem(startStr);
     freeMem(db);
     }
 }
 
 void removeFlank (char *snippet) 
 /* keep only the parts inside <b> to </b> of a string, modifies the string in place */
 {
 char *startPtr = stringIn("<B>", snippet);
 char *endPtr   = stringIn("</B>", snippet);
 if (startPtr!=0 && endPtr!=0 && startPtr<endPtr) {
     char *buf = stringBetween("<B>", "</B>", snippet);
     memcpy(snippet, buf, strlen(buf)+1);
     freeMem(buf);
     }
 }
 
 
 static void printYifSection(char *clickedFileUrl)
 /* print section with the image on yif and a link to it */
 {
 
 // parse out yif file Id from yif url to generate link to yif page
 struct netParsedUrl npu;
 netParseUrl(clickedFileUrl, &npu);
 struct hash *params = NULL;
 struct cgiVar* paramList = NULL;
 char *paramStr = strchr(npu.file, '?');
 cgiParseInput(paramStr, &params, &paramList);
 struct cgiVar *var = hashFindVal(params, "file");
 char *figId = NULL;
 if (var!=NULL && var->val!=NULL)
     figId = var->val;
 
 char yifPageUrl[4096];
 if (figId) 
     {
     safef(yifPageUrl, sizeof(yifPageUrl), "http://krauthammerlab.med.yale.edu/imagefinder/Figure.external?sp=S%s%%2F%s", extId, figId);
     }
 else
     return;
 
 web2StartSection("section", 
     "<A href=\"%s\">Yale Image Finder</a>: figure where sequences were found", 
     yifPageUrl);
 
 web2ImgLink(yifPageUrl, clickedFileUrl, "Image from YIF", 600, 10, 10); 
 
 web2EndSection();
 }
 
 static bool printSeqSection(char *articleId, char *title, bool showDesc, struct sqlConnection *conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char *pslTable, char *articleTable)
 /* print a section with a table of sequences, show only sequences with IDs in hash,
  * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs 
  *   - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs
  *   - other seqs (isClickedSection=False) -> show all other seqs
  * 
  * */
 {
 // get data from mysql
 // I support two different schemas: new and old. On old tables, there is no fileUrl yet on the annotations
 // that means that oldQuery just uses an empty string for the fileUrl field.
 char *oldQuery = "SELECT fileDesc, snippet, locations, annotId, sequence, \"\" FROM %s WHERE articleId='%s'";
 char *newQuery = "SELECT fileDesc, snippet, locations, annotId, sequence, fileUrl FROM %s WHERE articleId='%s'";
 
 char *queryTemplate = oldQuery;
 if (hHasField("hgFixed", pubsSequenceTable, "fileUrl"))
     queryTemplate = newQuery;
 
 char query[4096];
 sqlSafef(query, sizeof(query), queryTemplate, pubsSequenceTable, articleId);
 printDebug(query);
 struct sqlResult *sr = sqlGetResult(conn, query);
 
 // construct title for section
 char *otherFormat = NULL;
 if (fasta)
     otherFormat = "table";
 else
     otherFormat = "fasta";
 
 char fullTitle[5000];
 safef(fullTitle, sizeof(fullTitle), 
 "%s&nbsp;<A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(%s format)</SMALL></A>\n", 
 title, cartSidUrlString(cart), cgiOptionalString("o"), cgiOptionalString("t"), cgiString("g"), cgiString("i"), 
 !fasta, otherFormat);
 
 web2StartSection("pubsSection", "%s", fullTitle);
 
 // print filtering link at start of table & table headers
 if (isClickedSection) {
     printFilterLink(pslTable, articleId, articleTable);
     }
 
 if (!fasta) 
     printSeqHeaders(showDesc, isClickedSection);
 
 // output rows
 char **row;
 
 // the URL of the file from the clicked sequences, for YIF
 char *clickedFileUrl = NULL; 
 
 bool foundSkippedRows = FALSE;
 int rowId = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     rowId++;
 
     char *fileDesc = row[0];
     char *snippet  = row[1];
     char *locString= row[2];
     //char *artId    = row[3];
     //char *fileId   = row[4];
     //char *seqId    = row[5];
     char *annotId = row[3];
     char *seq      = row[4];
     char *fileUrl  = row[5];
 
     // annotation (=sequence) ID is a 64 bit int with 10 digits for 
     // article, 3 digits for file, 5 for annotation
     //char annotId[100];
     
     // some debugging help
     //safef(annotId, 100, "%10d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId));
     //if (pubsDebug)
         //printf("artId %s, file %s, annot %s -> annotId %s<br>", artId, fileId, seqId, annotId);
 
     // only display this sequence if we're in the right section
     if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) {
         foundSkippedRows = TRUE;
         continue;
     }
     // if we're in the clicked section and the current sequence is one that matched here
     // then keep the current URL, as we might need it afterwards
     else
         clickedFileUrl = cloneString(fileUrl);
 
     // suppress non-matches if the sequences come from YIF as figures can 
     // contain tons of non-matching sequences
     if (stringIn("yif", articleSource) && isEmpty(locString)) {
         foundSkippedRows = TRUE;
         continue;
     }
 
     if (fasta)
         {
         if (strlen(extId)!=0)
             printf("<tt><pre>&gt;%s-%d\n", extId, rowId);
         else
             printf("<tt><pre>&gt;seq%d\n", rowId);
         writeSeqWithBreaks(stdout, seq, strlen(seq), 80);
         printf("</pre></tt>\n");
         }
     else
         {
         web2StartRow();
 
         // column 1: type of file (main or supp)
         if (showDesc) 
             {
             char linkStr[4096];
             if (isEmpty(fileDesc))
                 fileDesc = "main text";
             safef(linkStr, sizeof(linkStr), "<a href=\"%s\">%s</a>", fileUrl, fileDesc);
             web2PrintCellS("word-break:break-all", linkStr);
             }
         
         // column 2: snippet
         web2StartCellS("word-break:break-all");
         if (stringIn("yif", articleSource))
             removeFlank(snippet);
         printAddWbr(snippet, 40);
         web2EndCell();
 
         // optional debug info column
         if (pubsDebug) 
             //web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId);
             web2PrintCellF("annotId %s", annotId);
 
         // column 3: print links to locations, only print this in the 2nd section
         if (!isClickedSection && !pubsDebug) 
             {
             // format: hg19/chr1:300-400,mm9/chr1:60006-23234
             // split on "," then split on "/"
             //locs = charSepToSlNames(locString, ',');
 
             web2StartCell();
             char *locArr[1024];
             int partCount = chopString(locString, ",", locArr, ArraySize(locArr));
             if (partCount==0)
                 printf("No matches");
             else
                 {
                 struct slName *locs;
                 locs = slNameListFromStringArray(locArr, partCount);
                 slUniqify(&locs, slNameCmp, slNameFree);
                 printGbLinks(locs);
                 printf("<br>");
                 slFreeList(&locs);
                 }
             web2EndCell();
             }
         web2EndRow();
         }
     }
 
 if (!fasta)
     web2EndTable();
 
 web2EndSection();
 /* Yale Image finder files contain links to the image itself */
 if (pubsDebug)
     printf("%s %s %d", articleSource, clickedFileUrl, isClickedSection);
 if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) 
     printYifSection(clickedFileUrl);
 
 freeMem(clickedFileUrl);
 
 sqlFreeResult(&sr);
 return foundSkippedRows;
 }
 
 static void printSeqInfo(struct sqlConnection *conn, char *trackTable,
     char *pslTable, char *articleId, char *item, char *seqName, int start, 
     bool fileDesc, bool fasta, char *articleTable)
     /* print sequences, split into two sections 
      * two sections: one for sequences that were clicked, one for all others*/
 {
 struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start);
 
 bool skippedRows;
 if (clickedSeqs) 
     skippedRows = printSeqSection(articleId, "Sequences matching here", \
         fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable);
 else 
     skippedRows=1;
 
 if (skippedRows) 
     {
     // the section title should change if the data comes from the yale image finder = a figure
     char *docType = "article";
     if (stringIn("yif", articleSource))
         docType = "figure";
     char title[1024];
     if (clickedSeqs)
         safef(title, sizeof(title), "Other Sequences in this %s", docType);
     // NO clicked seqs can happen if the hgc was called with no o or t parameters
     // from somewhere outside the browser, like elsevier or europmc
     else
         safef(title, sizeof(title), "Sequences in this %s", docType);
 
     printSeqSection(articleId, title, \
         fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable);
     }
 freeHash(&clickedSeqs);
 }
 
 static void printTrackVersion(struct trackDb *tdb, struct sqlConnection *conn, char *item) 
 {
 char versionString[256];
 char dateReference[256];
 char headerTitle[512];
 /* see if hgFixed.trackVersion exists */
 boolean trackVersionExists = hTableExists("hgFixed", "trackVersion");
 
 if (trackVersionExists)
     {
     char query[256];
     sqlSafef(query, sizeof(query), \
     "SELECT version,dateReference FROM hgFixed.trackVersion "
     "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database);
     struct sqlResult *sr = sqlGetResult(conn, query);
     char **row;
 
     /* in case of NULL result from the table */
     versionString[0] = 0;
     while ((row = sqlNextRow(sr)) != NULL)
         {
         safef(versionString, sizeof(versionString), "version %s",
             row[0]);
         safef(dateReference, sizeof(dateReference), "%s",
             row[1]);
         }
     sqlFreeResult(&sr);
     }
 else
     {
     versionString[0] = 0;
     dateReference[0] = 0;
     }
 
 if (versionString[0])
     safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString);
 else
     safef(headerTitle, sizeof(headerTitle), "%s", item);
 
 genericHeader(tdb, headerTitle);
 }
 
 static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id)
 /* copied from otherOrgs.c */
 {
 char query[512];
 struct sqlResult *sr;
 char **row;
 bioSeq *seq = NULL;
 sqlSafef(query, sizeof(query), 
     "select sequence from %s where annotId = '%s'", table, id);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) != NULL)
     {
     AllocVar(seq);
     seq->name = cloneString(id);
     seq->dna = cloneString(row[0]);
     seq->size = strlen(seq->dna);
     }
 sqlFreeResult(&sr);
 
 return seq;
 }
 
 void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item)
 /* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli */
 {
 bioSeq *oSeq = NULL;
 writeFramesetType();
 puts("<HTML>");
 printf("<HEAD>\n<TITLE>Literature Sequence vs Genomic</TITLE>\n</HEAD>\n\n");
 
 struct psl *psl = getAlignments(conn, pslTable, item);
 if (psl == NULL)
     errAbort("Couldn't find alignment at %s:%s", pslTable, item);
 
 oSeq = getSeq(conn, seqTable, item);
 
 if (oSeq == NULL)  
     errAbort("%s is in pslTable but not in sequence table. Internal error.", item);
 
 enum gfType qt;
 if (psl->qSize!=oSeq->size) 
     {
     qt = gftProt;
     // trying to correct pslMap's changes to qSize/qStarts and blockSizes
     psl->strand[1]=psl->strand[0];
     psl->strand[0]='+';
     psl->strand[2]=0;
     psl->qSize = psl->qSize/3;
     psl->match = psl->match/3;
     // Take care of codons that go over block boundaries:
     // Convert a block with blockSizes=58,32 and qStarts=0,58,
     // to blockSizes=19,11 and qStarts=0,19
     int i;
     int remaind = 0;
     for (i=0; i<psl->blockCount; i++)
         {
         psl->qStarts[i] = psl->qStarts[i]/3;
 
         int bs = psl->blockSizes[i];
         remaind += (bs % 3);
         if (remaind>=3)
         {
             bs += 1;
             remaind -= 3;
         }
         psl->blockSizes[i] = bs/3; 
         }
 
     }
 else
     qt = gftDna;
 
 showSomeAlignment(psl, oSeq, qt, 0, oSeq->size, NULL, 0, 0);
 }
 
 void doPubsDetails(struct trackDb *tdb, char *item)
 /* publications custom display */
 {
 
 int start        = cgiOptionalInt("o", -1);
 int end          = cgiOptionalInt("t", -1);
 char *trackTable = cgiString("g");
 char *aliTable   = cgiOptionalString("aliTable");
 int fasta        = cgiOptionalInt("fasta", 0);
 pubsDebug        = cgiOptionalInt("debug", 0);
 
 struct sqlConnection *conn = hAllocConn(database);
 
 char *articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable");
 
 if (stringIn("Psl", trackTable))
     { 
     if (aliTable!=NULL)
         {
         pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
         pubsAli(conn, trackTable, pubsSequenceTable, item);
         return;
         }
 
     else
         {
         genericHeader(tdb, item);
         struct psl *psl = getAlignments(conn, trackTable, item);
         printf("<H3>Genomic Alignment with sequence found in publication fulltext</H3>");
         printAlignmentsSimple(psl, start, trackTable, trackTable, item);
         }
     }
 else
     {
     printTrackVersion(tdb, conn, item);
     if (stringIn("Marker", trackTable))
         {
         char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable");
         if (start!=-1)
             printPositionAndSize(start, end, 0);
         printMarkerSnippets(conn, articleTable, markerTable, item);
         }
     else
         {
         pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
         char *articleId = printArticleInfo(conn, item, articleTable);
         if (articleId!=NULL) 
             {
             char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack");
             printSeqInfo(conn, trackTable, pslTable, articleId, item, \
                 seqName, start, pubsHasSupp, fasta, articleTable);
             }
     }
 }
 
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }