c20579fc7c537c1736650125c5d264357d2cfa78
angie
Mon Sep 18 13:15:16 2017 -0700
Big search & replace: use https instead of http for NCBI URLs. refs #17793
diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index e9f576e..0371475 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -1,1217 +1,1217 @@
/* pubs.c - display details of publications literature track (pubsxxx tables) */
/* Copyright (C) 2014 The Regents of the University of California
* See README in this or parent directory for licensing information. */
#include "common.h"
#include "jksql.h"
#include "hdb.h"
#include "hgc.h"
#include "hgColors.h"
#include "trackDb.h"
#include "web.h"
#include "hash.h"
#include "net.h"
#include "obscure.h"
#include "common.h"
#include "string.h"
#include "dystring.h"
#include "dnautil.h"
//#include "ctype.h"
/* Debug switch: a non-zero value turns on extra diagnostic output.
 * It is filled from the "debug" CGI variable. */
static int pubsDebug = 0;

/* Flag for printArticleInfo: does the article have supplementary info?
 * Most publishers have supplementary data. When they do not, the fileType
 * column of the table can be skipped. */
bool pubsHasSupp = TRUE;

/* Flag for printArticleInfo: is the article an Elsevier one?
 * When it is, the copyright line is printed. */
bool pubsIsElsevier = FALSE;

/* Source of the current article; other parts of the page change their
 * output depending on it. */
static char *articleSource = NULL;

/* External article PMC id, required for the YIF links. */
static char *extId = NULL;
/* Section types as stored in the mysql tables (shared by all annotation
 * tables). They record where in the document a hit is located. */
static char *pubsSecNames[] =
    {
    "header",
    "abstract",
    "intro",
    "methods",
    "results",
    "discussion",
    "conclusions",
    "ack",
    "refs",
    "supplement",
    "unknown",
    };

/* Labels shown to the user. Entry i must describe pubsSecNames[i]. */
static char *secLabels[] =
    {
    "Title",
    "Abstract",
    "Introduction",
    "Methods",
    "Results",
    "Discussion",
    "Conclusions",
    "Acknowledgements",
    "References",
    "Supplement",
    "Undetermined section (e.g. for a brief communication)",
    };

/* Default state of each section checkbox (1 = checked). Entry i must belong
 * to pubsSecNames[i]. */
static int pubsSecChecked[] =
    {
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    };

/* Name of the sequence table; assigned from the "pubsSequenceTable" trackDb
 * setting before it is used. */
static char *pubsSequenceTable;
/* ------ functions to replace HTML4 tables with HTML5 constructs */
/* Web wrappers incorporating tag, id, and class HTML attributes, to support
* styling and test */
/* Suffix -S for "function accepts style parameter"
* Suffix -C for "function accepts class parameter"
* Suffix -CI for "function accepts class and id parameter"
*
* Some functions are commented out because they are not yet used.
*/
static void web2Start(char *tag)
/* Write the start tag of an HTML element without attributes, e.g. "<td>",
 * followed by a newline, to stdout. */
{
fprintf(stdout, "<%s>\n", tag);
}
static void web2End(char *tag)
/* Write the end tag of an HTML element, e.g. "</td>", followed by a newline.
 * The format previously lacked the leading "</", so the output was "td>"
 * rather than a closing tag. */
{
printf("</%s>\n", tag);
}
static void web2StartS(char *style, char *tag)
/* Write the start tag of an HTML element that carries an inline style
 * attribute, followed by a newline. Note the argument order: style first,
 * element name second. */
{
fprintf(stdout, "<%s style=\"%s\">\n", tag, style);
}
static void web2StartC(char *class, char *tag)
/* Write the start tag of an HTML element that carries a class attribute,
 * followed by a newline. Note the argument order: class first, element name
 * second. */
{
fprintf(stdout, "<%s class=\"%s\">\n", tag, class);
}
static void web2StartCI(char *class, char *id, char *tag)
/* Write the start tag of an HTML element with optional class and id
 * attributes. Either class or id (or both) may be NULL; a NULL attribute is
 * left out of the tag.
 *
 * Previously a NULL class combined with a non-NULL id reached the two-attribute
 * printf and handed a NULL pointer to %s. */
{
if (id == NULL)
    {
    if (class == NULL)
        web2Start(tag);
    else
        web2StartC(class, tag);
    }
else if (class == NULL)
    printf("<%s id=\"%s\">\n", tag, id);
else
    printf("<%s class=\"%s\" id=\"%s\">\n", tag, class, id);
}
static void web2PrintS(char *style, char *tag, char *label)
/* Write a complete HTML element: start tag with an inline style attribute,
 * the label as content, and the matching end tag, followed by a newline.
 * The end tag previously lacked its leading "</". */
{
printf("<%s style=\"%s\">%s</%s>\n", tag, style, label, tag);
}
//static void web2PrintC(char *class, char *tag, char *label)
//{
//printf("<%s class=\"%s\">%s%s>\n", tag, class, label, tag);
//}
//static void web2Print(char *tag, char *label)
//{
//printf("<%s>%s%s>\n", tag, label, tag);
//}
static void web2StartTableC(char *class)
/* Start a table element with the given class */
{
web2StartC(class, "table");
}

static void web2StartTheadC(char *class)
/* Start a thead element with the given class */
{
web2StartC(class, "thead");
}

static void web2EndThead()
/* End the thead element */
{
web2End("thead");
}

static void web2StartTbodyS(char *style)
/* Start a tbody element with the given inline style */
{
web2StartS(style, "tbody");
}

static void web2StartCell()
/* Start a td element without attributes */
{
web2Start("td");
}

static void web2EndCell()
/* End the td element */
{
web2End("td");
}

static void web2StartCellS(char *style)
/* Start a td element with the given inline style */
{
web2StartS(style, "td");
}

//static void web2PrintCell(char *label) { web2Print("td", label); }

static void web2PrintCellS(char *style, char *label)
/* Print a complete td element with the given inline style and content */
{
web2PrintS(style, "td", label);
}

static void web2StartRow()
/* Start a tr element */
{
web2Start("tr");
}

static void web2EndRow()
/* End the tr element */
{
web2End("tr");
}

//static void web2StartTbody() { web2Start("tbody"); }

static void web2EndTbody()
/* End the tbody element */
{
web2End("tbody");
}

//static void web2StartTable() { web2Start("table"); }

static void web2EndTable()
/* End the tbody element first and then the table element itself */
{
web2EndTbody();
web2End("table");
}

static void web2StartDivCI (char *class, char *id)
/* Start a div element with the given class and id */
{
web2StartCI(class, id, "div");
}

static void web2StartDivC (char *class)
/* Start a div element with the given class */
{
web2StartC(class, "div");
}
static void web2EndDiv(char *comment)
/* End a div element and append an HTML comment so the closing tag can be
 * matched to its opening tag when reading the page source.
 *
 * NOTE(review): the format string had been reduced to " \n", which neither
 * closed the div nor used the comment argument. The markup below is
 * reconstructed from the function name and its parameter; confirm against the
 * upstream file. */
{
printf("</div> <!-- %s -->\n", comment);
}
//static void web2Img(char *url, char *alt, int width, int hspace, int vspace)
//{
//printf("\n", url, alt, width, hspace, vspace);
//}
static void web2ImgLink(char *url, char *imgUrl, char *alt, int width, int hspace, int vspace)
/* Print an image that is a hyperlink.
 *   url    - target of the link
 *   imgUrl - location of the image
 *   alt    - alternative text of the image
 *   width, hspace, vspace - values for the img attributes of the same name
 *
 * NOTE(review): the original format string was an unterminated literal with
 * no conversions for its six arguments. The markup below is reconstructed
 * from the parameter list; confirm attribute names and order against the
 * upstream file. */
{
printf("<a href=\"%s\"><img src=\"%s\" alt=\"%s\" width=\"%d\" hspace=\"%d\" vspace=\"%d\"></a>\n",
       url, imgUrl, alt, width, hspace, vspace);
}
static void web2PrintHeaderCell(char *label, int width)
/* Print th heading cell with given width in percent */
{
printf("
", hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId); printf("Show these sequence matches individually on genome browser (activates track \"" "Individual matches for article\")
"); } printPositionAndSize(start, end, 1); printf( "\n");
}
static void printLimitWarning(struct sqlConnection *conn, char *markerTable,
    char *item, int itemLimit, char *sectionList)
/* Print a warning when the marker occurs more often than itemLimit.
 *   conn        - open database connection
 *   markerTable - table that is counted
 *   item        - marker id to look up
 *   itemLimit   - number of rows above which the warning is printed
 *   sectionList - contents of the SQL "in (...)" list of sections
 * Nothing is printed when the count is at or below the limit.
 *
 * NOTE(review): both message literals were split by a raw newline, which is
 * not valid C. They are rejoined here with a <br> at the position of the
 * break; the exact original markup could not be recovered, so confirm against
 * the upstream file. */
{
char query[4000];
// no need to check for illegal characters in sectionList
// NOTE(review): %-s presumably inserts sectionList without escaping - confirm
sqlSafef(query, sizeof(query),
    "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%-s) ",
    markerTable, item, sectionList);
if (sqlNeedQuickNum(conn, query) > itemLimit)
    {
    printf("This marker is mentioned more than %d times<br>\n", itemLimit);
    printf("The results would take too long to load in your browser and are "
           "therefore limited to the %d most recent articles.<br>\n", itemLimit);
    }
}
void eraseAllButUpper(char *s)
/* Keep only the uppercase letters of s. The string is compacted in place and
 * re-terminated, so it never grows. A NULL pointer is accepted and ignored. */
{
char *in, *out;
if (s == NULL)
    return;
for (in = out = s; *in != 0; in++)
    {
    /* The <ctype.h> classifiers are only defined for EOF and for values
     * representable as unsigned char. A plain char may be negative for bytes
     * above 0x7f, so convert before classifying. */
    if (isupper((unsigned char)*in))
        *out++ = *in;
    }
*out = 0;
}
static char* printShortArticleInfo(char **row) {
/* print a two-line description of article */
char *articleId = row[0];
char *url = row[1];
char *title = row[2];
char *authors = row[3];
char *citation = row[4];
char *year = row[5];
char *pmid = row[6];
url = mangleUrl(url);
printf("%s
", url, title);
// cut author string at 40 chars, like scholar
printf("");
if (strlen(authors)>40)
{
authors[60] = 0;
printf("%s... ", authors);
}
else if (!isEmpty(authors))
printf("%s ", authors);
// first word of citation is journal name
char *words[10];
int wordCount = chopCommas(citation, words);
char *journal = NULL;
if (wordCount!=0)
journal = words[0];
// optional: print the little gray line with author, journal, year info
bool didPrint = FALSE;
printf("");
if (year!=NULL && differentWord(year, "0"))
{
printf("%s", year);
didPrint = TRUE;
}
if (!isEmpty(journal))
{
printf(" - %s ", journal);
didPrint = TRUE;
}
if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 )
{
- printf(", PMID%s\n", pmid, pmid);
+ printf(", PMID%s\n", pmid, pmid);
didPrint = TRUE;
}
printf("\n");
if (didPrint)
printf("
\n");
if (pubsDebug)
printf("articleId=%s", articleId);
return articleId;
}
static void printSnippets(struct sqlResult *srSnip)
{
char **snipRow;
struct hash *doneSnips = newHash(0); // avoid printing a sentence twice
int snipCount = 0;
struct slPair *secSnips = NULL;
// add all pairs to the list, remove duplicated snippets (ignore all lowercase chars)
while ((snipRow = sqlNextRow(srSnip)) != NULL)
{
char *section = cloneString(snipRow[0]);
char *snippet = cloneString(snipRow[1]);
char *snipHash = cloneString(snippet);
eraseAllButUpper(snipHash);
if (hashLookup(doneSnips, snipHash)!=NULL)
{
//printf("already seen");
continue;
}
slPairAdd(&secSnips, section, snippet);
hashAdd(doneSnips, snipHash, 0);
snipCount++;
}
hashFree(&doneSnips);
// now iterate over list and print
struct slPair *pair;
printf("
"); } static void printMarkerSnippets(struct sqlConnection *conn, char *articleTable, char *markerTable, char *item) /* print page with article info and snippets from articles */ { /* do not show more snippets than this limit */ int itemLimit=100; char *artExtIdFilter = cgiOptionalString("pubsFilterExtId"); /* This will have to be activated with the move to new Elsevier identifiers, ~Oct 2013 */ //if (startsWith(artExtIdFilter)) //replaceInStr(artExtIdFilter, "ELS", "PII") char *sectionList = makeSqlMarkerList(); if (artExtIdFilter==NULL) { printSectionCheckboxes(); printLimitWarning(conn, markerTable, item, itemLimit, sectionList); printf("
%s
\n", authors); // // logo of publisher char *logoUrl = urlToLogoUrl(pubsArticleTable, articleId, url); if (logoUrl) printf("%s", cit); if (strlen(pmid)!=0 && strcmp(pmid, "0")) - printf(", PMID%s\n", pmid, pmid); + printf(", PMID%s\n", pmid, pmid); printf("
\n"); printf("%s
\n", abstract); if (pubsIsElsevier) printf("Copyright 2012 Elsevier B.V. All rights reserved.
"); sqlFreeResult(&sr); return articleId; } static struct hash *getSeqIdHash(struct sqlConnection *conn, char *trackTable, \ char *articleId, char *item, char *seqName, int start) /* return a hash with the sequence IDs for a given chain of BLAT matches */ { if (start==-1) return NULL; char query[512]; /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */ sqlSafef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable); char *seqIdPresent = sqlQuickString(conn, query); if (!seqIdPresent) { return NULL; } /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/ sqlSafef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " "and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start); printDebug(query); // split comma-sep list into parts char *seqIdCoordString = sqlQuickString(conn, query); char *seqIdCoords[1024]; if (isEmpty(seqIdCoordString)) return NULL; int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i>%s-%d\n", extId, rowId); else printf(">seq%d\n", rowId); writeSeqWithBreaks(stdout, seq, strlen(seq), 80); printf("\n"); } else { web2StartRow(); // column 1: type of file (main or supp) if (showDesc) { char linkStr[4096]; if (isEmpty(fileDesc)) fileDesc = "main text"; safef(linkStr, sizeof(linkStr), "%s", fileUrl, fileDesc); web2PrintCellS("word-break:break-all", linkStr); } // column 2: snippet web2StartCellS("word-break:break-all"); if (stringIn("yif", articleSource)) removeFlank(snippet); printAddWbr(snippet, 40); web2EndCell(); // optional debug info column if (pubsDebug) //web2PrintCellF("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); web2PrintCellF("annotId %s", annotId); // column 3: print links to locations, only print this in the 2nd section if (!isClickedSection && !pubsDebug) { // format: 
hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" //locs = charSepToSlNames(locString, ','); web2StartCell(); char *locArr[1024]; int partCount = chopString(locString, ",", locArr, ArraySize(locArr)); if (partCount==0) printf("No matches"); else { struct slName *locs; locs = slNameListFromStringArray(locArr, partCount); slUniqify(&locs, slNameCmp, slNameFree); printGbLinks(locs); printf("
"); slFreeList(&locs); } web2EndCell(); } web2EndRow(); } } if (!fasta) web2EndTable(); web2EndSection(); /* Yale Image finder files contain links to the image itself */ if (pubsDebug) printf("%s %s %d", articleSource, clickedFileUrl, isClickedSection); if (stringIn("yif", articleSource) && (clickedFileUrl!=NULL) && isClickedSection) printYifSection(clickedFileUrl); freeMem(clickedFileUrl); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection *conn, char *trackTable, char *pslTable, char *articleId, char *item, char *seqName, int start, bool fileDesc, bool fasta, char *articleTable) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash *clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) skippedRows = printSeqSection(articleId, "Sequences matching here", \ fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable); else skippedRows=1; if (skippedRows) { // the section title should change if the data comes from the yale image finder = a figure char *docType = "article"; if (stringIn("yif", articleSource)) docType = "figure"; char title[1024]; if (clickedSeqs) safef(title, sizeof(title), "Other Sequences in this %s", docType); // NO clicked seqs can happen if the hgc was called with no o or t parameters // from somewhere outside the browser, like elsevier or europmc else safef(title, sizeof(title), "Sequences in this %s", docType); printSeqSection(articleId, title, \ fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable); } freeHash(&clickedSeqs); } static void printTrackVersion(struct trackDb *tdb, struct sqlConnection *conn, char *item) { char versionString[256]; char dateReference[256]; char headerTitle[512]; /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { char query[256]; sqlSafef(query, 
sizeof(query), \ "SELECT version,dateReference FROM hgFixed.trackVersion " "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database); struct sqlResult *sr = sqlGetResult(conn, query); char **row; /* in case of NULL result from the table */ versionString[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { safef(versionString, sizeof(versionString), "version %s", row[0]); safef(dateReference, sizeof(dateReference), "%s", row[1]); } sqlFreeResult(&sr); } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) /* copied from otherOrgs.c */ { char query[512]; struct sqlResult *sr; char **row; bioSeq *seq = NULL; sqlSafef(query, sizeof(query), "select sequence from %s where annotId = '%s'", table, id); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { AllocVar(seq); seq->name = cloneString(id); seq->dna = cloneString(row[0]); seq->size = strlen(seq->dna); } sqlFreeResult(&sr); return seq; } void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item) /* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli */ { bioSeq *oSeq = NULL; writeFramesetType(); puts(""); printf("\nLiterature Sequence vs Genomic \n\n\n"); struct psl *psl = getAlignments(conn, pslTable, item); if (psl == NULL) errAbort("Couldn't find alignment at %s:%s", pslTable, item); oSeq = getSeq(conn, seqTable, item); if (oSeq == NULL) errAbort("%s is in pslTable but not in sequence table. 
Internal error.", item); enum gfType qt; if (psl->qSize!=oSeq->size) { qt = gftProt; // trying to correct pslMap's changes to qSize/qStarts and blockSizes psl->strand[1]=psl->strand[0]; psl->strand[0]='+'; psl->strand[2]=0; psl->qSize = psl->qSize/3; psl->match = psl->match/3; // Take care of codons that go over block boundaries: // Convert a block with blockSizes=58,32 and qStarts=0,58, // to blockSizes=19,11 and qStarts=0,19 int i; int remaind = 0; for (i=0; iblockCount; i++) { psl->qStarts[i] = psl->qStarts[i]/3; int bs = psl->blockSizes[i]; remaind += (bs % 3); if (remaind>=3) { bs += 1; remaind -= 3; } psl->blockSizes[i] = bs/3; } } else qt = gftDna; showSomeAlignment(psl, oSeq, qt, 0, oSeq->size, NULL, 0, 0); } void doPubsDetails(struct trackDb *tdb, char *item) /* publications custom display */ { int start = cgiOptionalInt("o", -1); int end = cgiOptionalInt("t", -1); char *trackTable = cgiString("g"); char *aliTable = cgiOptionalString("aliTable"); int fasta = cgiOptionalInt("fasta", 0); pubsDebug = cgiOptionalInt("debug", 0); struct sqlConnection *conn = hAllocConn(database); char *articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable"); if (stringIn("Psl", trackTable)) { if (aliTable!=NULL) { pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); pubsAli(conn, trackTable, pubsSequenceTable, item); return; } else { genericHeader(tdb, item); struct psl *psl = getAlignments(conn, trackTable, item); printf(" Genomic Alignment with sequence found in publication fulltext
"); printAlignmentsSimple(psl, start, trackTable, trackTable, item); } } else { printTrackVersion(tdb, conn, item); if (stringIn("Marker", trackTable)) { char *markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable"); if (start!=-1) printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); char *articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); printSeqInfo(conn, trackTable, pslTable, articleId, item, \ seqName, start, pubsHasSupp, fasta, articleTable); } } } printTrackHtml(tdb); hFreeConn(&conn); }