ace0774d8f9da573531b33371551fd9af0a763a2
max
Mon Apr 16 17:50:21 2012 -0700
html modif suggested by greg
diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c
index 43329f8..b2a0cf1 100644
--- src/hg/hgc/pubs.c
+++ src/hg/hgc/pubs.c
@@ -1,719 +1,719 @@
/* pubs.c - display details of publications literature track (pubsxxx tables) */
#include "common.h"
#include "jksql.h"
#include "hdb.h"
#include "hgc.h"
#include "hgColors.h"
#include "trackDb.h"
#include "web.h"
#include "hash.h"
#include "obscure.h"
#include "common.h"
#include "string.h"
//include "hgTrackUi.h"
// Debug flag: doPubsDetails sets it from the optional cgi variable named debug.
// When non-zero, several functions in this file print extra diagnostic output
// (queries, identifiers).
static int pubsDebug = 0;
// global var for printArticleInfo to indicate if article has suppl info
// Most publishers have supp data, hence the TRUE default. printArticleInfo clears it
// when the article url contains sciencedirect.com; doPubsDetails passes it on to
// printSeqInfo.
bool pubsHasSupp = TRUE;
// global var for printArticleInfo to indicate if article is elsevier
// (set to TRUE when the article url contains sciencedirect.com). printSeqInfo reads it
// to decide whether to print the copyright line.
bool pubsIsElsevier = FALSE;
/* Internal section types as they are named in the mysql table.
 * The order is significant: pubsSecChecked is indexed in parallel. */
static char *pubsSecNames[] = {
    "header",
    "abstract",
    "intro",
    "methods",
    "results",
    "discussion",
    "conclusions",
    "ack",
    "refs",
    "unknown",
};

/* Whether the checkbox of a section is checked by default (1) or not (0).
 * Entries have to correspond, position by position, to pubsSecNames. */
static int pubsSecChecked[] = {
    1, /* header */
    1, /* abstract */
    1, /* intro */
    1, /* methods */
    1, /* results */
    1, /* discussion */
    1, /* conclusions */
    0, /* ack */
    0, /* refs */
    1, /* unknown */
};

/* Name of the sequence table; doPubsDetails assigns it from the
 * pubsSequenceTable trackDb setting before the sequence functions run. */
static char *pubsSequenceTable;
static char* mangleUrl(char* url)
/* add publisher specific parameters to url and return new url*/
{
if (!stringIn("sciencedirect.com", url))
return url;
// cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect
char* sdAddParam = "?svAppaddApp=298535";
char* longUrl = catTwoStrings(url, sdAddParam);
char* newUrl = replaceChars(longUrl, "article", "svapps");
return newUrl;
}
static void printFilterLink(char* pslTrack, char* articleId, char* articleTable)
/* print a link to hgTracks with an additional cgi param to activate the single article filter
 * - pslTrack: passed to the link printf together with articleId
 * - articleId: id of the article, also used to look up its display id
 * - articleTable: table queried for firstAuthor and year
 * Opens its own database connection and releases it before returning.
 * NOTE(review): the format string of the first printf is empty in this copy although
 * eight arguments are passed; the link markup seems to have been lost - confirm. */
{
// feature range is read from the cgi variables o and t
int start = cgiInt("o");
int end = cgiInt("t");
char qBuf[1024];
struct sqlConnection *conn = hAllocConn(database);
// display id of the article: first author concatenated with the year
safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId);
char* dispId = sqlQuickString(conn, qBuf);
// start+1 (not start) is handed to the link for the start position
printf("
",
hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId);
printf("Show these sequence matches individually on genome browser (activates track \""
"Individual matches for article\")
");
hFreeConn(&conn);
}
static char* makeSqlMarkerList(void)
/* return list of sections from cgi vars, format like "'abstract','header'"
 * NOTE(review): from the for-loop header onward the text of this function looks
 * truncated in this copy; the statements that follow print a heading about the
 * sections shown and do not obviously belong here - confirm against the original. */
{
// number of entries in the pubsSecNames table
int secCount = sizeof(pubsSecNames)/sizeof(char *);
// list head, initially empty
struct slName* names = NULL;
int i;
for (i=0; i\n");
printf("Sections of article shown:
\n");
printf("\n");
}
static void printLimitWarning(struct sqlConnection *conn, char* markerTable,
char* item, int itemLimit, char* sectionList)
/* Print a notice when markerTable holds more than itemLimit rows for marker item
 * within the sections of sectionList; print nothing otherwise.
 * - conn: database connection the count query is run on
 * - markerTable: table that is searched by markerId
 * - item: marker id whose rows are counted
 * - itemLimit: threshold; only a count strictly larger than this triggers the notice
 * - sectionList: text inserted inside the parentheses of the SQL section in (...) clause */
{
char query[4000];
// count rows for this marker, restricted to the given sections
safef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%s) ", markerTable, item, sectionList);
if (sqlNeedQuickNum(conn, query) > itemLimit)
{
printf("This marker is mentioned more than %d times
\n", itemLimit);
printf("The results would take too long to load in your browser and are "
"therefore limited to %d articles.
\n", itemLimit);
}
}
static void printMarkerSnippets(struct sqlConnection *conn, char* articleTable, char* markerTable, char* item)
/* Print the section checkboxes, an optional limit warning and then, for every row
 * returned by queryMarkerRows, the article reference followed by its text snippets.
 * - conn: database connection used for all queries
 * - articleTable, markerTable: table names handed on to queryMarkerRows
 * - item: marker id that was clicked
 * NOTE(review): queryMarkerRows is not visible in this copy; the column order used
 * below (articleId, url, title, authors, citation, pmid, snippets) is presumably the
 * order of its SELECT list - confirm.
 * NOTE(review): several printf calls pass more arguments than their format strings
 * consume; markup seems to have been lost from the literals - confirm. */
{
/* do not show more snippets than this limit */
int itemLimit=1000;
printSectionCheckboxes();
// section list as produced by makeSqlMarkerList, freed at the end of this function
char* sectionList = makeSqlMarkerList();
printLimitWarning(conn, markerTable, item, itemLimit, sectionList);
printf("
Snippets from Publications:
");
struct sqlResult* sr = queryMarkerRows(conn, markerTable, articleTable, item, itemLimit, sectionList);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
{
char* articleId = row[0];
char* url = row[1];
char* title = row[2];
char* authors = row[3];
char* citation = row[4];
char* pmid = row[5];
char* snippets = row[6];
// publisher specific rewrite of the url (see mangleUrl)
url = mangleUrl(url);
printf("%s ", url, title);
printf("%s; ", authors);
printf("%s ", citation);
// a pmid that is empty or equal to 0 is not printed
if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 )
printf(", PMID%s\n", pmid, pmid);
printf("
\n");
if (pubsDebug)
printf("articleId=%s", articleId);
printf("%s", snippets);
printf("
");
}
freeMem(sectionList);
sqlFreeResult(&sr);
}
static char* printArticleInfo(struct sqlConnection *conn, char* item, char* pubsArticleTable)
/* Header with information about paper, return documentId
 * - conn: database connection used for the lookup
 * - item: articleId that is searched for in pubsArticleTable
 * - pubsArticleTable: name of the article table
 * Prints authors, title, citation, optional PMID and the abstract.
 * Returns a cloneString copy of the articleId column, or NULL (after printing an
 * error message) when no row matches item.
 * Side effect: for urls containing sciencedirect.com the globals pubsHasSupp and
 * pubsIsElsevier are set to FALSE and TRUE respectively. */
{
char query[512];
safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract, pmid FROM %s WHERE articleId='%s'", pubsArticleTable, item);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
char *articleId=NULL;
// only the first row of the result is used
if ((row = sqlNextRow(sr)) == NULL)
{
printf("Could not resolve articleId %s, this is an internal error.\n", item);
printf("Please send an email to max@soe.ucsc.edu\n");
sqlFreeResult(&sr);
return NULL;
}
// cloned because the row memory is released by sqlFreeResult below
articleId = cloneString(row[0]);
char* url = row[1];
char* title = row[2];
char* authors = row[3];
char* cit = row[4];
char* abstract = row[5];
char* pmid = row[6];
// publisher specific rewrite of the url (see mangleUrl)
url = mangleUrl(url);
// substitute a placeholder text for an empty abstract
if (strlen(abstract)==0)
abstract = "(No abstract available for this article. "
"Please follow the link to the fulltext above.)";
if (stringIn("sciencedirect.com", url))
{
pubsHasSupp = FALSE;
pubsIsElsevier = TRUE;
}
printf("%s
\n", authors);
printf("%s\n", url, title);
printf("%s", cit);
// print the PMID only if it is non-empty and differs from 0
if (strlen(pmid)!=0 && strcmp(pmid, "0"))
printf(", PMID%s\n", pmid, pmid);
printf("
\n");
printf("%s
\n", abstract);
sqlFreeResult(&sr);
return articleId;
}
static struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, \
char* articleId, char *item, char* seqName, int start)
/* return a hash with the sequence IDs for a given chain of BLAT matches
 * - conn: database connection used for both queries
 * - trackTable: table that holds the seqIds column
 * - item, seqName, start: name, chrom and chromStart of the clicked feature
 * Returns NULL when trackTable has no seqIds column.
 * NOTE(review): from the for-loop header onward the text looks truncated in this
 * copy; the puts calls that follow print column captions and presumably belong to a
 * separate function (printSeqHeaders is called elsewhere in this file) - confirm. */
{
char query[512];
/* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */
safef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable);
char* seqIdPresent = sqlQuickString(conn, query);
if (!seqIdPresent) {
return NULL;
}
/* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/
safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' "
"and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start);
if (pubsDebug)
printf("%s
", query);
// split comma-sep list into parts
// NOTE(review): the query result is handed to chopString without a NULL check - confirm
// that a matching row always exists
char* seqIdCoordString = sqlQuickString(conn, query);
// at most ArraySize(seqIdCoords) parts are kept
char* seqIdCoords[1024];
int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords));
int i;
struct hash *seqIdHash = NULL;
seqIdHash = newHash(0);
for (i=0; i\n");
printf("\n");
if (showDesc)
puts(" Article file | \n");
puts(" One row per sequence, with flanking text, sequence in bold | \n");
if (pubsDebug)
puts(" Identifiers | \n");
if (!isClickedSection && !pubsDebug)
puts(" Chained matches with this sequence | \n");
puts("\n");
}
static void printAddWbr(char* text, int distance)
/* Workaround for firefox/mozilla, which cannot break long words in tables:
 * write text to stdout one character at a time and emit the break marker after the
 * characters at positions 0, distance, 2*distance, ... so the text becomes breakable.
 * No marker is emitted while inside an entity (from '&' up to ';') or inside a tag
 * (from '<' up to '>').
 */
{
    bool insideMarkup = FALSE;
    int pos;

    for (pos = 0; text[pos] != 0; pos++)
    {
        char ch = text[pos];

        /* opening characters switch breaking off, closing characters switch it on again */
        if (ch == '&' || ch == '<')
            insideMarkup = TRUE;
        if (ch == ';' || ch == '>')
            insideMarkup = FALSE;

        putchar(ch);

        if (pos % distance == 0 && !insideMarkup)
            printf("");
    }
}
void printHgTracksLink(char* db, char* chrom, int start, int end, char* linkText, char* optUrlStr)
/* print link to hgTracks for db at pos
 * - db, chrom, start, end: database and range the link points to
 * - linkText: text of the link; when NULL a default of the form chrom:start-end (db)
 *   is built, with thousands separators and start shown as start+1
 * - optUrlStr: extra text passed to the link printf; NULL is treated as empty string
 * NOTE(review): the final printf has a single %s in its format but receives seven
 * arguments; the link markup seems to have been lost from the literal - confirm.
 * NOTE(review): the default link text uses start+1 while the printf receives start
 * unchanged - confirm that this difference is intended. */
{
char buf[1024];
if (linkText==NULL)
{
char startBuf[64], endBuf[64];
sprintLongWithCommas(startBuf, start + 1);
sprintLongWithCommas(endBuf, end);
safef(buf, sizeof(buf), "%s:%s-%s (%s)", chrom, startBuf, endBuf, db);
// buf lives until the end of this function, which covers the printf below
linkText = buf;
}
if (optUrlStr==NULL)
optUrlStr = "";
printf("%s\n", hgTracksPathAndSettings(), db, chrom, start, end, optUrlStr, linkText);
}
void printGbLinks(struct slName* locs)
/* print hash keys in format hg19/chr1:1-1000 as links
 * - locs: list of location strings; each name is split into db (up to the slash),
 *   chrom (up to the colon), start (up to the dash) and end (the remainder)
 * One hgTracks link with default link text is printed per list element. */
{
struct slName *el;
for (el = locs; el != NULL; el = el->next)
{
// locString is advanced by each cloneNextWordByDelimiter call; el->name itself is not reassigned
char* locString = el->name;
char* db = cloneNextWordByDelimiter(&locString, '/');
char* chrom = cloneNextWordByDelimiter(&locString, ':');
char* startStr = cloneNextWordByDelimiter(&locString, '-');
char* endStr = cloneString(locString);
// atoi: text that is not a number yields 0 without any error report
int start = atoi(startStr);
int end = atoi(endStr);
printHgTracksLink(db, chrom, start, end, NULL, NULL);
printf("
");
// release the four cloned pieces of this location
freeMem(endStr); //XX why can't I free these?
freeMem(chrom);
freeMem(startStr);
freeMem(db);
}
}
static bool printSeqSection(char* articleId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char* pslTable, char* articleTable)
/* print a table of sequences, show only sequences with IDs in hash,
 * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs
 * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs
 * - other seqs (isClickedSection=False) -> show all other seqs
 * When clickedSeqs is NULL no filtering takes place and every sequence is shown.
 *
 * - articleId: article whose rows are read from pubsSequenceTable
 * - title: start of the section title
 * - showDesc: whether the file description of each sequence is printed
 * - conn: database connection used for the query
 * - fasta: print the sequences in fasta style instead of as table rows
 * - pslTable, articleTable: handed on to printFilterLink for the clicked section
 * Returns TRUE if at least one row was skipped because it belongs to the other section.
 *
 * NOTE(review): several string literals in this function are empty or span lines in
 * this copy and some printf/safef calls receive more arguments than their format
 * consumes; markup seems to have been lost - confirm against the original.
 * */
{
// get data from mysql
char query[4096];
safef(query, sizeof(query),
"SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence "
"FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId);
if (pubsDebug)
puts(query);
struct sqlResult *sr = sqlGetResult(conn, query);
// construct title for section
// otherFormat names the format that is currently NOT shown
char* otherFormat = NULL;
if (fasta)
otherFormat = "table";
else
otherFormat = "fasta";
char fullTitle[5000];
safef(fullTitle, sizeof(fullTitle),
"%s (%s format)",
title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"),
!fasta, otherFormat);
webNewSection("%s", fullTitle);
if (isClickedSection)
{
printFilterLink(pslTable, articleId, articleTable);
printf("");
}
else
printf("");
// column captions are only needed for the table layout
if (!fasta)
printSeqHeaders(showDesc, isClickedSection);
char **row;
bool foundSkippedRows = FALSE;
while ((row = sqlNextRow(sr)) != NULL)
{
char* fileDesc = row[0];
char* snippet = row[1];
char* locString= row[2];
char* artId = row[3];
char* fileId = row[4];
char* seqId = row[5];
char* seq = row[6];
// annotation (=sequence) ID is a 64 bit int with 10 digits for
// article, 3 digits for file, 5 for annotation
// here it is assembled as zero padded text from the three id columns
char annotId[100];
safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId));
if (pubsDebug)
printf("%s", annotId);
// only display this sequence if we're in the right section
// i.e. skip the row when membership in clickedSeqs disagrees with isClickedSection
if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) {
foundSkippedRows = TRUE;
continue;
}
printf(" |
");
if (fasta)
printf(">%s %s
|
", annotId, seq);
else
{
// NOTE(review): the next two statements start with diff markers (- and +), leftovers
// of a patch listing; only one of the two variants can be intended - confirm
- printf("\n", HG_COL_LOCAL_TABLE);
+ printf("
\n");
if (showDesc)
printf("%s\n", fileDesc);
//printf(" | %s | \n", snippet);
printf("");
// snippet is printed through printAddWbr with a distance of 40 characters
printAddWbr(snippet, 40);
printf(" | \n");
if (pubsDebug)
printf("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId);
// print links to locations
if (!isClickedSection && !pubsDebug)
{
// format: hg19/chr1:300-400,mm9/chr1:60006-23234
// split on "," then split on "/"
//locs = charSepToSlNames(locString, ',');
// at most ArraySize(locArr) locations are kept
char* locArr[1024];
int partCount = chopString(locString, ",", locArr, ArraySize(locArr));
printf(" | ");
if (partCount==0)
printf("No matches");
else
{
// duplicates are removed from the location list before the links are printed
struct slName *locs;
locs = slNameListFromStringArray(locArr, partCount);
slUniqify(&locs, slNameCmp, slNameFree);
printGbLinks(locs);
printf(" ");
printf(" | \n");
slFreeList(&locs);
}
}
printf("
\n");
}
}
printf("\n"); // finish section
webEndSectionTables();
sqlFreeResult(&sr);
return foundSkippedRows;
}
static void printSeqInfo(struct sqlConnection* conn, char* trackTable,
char* pslTable, char* articleId, char* item, char* seqName, int start,
bool fileDesc, bool fasta, char* articleTable)
/* print sequences, split into two sections
 * two sections: one for sequences that were clicked, one for all others
 * - conn: database connection used for all queries
 * - trackTable, item, seqName, start: identify the clicked feature for getSeqIdHash
 * - pslTable, articleTable: handed on to printSeqSection
 * - articleId: article whose sequences are listed
 * - fileDesc: passed to printSeqSection as its showDesc argument
 * - fasta: passed to printSeqSection to select fasta style output
 * The second section is printed only if the first one skipped rows, or if no hash of
 * clicked sequences is available; in the latter case printSeqSection receives a NULL
 * hash. Finally the copyright line is printed when pubsIsElsevier is set. */
{
struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start);
bool skippedRows;
if (clickedSeqs)
skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \
fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable);
else
skippedRows=1;
if (skippedRows)
printSeqSection(articleId, "Other Sequences in this article", \
fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable);
if (pubsIsElsevier)
printf("Copyright 2012 Elsevier B.V. All rights reserved.
");
freeHash(&clickedSeqs);
}
static void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item)
/* Print the page header for item, with the track version appended when known.
 * - tdb: track description, handed on to genericHeader
 * - conn: database connection used for the version lookup
 * - item: name of the clicked item, used as (start of) the header title
 * The version is read from hgFixed.trackVersion (most recently updated row with this
 * database and name 'pubs'). If that table does not exist, holds no matching row, or
 * the version column is NULL, the title is just item. */
{
    /* starts out empty, stays empty unless a version is found below */
    char versionString[256] = "";
    char headerTitle[512];

    /* see if hgFixed.trackVersion exists */
    if (hTableExists("hgFixed", "trackVersion"))
    {
        char query[256];
        /* dateReference is still selected, but only the version column is used */
        safef(query, sizeof(query),
            "SELECT version,dateReference FROM hgFixed.trackVersion "
            "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database);
        struct sqlResult *sr = sqlGetResult(conn, query);
        char **row;
        while ((row = sqlNextRow(sr)) != NULL)
        {
            /* never hand a NULL column to %s */
            if (row[0] != NULL)
                safef(versionString, sizeof(versionString), "version %s", row[0]);
        }
        sqlFreeResult(&sr);
    }

    if (versionString[0])
        safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString);
    else
        safef(headerTitle, sizeof(headerTitle), "%s", item);

    genericHeader(tdb, headerTitle);
}
static void printPositionAndSize(int start, int end, bool showSize)
/* Print the position of the feature on seqName and, if showSize is set, its size.
 * - start, end: feature range; start is printed as start+1
 * - showSize: whether the genomic size line (end - start) is printed
 * Numbers are formatted with thousands separators.
 * NOTE(review): the first printf passes five arguments to a format without
 * conversions; the link markup seems to have been lost from the literal - confirm. */
{
printf("Position: "
"",
hgTracksPathAndSettings(), database, seqName, start+1, end);
char startBuf[64], endBuf[64];
sprintLongWithCommas(startBuf, start + 1);
sprintLongWithCommas(endBuf, end);
printf("%s:%s-%s
\n", seqName, startBuf, endBuf);
long size = end - start;
// startBuf is reused here to hold the formatted size
sprintLongWithCommas(startBuf, size);
if (showSize)
printf("Genomic Size: %s
\n", startBuf);
}
static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id)
/* Look up the row of table whose annotId equals id and wrap its sequence column in
 * a freshly allocated bioSeq: name is a copy of id, dna a copy of the sequence and
 * size the length of dna. Returns NULL when the query yields no row.
 * (Adapted from otherOrgs.c.) */
{
    char sql[512];
    safef(sql, sizeof(sql),
        "select sequence from %s where annotId = '%s'", table, id);

    struct sqlResult *result = sqlGetResult(conn, sql);
    char **fields = sqlNextRow(result);
    bioSeq *found = NULL;

    /* only the first row of the result is looked at */
    if (fields != NULL)
    {
        AllocVar(found);
        found->name = cloneString(id);
        found->dna = cloneString(fields[0]);
        found->size = strlen(found->dna);
    }

    sqlFreeResult(&result);
    return found;
}
void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item)
/* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli
 * Show the alignment of a literature sequence against the genome.
 * - conn: database connection used for both lookups
 * - pslTable: table the alignment of item is read from
 * - seqTable: table the sequence of item is read from (via getSeq)
 * - item: id of the alignment / sequence
 * Aborts with errAbort if either the alignment or the sequence cannot be found.
 * When the query size of the psl differs from the length of the stored sequence the
 * alignment is treated as protein and the psl query values are scaled down by 3. */
{
bioSeq *oSeq = NULL;
writeFramesetType();
puts("");
printf("
\nLiterature Sequence vs Genomic\n\n\n");
struct psl *psl = getAlignments(conn, pslTable, item);
if (psl == NULL)
errAbort("Couldn't find alignment at %s:%s", pslTable, item);
oSeq = getSeq(conn, seqTable, item);
if (oSeq == NULL)
errAbort("%s is in pslTable but not in sequence table. Internal error.", item);
enum gfType qt;
if (psl->qSize!=oSeq->size)
{
qt = gftProt;
// trying to correct pslMap's changes to qSize/qStarts and blockSizes
// the single strand character moves to the second position, the first becomes +
psl->strand[1]=psl->strand[0];
psl->strand[0]='+';
psl->strand[2]=0;
psl->qSize = psl->qSize/3;
psl->match = psl->match/3;
// Take care of codons that go over block boundaries:
// Convert a block with blockSizes=58,32 and qStarts=0,58,
// to blockSizes=19,11 and qStarts=0,19
// remaind accumulates the leftover bases (size modulo 3) of the blocks seen so far;
// once it reaches 3 the current block is enlarged by one base before the division
int i;
int remaind = 0;
// NOTE(review): the loop condition below reads iblockCount, which looks garbled in
// this copy (presumably i is compared against psl->blockCount) - confirm
for (i=0; iblockCount; i++)
{
psl->qStarts[i] = psl->qStarts[i]/3;
int bs = psl->blockSizes[i];
remaind += (bs % 3);
if (remaind>=3)
{
bs += 1;
remaind -= 3;
}
psl->blockSizes[i] = bs/3;
}
}
else
qt = gftDna;
showSomeAlignment(psl, oSeq, qt, 0, oSeq->size, NULL, 0, 0);
}
void doPubsDetails(struct trackDb *tdb, char *item)
/* publications custom display
 * Entry point for the details page of the publications tracks.
 * - tdb: track description; the table names are read from its settings
 * - item: name of the clicked item
 * Dispatches on the name of the track table (cgi variable g):
 * - name contains Psl: show the alignment (pubsAli) when the cgi variable aliTable is
 *   present, otherwise a simple list of the alignments
 * - name contains Marker: show position and the text snippets of the marker
 * - anything else: show position, article information and the sequences of the article */
{
// cgi inputs: o = start, t = end (optional, defaults to 0), g = track table name
int start = cgiInt("o");
int end = cgiOptionalInt("t", 0);
char* trackTable = cgiString("g");
char* aliTable = cgiOptionalString("aliTable");
int fasta = cgiOptionalInt("fasta", 0);
pubsDebug = cgiOptionalInt("debug", 0);
struct sqlConnection *conn = hAllocConn(database);
char* articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable");
if (stringIn("Psl", trackTable))
{
if (aliTable!=NULL)
{
pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
pubsAli(conn, trackTable, pubsSequenceTable, item);
// NOTE(review): this early return skips printTrackHtml and the hFreeConn at the end
// of the function, so conn is not released on this path - confirm this is intended
return;
}
else
{
genericHeader(tdb, item);
struct psl *psl = getAlignments(conn, trackTable, item);
printf("Genomic Alignment with sequence found in publication fulltext
");
printAlignmentsSimple(psl, start, trackTable, trackTable, item);
}
}
else
{
printTrackVersion(tdb, conn, item);
if (stringIn("Marker", trackTable))
{
char* markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable");
// markers: the size line is not shown
printPositionAndSize(start, end, 0);
printMarkerSnippets(conn, articleTable, markerTable, item);
}
else
{
printPositionAndSize(start, end, 1);
pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable");
// printArticleInfo returns NULL when the article cannot be resolved; it also sets
// pubsHasSupp, which is passed on below
char* articleId = printArticleInfo(conn, item, articleTable);
if (articleId!=NULL)
{
char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack");
printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable);
}
}
}
printTrackHtml(tdb);
hFreeConn(&conn);
}