ace0774d8f9da573531b33371551fd9af0a763a2 max Mon Apr 16 17:50:21 2012 -0700 html modif suggested by greg diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c index 43329f8..b2a0cf1 100644 --- src/hg/hgc/pubs.c +++ src/hg/hgc/pubs.c @@ -1,719 +1,719 @@ /* pubs.c - display details of publiations literature track (pubsxxx tables) */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "hgc.h" #include "hgColors.h" #include "trackDb.h" #include "web.h" #include "hash.h" #include "obscure.h" #include "common.h" #include "string.h" //include "hgTrackUi.h" // cgi var to activate debug output static int pubsDebug = 0; // global var for printArticleInfo to indicate if article has suppl info // Most publishers have supp data bool pubsHasSupp = TRUE; // global var for printArticleInfo to indicate if article is elsevier bool pubsIsElsevier = FALSE; // internal section types in mysql table static char* pubsSecNames[] ={ "header", "abstract", "intro", "methods", "results", "discussion", "conclusions", "ack", "refs", "unknown" }; // // whether a checkbox is checked by default, have to correspond to pubsSecNames static int pubsSecChecked[] ={ 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 }; static char* pubsSequenceTable; static char* mangleUrl(char* url) /* add publisher specific parameters to url and return new url*/ { if (!stringIn("sciencedirect.com", url)) return url; // cgi param to add the "UCSC matches" sciverse application to elsevier's sciencedirect char* sdAddParam = "?svAppaddApp=298535"; char* longUrl = catTwoStrings(url, sdAddParam); char* newUrl = replaceChars(longUrl, "article", "svapps"); return newUrl; } static void printFilterLink(char* pslTrack, char* articleId, char* articleTable) /* print a link to hgTracks with an additional cgi param to activate the single article filter */ { int start = cgiInt("o"); int end = cgiInt("t"); char qBuf[1024]; struct sqlConnection *conn = hAllocConn(database); safef(qBuf, sizeof(qBuf), "SELECT CONCAT(firstAuthor, year) FROM %s WHERE articleId='%s';", articleTable, articleId); char* dispId = sqlQuickString(conn, qBuf); printf("

", hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack, dispId); printf("Show these sequence matches individually on genome browser (activates track \"" "Individual matches for article\")

"); hFreeConn(&conn); } static char* makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(pubsSecNames)/sizeof(char *); struct slName* names = NULL; int i; for (i=0; i\n"); printf("Sections of article shown:
\n"); printf("
\n", cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i")); for (i=0; i\n", pubsSecNames[i]); printf("%s\n", secLabels[i]); else printf("value=\"1\">%s\n", secLabels[i]); } printf("\n", cgiString("o")); printf("\n", cgiString("g")); printf("\n", cgiString("t")); printf("\n", cgiString("i")); printf("\n", cart->sessionId); printf("
"); printf("\n"); printf("

\n"); } static void printLimitWarning(struct sqlConnection *conn, char* markerTable, char* item, int itemLimit, char* sectionList) { char query[4000]; safef(query, sizeof(query), "SELECT COUNT(*) from %s WHERE markerId='%s' AND section in (%s) ", markerTable, item, sectionList); if (sqlNeedQuickNum(conn, query) > itemLimit) { printf("This marker is mentioned more than %d times
\n", itemLimit); printf("The results would take too long to load in your browser and are " "therefore limited to %d articles.

\n", itemLimit); } } static void printMarkerSnippets(struct sqlConnection *conn, char* articleTable, char* markerTable, char* item) { /* do not show more snippets than this limit */ int itemLimit=1000; printSectionCheckboxes(); char* sectionList = makeSqlMarkerList(); printLimitWarning(conn, markerTable, item, itemLimit, sectionList); printf("

Snippets from Publications:

"); struct sqlResult* sr = queryMarkerRows(conn, markerTable, articleTable, item, itemLimit, sectionList); char **row; while ((row = sqlNextRow(sr)) != NULL) { char* articleId = row[0]; char* url = row[1]; char* title = row[2]; char* authors = row[3]; char* citation = row[4]; char* pmid = row[5]; char* snippets = row[6]; url = mangleUrl(url); printf("%s ", url, title); printf("%s; ", authors); printf("%s ", citation); if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 ) printf(", PMID%s\n", pmid, pmid); printf("
\n"); if (pubsDebug) printf("articleId=%s", articleId); printf("%s

", snippets); printf("


"); } freeMem(sectionList); sqlFreeResult(&sr); } static char* printArticleInfo(struct sqlConnection *conn, char* item, char* pubsArticleTable) /* Header with information about paper, return documentId */ { char query[512]; safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract, pmid FROM %s WHERE articleId='%s'", pubsArticleTable, item); struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *articleId=NULL; if ((row = sqlNextRow(sr)) == NULL) { printf("Could not resolve articleId %s, this is an internal error.\n", item); printf("Please send an email to max@soe.ucsc.edu\n"); sqlFreeResult(&sr); return NULL; } articleId = cloneString(row[0]); char* url = row[1]; char* title = row[2]; char* authors = row[3]; char* cit = row[4]; char* abstract = row[5]; char* pmid = row[6]; url = mangleUrl(url); if (strlen(abstract)==0) abstract = "(No abstract available for this article. " "Please follow the link to the fulltext above.)"; if (stringIn("sciencedirect.com", url)) { pubsHasSupp = FALSE; pubsIsElsevier = TRUE; } printf("

%s

\n", authors); printf("%s\n", url, title); printf("

%s", cit); if (strlen(pmid)!=0 && strcmp(pmid, "0")) printf(", PMID%s\n", pmid, pmid); printf("

\n"); printf("

%s

\n", abstract); sqlFreeResult(&sr); return articleId; } static struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, \ char* articleId, char *item, char* seqName, int start) /* return a hash with the sequence IDs for a given chain of BLAT matches */ { char query[512]; /* check first if the column exists (some debugging tables on hgwdev don't have seqIds) */ safef(query, sizeof(query), "SHOW COLUMNS FROM %s LIKE 'seqIds';", trackTable); char* seqIdPresent = sqlQuickString(conn, query); if (!seqIdPresent) { return NULL; } /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/ safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " "and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start); if (pubsDebug) printf("%s
", query); // split comma-sep list into parts char* seqIdCoordString = sqlQuickString(conn, query); char* seqIdCoords[1024]; int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i\n"); printf("\n"); if (showDesc) puts(" Article file\n"); puts(" One row per sequence, with flanking text, sequence in bold\n"); if (pubsDebug) puts(" Identifiers\n"); if (!isClickedSection && !pubsDebug) puts(" Chained matches with this sequence\n"); puts("\n"); } static void printAddWbr(char* text, int distance) /* a crazy hack for firefox/mozilla that is unable to break long words in tables * We need to add a tag every x characters in the text to make text breakable. */ { int i; i = 0; char* c; c = text; bool doNotBreak = FALSE; while (*c != 0) { if ((*c=='&') || (*c=='<')) doNotBreak = TRUE; if (*c==';' || (*c =='>')) doNotBreak = FALSE; printf("%c", *c); if (i % distance == 0 && ! doNotBreak) printf(""); c++; i++; } } void printHgTracksLink(char* db, char* chrom, int start, int end, char* linkText, char* optUrlStr) /* print link to hgTracks for db at pos */ { char buf[1024]; if (linkText==NULL) { char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); safef(buf, sizeof(buf), "%s:%s-%s (%s)", chrom, startBuf, endBuf, db); linkText = buf; } if (optUrlStr==NULL) optUrlStr = ""; printf("%s\n", hgTracksPathAndSettings(), db, chrom, start, end, optUrlStr, linkText); } void printGbLinks(struct slName* locs) /* print hash keys in format hg19/chr1:1-1000 as links */ { struct slName *el; for (el = locs; el != NULL; el = el->next) { char* locString = el->name; char* db = cloneNextWordByDelimiter(&locString, '/'); char* chrom = cloneNextWordByDelimiter(&locString, ':'); char* startStr = cloneNextWordByDelimiter(&locString, '-'); char* endStr = cloneString(locString); int start = atoi(startStr); int end = atoi(endStr); printHgTracksLink(db, chrom, start, end, NULL, NULL); printf("
"); freeMem(endStr); //XX why can't I free these? freeMem(chrom); freeMem(startStr); freeMem(db); } } static bool printSeqSection(char* articleId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char* pslTable, char* articleTable) /* print a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql char query[4096]; safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence " "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId); if (pubsDebug) puts(query); struct sqlResult *sr = sqlGetResult(conn, query); // construct title for section char* otherFormat = NULL; if (fasta) otherFormat = "table"; else otherFormat = "fasta"; char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), "%s (%s format)", title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), !fasta, otherFormat); webNewSection("%s", fullTitle); if (isClickedSection) { printFilterLink(pslTable, articleId, articleTable); printf(""); } else printf(""); if (!fasta) printSeqHeaders(showDesc, isClickedSection); char **row; bool foundSkippedRows = FALSE; while ((row = sqlNextRow(sr)) != NULL) { char* fileDesc = row[0]; char* snippet = row[1]; char* locString= row[2]; char* artId = row[3]; char* fileId = row[4]; char* seqId = row[5]; char* seq = row[6]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation char annotId[100]; safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); if (pubsDebug) printf("%s", annotId); // only display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } printf(""); if (fasta) printf(">%s
%s
", annotId, seq); else { - printf("\n", HG_COL_LOCAL_TABLE); + printf("\n"); if (showDesc) printf("%s\n", fileDesc); //printf("%s\n", snippet); printf(""); printAddWbr(snippet, 40); printf("\n"); if (pubsDebug) printf("article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); // print links to locations if (!isClickedSection && !pubsDebug) { // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" //locs = charSepToSlNames(locString, ','); char* locArr[1024]; int partCount = chopString(locString, ",", locArr, ArraySize(locArr)); printf(""); if (partCount==0) printf("No matches"); else { struct slName *locs; locs = slNameListFromStringArray(locArr, partCount); slUniqify(&locs, slNameCmp, slNameFree); printGbLinks(locs); printf("
"); printf("\n"); slFreeList(&locs); } } printf("\n"); } } printf("\n"); // finish section webEndSectionTables(); sqlFreeResult(&sr); return foundSkippedRows; } static void printSeqInfo(struct sqlConnection* conn, char* trackTable, char* pslTable, char* articleId, char* item, char* seqName, int start, bool fileDesc, bool fasta, char* articleTable) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \ fileDesc, conn, clickedSeqs, 1, fasta, pslTable, articleTable); else skippedRows=1; if (skippedRows) printSeqSection(articleId, "Other Sequences in this article", \ fileDesc, conn, clickedSeqs, 0, fasta, pslTable, articleTable); if (pubsIsElsevier) printf("

Copyright 2012 Elsevier B.V. All rights reserved.

"); freeHash(&clickedSeqs); } static void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) { char versionString[256]; char dateReference[256]; char headerTitle[512]; /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { char query[256]; safef(query, sizeof(query), \ "SELECT version,dateReference FROM hgFixed.trackVersion " "WHERE db = '%s' AND name = 'pubs' ORDER BY updateTime DESC limit 1", database); struct sqlResult *sr = sqlGetResult(conn, query); char **row; /* in case of NULL result from the table */ versionString[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { safef(versionString, sizeof(versionString), "version %s", row[0]); safef(dateReference, sizeof(dateReference), "%s", row[1]); } sqlFreeResult(&sr); } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); } static void printPositionAndSize(int start, int end, bool showSize) { printf("Position: " "", hgTracksPathAndSettings(), database, seqName, start+1, end); char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); printf("%s:%s-%s
\n", seqName, startBuf, endBuf); long size = end - start; sprintLongWithCommas(startBuf, size); if (showSize) printf("Genomic Size: %s
\n", startBuf); } static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) /* copied from otherOrgs.c */ { char query[512]; struct sqlResult *sr; char **row; bioSeq *seq = NULL; safef(query, sizeof(query), "select sequence from %s where annotId = '%s'", table, id); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) { AllocVar(seq); seq->name = cloneString(id); seq->dna = cloneString(row[0]); seq->size = strlen(seq->dna); } sqlFreeResult(&sr); return seq; } void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item) /* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli */ { bioSeq *oSeq = NULL; writeFramesetType(); puts(""); printf("\nLiterature Sequence vs Genomic\n\n\n"); struct psl *psl = getAlignments(conn, pslTable, item); if (psl == NULL) errAbort("Couldn't find alignment at %s:%s", pslTable, item); oSeq = getSeq(conn, seqTable, item); if (oSeq == NULL) errAbort("%s is in pslTable but not in sequence table. Internal error.", item); enum gfType qt; if (psl->qSize!=oSeq->size) { qt = gftProt; // trying to correct pslMap's changes to qSize/qStarts and blockSizes psl->strand[1]=psl->strand[0]; psl->strand[0]='+'; psl->strand[2]=0; psl->qSize = psl->qSize/3; psl->match = psl->match/3; // Take care of codons that go over block boundaries: // Convert a block with blockSizes=58,32 and qStarts=0,58, // to blockSizes=19,11 and qStarts=0,19 int i; int remaind = 0; for (i=0; iblockCount; i++) { psl->qStarts[i] = psl->qStarts[i]/3; int bs = psl->blockSizes[i]; remaind += (bs % 3); if (remaind>=3) { bs += 1; remaind -= 3; } psl->blockSizes[i] = bs/3; } } else qt = gftDna; showSomeAlignment(psl, oSeq, qt, 0, oSeq->size, NULL, 0, 0); } void doPubsDetails(struct trackDb *tdb, char *item) /* publications custom display */ { int start = cgiInt("o"); int end = cgiOptionalInt("t", 0); char* trackTable = cgiString("g"); char* aliTable = cgiOptionalString("aliTable"); int fasta = cgiOptionalInt("fasta", 0); pubsDebug = cgiOptionalInt("debug", 0); struct sqlConnection *conn = hAllocConn(database); char* articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable"); if (stringIn("Psl", trackTable)) { if (aliTable!=NULL) { pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); pubsAli(conn, trackTable, pubsSequenceTable, item); return; } else { genericHeader(tdb, item); struct psl *psl = getAlignments(conn, trackTable, item); printf("

Genomic Alignment with sequence found in publication fulltext

"); printAlignmentsSimple(psl, start, trackTable, trackTable, item); } } else { printTrackVersion(tdb, conn, item); if (stringIn("Marker", trackTable)) { char* markerTable = trackDbRequiredSetting(tdb, "pubsMarkerTable"); printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { printPositionAndSize(start, end, 1); pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); char* articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, pubsHasSupp, fasta, articleTable); } } } printTrackHtml(tdb); hFreeConn(&conn); }