0513e85cc01843ecc411b86dfce9180a8d1acf37 max Thu Mar 22 17:40:16 2012 -0700 make individual sequences for publications track clickable, show alignments, correct mouseOvers // Also changes text that is shown for PDF Output in hgTracks (see email with Ann/Donna/Brian) diff --git src/hg/hgc/pubs.c src/hg/hgc/pubs.c index cd09f14..41e8f4a 100644 --- src/hg/hgc/pubs.c +++ src/hg/hgc/pubs.c @@ -1,49 +1,63 @@ /* pubs.c - display details of publiations literature track (pubsxxx tables) */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "hgc.h" #include "hgColors.h" #include "trackDb.h" #include "web.h" #include "hash.h" #include "obscure.h" +#include "common.h" +#include "string.h" //include "hgTrackUi.h" // cgi var to activate debug output static int pubsDebug = 0; // internal section types in mysql table static char* pubsSecNames[] ={ "header", "abstract", "intro", "methods", "results", "discussion", "conclusions", "ack", "refs", "unknown" }; // // whether a checkbox is checked by default, have to correspond to pubsSecNames static int pubsSecChecked[] ={ 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 }; static char* pubsSequenceTable; -static char* pubsArticleTable; + +static void printFilterLink(char* pslTrack, char* articleId) +/* print a link to hgTracks with an additional cgi param to activate the single article filter */ +{ + int start = cgiInt("o"); + int end = cgiInt("t"); + printf(" <A HREF=\"%s&db=%s&position=%s%%3A%d-%d&pubsFilterArticleId=%s&%s=pack\">", + hgTracksPathAndSettings(), database, seqName, start+1, end, articleId, pslTrack); + char startBuf[64], endBuf[64]; + sprintLongWithCommas(startBuf, start + 1); + sprintLongWithCommas(endBuf, end); + printf("Show these sequence matches individually on genome browser</A>"); +} static char* makeSqlMarkerList(void) /* return list of sections from cgi vars, format like "'abstract','header'" */ { int secCount = sizeof(pubsSecNames)/sizeof(char *); struct slName* names = NULL; int i; for (i=0; i<secCount; i++) { // 
add ' around name and add to list char* secName = pubsSecNames[i]; if (cgiOptionalInt(secName, pubsSecChecked[i])) { char nameBuf[100]; safef(nameBuf, sizeof(nameBuf), "'%s'", secName); @@ -173,31 +187,31 @@ printf("<SMALL>%s</SMALL>; ", authors); printf("<SMALL>%s ", citation); if (!isEmpty(pmid) && strcmp(pmid, "0")!=0 ) printf(", <A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\">PMID%s</A>\n", pmid, pmid); printf("</SMALL><BR>\n"); if (pubsDebug) printf("articleId=%s", articleId); printf("<I>%s</I><P>", snippets); printf("<HR>"); } freeMem(sectionList); sqlFreeResult(&sr); } -static char* printArticleInfo(struct sqlConnection *conn, char* item) +static char* printArticleInfo(struct sqlConnection *conn, char* item, char* pubsArticleTable) /* Header with information about paper, return documentId */ { char query[512]; safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract, pmid FROM %s WHERE articleId='%s'", pubsArticleTable, item); struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *articleId=NULL; if ((row = sqlNextRow(sr)) == NULL) { printf("Could not resolve articleId %s, this is an internal error.\n", item); printf("Please send an email to max@soe.ucsc.edu\n"); sqlFreeResult(&sr); return NULL; @@ -238,211 +252,257 @@ return NULL; } /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash*/ safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " "and chrom='%s' and chromStart=%d;", trackTable, item, seqName, start); if (pubsDebug) printf("%s<br>", query); // split comma-sep list into parts char* seqIdCoordString = sqlQuickString(conn, query); char* seqIdCoords[1024]; int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; - // format of part <fileId:3><annotId:5>:<start>-<end>, e.g. 
12312345:0-23 struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i<partCount; i++) { - char* annotIdHitCoord[1024]; - chopString(seqIdCoords[i], ":", annotIdHitCoord, ArraySize(annotIdHitCoord)); - // put articleId and annotId together to create annotation(=sequence) Id - char fullAnnotId[4096]; - safef(fullAnnotId, sizeof(fullAnnotId), "%s%s", articleId, annotIdHitCoord[0]); - if (pubsDebug) - printf("%s, %s, %s<br>", annotIdHitCoord[0], annotIdHitCoord[1], fullAnnotId); - - hashAdd(seqIdHash, fullAnnotId, annotIdHitCoord[1]); + hashAdd(seqIdHash, seqIdCoords[i], NULL); } - freeMem(seqIdCoordString); return seqIdHash; } static void printSeqHeaders(bool showDesc, bool isClickedSection) { printf("<TABLE style=\"background-color: #%s\" WIDTH=\"100%%\" CELLPADDING=\"2\">\n", HG_COL_BORDER); printf("<TR style=\"background-color: #%s; color: #FFFFFF\">\n", HG_COL_TABLE_LABEL); if (showDesc) puts(" <TH style=\"width: 10%\">Article file</TH>\n"); - puts(" <TH style=\"width: 70%\">One table row per sequence, with flanking text, sequence in bold</TH>\n"); + puts(" <TH style=\"width: 60%\">One row per sequence, with flanking text, sequence in bold</TH>\n"); if (pubsDebug) puts(" <TH style=\"width: 30%\">Identifiers</TH>\n"); if (!isClickedSection && !pubsDebug) - puts(" <TH style=\"width: 20%\">Feature that includes this match</TH>\n"); + puts(" <TH style=\"width: 20%\">Chained matches with this sequence</TH>\n"); puts("</TR>\n"); } static void printAddWbr(char* text, int distance) /* a crazy hack for firefox/mozilla that is unable to break long words in tables * We need to add a <wbr> tag every x characters in the text to make text breakable. */ { int i; i = 0; char* c; c = text; bool doNotBreak = FALSE; while (*c != 0) { { if ((*c=='&') || (*c=='<')) doNotBreak = TRUE; if (*c==';' || (*c =='>')) doNotBreak = FALSE; printf("%c", *c); if (i % distance == 0 && ! 
doNotBreak) printf("<wbr>"); c++; i++; } } } -static bool printSeqSection(char* articleId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta) +void printHgTracksLink(char* db, char* chrom, int start, int end, char* linkText, char* optUrlStr) +/* print link to hgTracks for db at pos */ +{ +char buf[1024]; +if (linkText==NULL) +{ + char startBuf[64], endBuf[64]; + sprintLongWithCommas(startBuf, start + 1); + sprintLongWithCommas(endBuf, end); + safef(buf, sizeof(buf), "%s:%s-%s (%s)", seqName, startBuf, endBuf, db); + linkText = buf; +} + +if (optUrlStr==NULL) + optUrlStr = ""; + +printf("<A HREF=\"%s&db=%s&position=%d-%d&%s\">%s</A>\n", hgTracksPathAndSettings(), db, start, end, optUrlStr, linkText); +} + +void printGbLinks(struct slName* locs) +/* print hash keys in format hg19/chr1:1-1000 as links */ +{ +struct slName *el; +for (el = locs; el != NULL; el = el->next) +{ + char* locString = el->name; + char* db = cloneNextWordByDelimiter(&locString, '/'); + char* chrom = cloneNextWordByDelimiter(&locString, ':'); + char* startStr = cloneNextWordByDelimiter(&locString, '-'); + char* endStr = locString; + + int start = atoi(startStr); + int end = atoi(endStr); + printHgTracksLink(db, chrom, start, end, NULL, NULL); + printf("<BR>"); + //freeMem(endStr); //XX why can't I free these? 
+ //freeMem(chrom); + //freeMem(startStr); + //freeMem(db); +} +} + +static bool printSeqSection(char* articleId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* clickedSeqs, bool isClickedSection, bool fasta, char* pslTable) /* print a table of sequences, show only sequences with IDs in hash, * There are two sections, respective sequences are shown depending on isClickedSection and clickedSeqs * - seqs that were clicked on (isClickedSection=True) -> show only seqs in clickedSeqs * - other seqs (isClickedSection=False) -> show all other seqs * * */ { // get data from mysql char query[4096]; safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId, fileId, seqId, sequence " "FROM %s WHERE articleId='%s';", pubsSequenceTable, articleId); if (pubsDebug) puts(query); struct sqlResult *sr = sqlGetResult(conn, query); // construct title for section + char* otherFormat = NULL; + if (fasta) + otherFormat = "table"; + else + otherFormat = "fasta"; + char fullTitle[5000]; safef(fullTitle, sizeof(fullTitle), - "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(switch fasta format)</SMALL></A>", + "%s <A HREF=\"../cgi-bin/hgc?%s&o=%s&t=%s&g=%s&i=%s&fasta=%d\"><SMALL>(%s format)</SMALL></A>", title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), - !fasta); + !fasta, otherFormat); webNewSection(fullTitle); if (!fasta) printSeqHeaders(showDesc, isClickedSection); char **row; bool foundSkippedRows = FALSE; while ((row = sqlNextRow(sr)) != NULL) { char* fileDesc = row[0]; char* snippet = row[1]; - char* locList = row[2]; + char* locString= row[2]; char* artId = row[3]; char* fileId = row[4]; char* seqId = row[5]; char* seq = row[6]; // annotation (=sequence) ID is a 64 bit int with 10 digits for // article, 3 digits for file, 5 for annotation char annotId[100]; safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); if (pubsDebug) printf("%s", annotId); // only 
display this sequence if we're in the right section if (clickedSeqs!=NULL && ((hashLookup(clickedSeqs, annotId)!=NULL) != isClickedSection)) { foundSkippedRows = TRUE; continue; } if (fasta) { - printf("<TT>>%s<BR>%s<BR></TT>", annotId, seq); + printf("<TR><TD><TT>>%s<BR>%s<BR></TT></TD></TR></TABLE>", annotId, seq); } else { printf("<TR style=\"background-color: #%s\">\n", HG_COL_LOCAL_TABLE); if (showDesc) printf("<TD style=\"word-break:break-all\">%s\n", fileDesc); //printf("<TD><I>%s</I></TD>\n", snippet); printf("<TD style=\"word-break:break-all;\"><I>"); printAddWbr(snippet, 40); printf("</I></TD>\n"); if (pubsDebug) { printf("<TD>article %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); } // print links to locations if (!isClickedSection && !pubsDebug) { - struct slName *locs; // format: hg19/chr1:300-400,mm9/chr1:60006-23234 // split on "," then split on "/" - locs = charSepToSlNames(locList, ','); + //locs = charSepToSlNames(locString, ','); + + char* locArr[1024]; + int partCount = chopString(locString, ",", locArr, ArraySize(locArr)); printf("<TD>"); - if (locs==NULL) + if (partCount==0) printf("No matches"); - for ( ; locs!=NULL; locs = locs->next) - { - char* locString = locs->name; - char* parts[2]; - int partCount; - partCount = chopString(locString, "/", parts, ArraySize(parts)); - assert(partCount==2); - char* db = parts[0]; - char* pos = parts[1]; - printf("<A HREF=\"../cgi-bin/hgTracks?%s&db=%s&position=%s\">%s (%s)</A>", cartSidUrlString(cart), db, pos, pos, db); + + else + { + struct slName *locs; + locs = slNameListFromStringArray(locArr, partCount); + slUniqify(&locs, slNameCmp, slNameFree); + printGbLinks(locs); printf("<BR>"); - } printf("</TD>\n"); + slFreeList(&locs); + } + } printf("</TR>\n"); } } printf("</TR>\n"); + + if (isClickedSection) + { + printf("</TABLE></TABLE><TR><TD><P> "); + printFilterLink(pslTable, articleId); + } webEndSectionTables(); sqlFreeResult(&sr); return foundSkippedRows; } static void 
printSeqInfo(struct sqlConnection* conn, char* trackTable, - char* articleId, char* item, char* seqName, int start, bool fileDesc, bool fasta) + char* pslTable, char* articleId, char* item, char* seqName, int start, + bool fileDesc, bool fasta) /* print sequences, split into two sections * two sections: one for sequences that were clicked, one for all others*/ { struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, articleId, item, seqName, start); bool skippedRows; if (clickedSeqs) skippedRows = printSeqSection(articleId, "Sequences used to construct this feature", \ - fileDesc, conn, clickedSeqs, 1, fasta); + fileDesc, conn, clickedSeqs, 1, fasta, pslTable); else skippedRows=1; if (skippedRows) printSeqSection(articleId, "Other Sequences in this article", \ - fileDesc, conn, clickedSeqs, 0, fasta); + fileDesc, conn, clickedSeqs, 0, fasta, pslTable); printf("<P><SMALL>Copyright 2012 Elsevier B.V. All rights reserved.</SMALL><P>"); freeHash(&clickedSeqs); } static void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) { char versionString[256]; char dateReference[256]; char headerTitle[512]; /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { @@ -481,55 +541,116 @@ static void printPositionAndSize(int start, int end, bool showSize) { printf("<B>Position:</B> " "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">", hgTracksPathAndSettings(), database, seqName, start+1, end); char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf); long size = end - start; sprintLongWithCommas(startBuf, size); if (showSize) printf("<B>Genomic Size:</B> %s<BR>\n", startBuf); } +static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id) +/* copied from otherOrgs.c */ +{ +char query[512]; +struct sqlResult *sr; +char **row; +bioSeq *seq = NULL; +safef(query, 
sizeof(query), + "select sequence from %s where annotId = '%s'", table, id); +sr = sqlGetResult(conn, query); +if ((row = sqlNextRow(sr)) != NULL) + { + AllocVar(seq); + seq->name = cloneString(id); + seq->dna = cloneString(row[0]); + seq->size = strlen(seq->dna); + } +sqlFreeResult(&sr); +return seq; +} + +void pubsAli(struct sqlConnection *conn, char *pslTable, char *seqTable, char *item) +/* this is just a ripoff from htcCdnaAli, similar to markd's transMapAli */ +{ +bioSeq *oSeq = NULL; +writeFramesetType(); +puts("<HTML>"); +printf("<HEAD>\n<TITLE>Literature Sequence vs Genomic</TITLE>\n</HEAD>\n\n"); + +struct psl *psl = getAlignments(conn, pslTable, item); +if (psl == NULL) + errAbort("Couldn't find alignment at %s:%s", pslTable, item); + +oSeq = getSeq(conn, seqTable, item); +if (oSeq == NULL) + errAbort("%s is in pslTable but not in sequence table. Internal error.", item); +showSomeAlignment(psl, oSeq, gftDna, 0, oSeq->size, NULL, 0, 0); +printf("hihi"); +} + void doPubsDetails(struct trackDb *tdb, char *item) /* publications custom display */ { int start = cgiInt("o"); -int end = cgiInt("t"); +int end = cgiOptionalInt("t", 0); char* trackTable = cgiString("g"); +char* aliTable = cgiOptionalString("aliTable"); int fasta = cgiOptionalInt("fasta", 0); - pubsDebug = cgiOptionalInt("debug", 0); struct sqlConnection *conn = hAllocConn(database); -printTrackVersion(tdb, conn, item); -if (hashFindVal(tdb->settingsHash, "pubsMarkerTable")) +char* articleTable = trackDbRequiredSetting(tdb, "pubsArticleTable"); + +if (stringIn("Psl", trackTable)) +{ + if (aliTable!=NULL) + { + pubsSequenceTable = trackDbRequiredSetting(tdb, "pubsSequenceTable"); + pubsAli(conn, trackTable, pubsSequenceTable, item); + return; + } + + else + { + genericHeader(tdb, item); + struct psl *psl = getAlignments(conn, trackTable, item); + printf("<H3>Genomic Alignment with sequence found in publication fulltext</H3>"); + printAlignmentsSimple(psl, start, trackTable, trackTable, item); + } +} 
+ +else +{ + printTrackVersion(tdb, conn, item); + if (trackDbSettingClosestToHome(tdb, "pubsMarkerTable") != NULL) { char* markerTable = hashMustFindVal(tdb->settingsHash, "pubsMarkerTable"); - char* articleTable = hashMustFindVal(tdb->settingsHash, "pubsArticleTable"); printPositionAndSize(start, end, 0); printMarkerSnippets(conn, articleTable, markerTable, item); } else { printPositionAndSize(start, end, 1); pubsSequenceTable = hashMustFindVal(tdb->settingsHash, "pubsSequenceTable"); - pubsArticleTable = hashMustFindVal(tdb->settingsHash, "pubsArticleTable"); - - char* articleId = printArticleInfo(conn, item); + char* articleId = printArticleInfo(conn, item, articleTable); if (articleId!=NULL) { bool showDesc; showDesc = (! endsWith(trackTable, "Elsevier")); // avoid clutter: Elsevier has only main text - printSeqInfo(conn, trackTable, articleId, item, seqName, start, showDesc, fasta); + char *pslTable = trackDbRequiredSetting(tdb, "pubsPslTrack"); + printSeqInfo(conn, trackTable, pslTable, articleId, item, seqName, start, showDesc, fasta); + } } } printTrackHtml(tdb); hFreeConn(&conn); }