2b21a977be741c62d8db2fa7869f1be0c120cf63 max Wed Nov 16 11:40:22 2011 -0800 display code for publications (t2g) track, most of them by larrym diff --git src/hg/hgc/t2g.c src/hg/hgc/t2g.c index 1bcb315..01f912f 100644 --- src/hg/hgc/t2g.c +++ src/hg/hgc/t2g.c @@ -1,176 +1,284 @@ -/* t2g.c - display details of text2genome stuff */ +/* t2g.c - display details of text2genome literature track (t2gxxx tables) */ #include "common.h" #include "jksql.h" #include "hdb.h" #include "hgc.h" #include "hgColors.h" #include "trackDb.h" #include "web.h" #include "hash.h" #include "obscure.h" +//include "hgTrackUi.h" -#define PMCURL "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC" +// cgi var to activate debug output +int debug = 0; +// cgi var to activate fasta output +int fasta = 0; -void printPubmedLink(char* pmid) -{ - printf("PubMed: %s
\n", pmid, pmid); -} - -void printPmcLink(char* pmcId) -{ - printf("PubMed Central: PMC%s
\n", PMCURL, pmcId, pmcId); -} +char* sequenceTable; +char* articleTable; -void printT2gLink(char* pmcId) -{ - printf("Text2Genome: %s
\n", pmcId, pmcId); -} - -void printLinks(char* pmid, char* pmcId) -{ - printPubmedLink(pmid); - printPmcLink(pmcId); - printT2gLink(pmcId); - printf("
\n"); -} - -char* printArticleInfo(struct sqlConnection *conn, struct trackDb* tdb, char* item) +char* printArticleInfo(struct sqlConnection *conn, char* item) /* Header with information about paper, return documentId */ { char query[512]; - char* articleTable = hashMustFindVal(tdb->settingsHash, "articleTable"); - safef(query, sizeof(query), "SELECT pmid, pmcId, title,authors, abstract FROM %s WHERE displayId='%s'", articleTable, item); + safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract FROM %s WHERE displayId='%s'", articleTable, item); + struct sqlResult *sr = sqlGetResult(conn, query); char **row; char *docId=0; if ((row = sqlNextRow(sr)) != NULL) { - printLinks(row[0], row[1]); - printf("%s\n", PMCURL, row[1], row[2]); - printf("

%s

\n", row[3]); - printf("

%s

\n", row[4]); - docId = row[1]; + char* abstract = row[5]; + if (strlen(abstract)==0) + { + abstract = "(No abstract found for this article. Please use the link to the fulltext above.)"; + } + docId = cloneString(row[0]); + printf("

%s

\n", row[3]); + printf("%s\n", row[1], row[2]); + printf("

%s

\n", row[4]); + printf("

%s

\n", abstract); } sqlFreeResult(&sr); return docId; } -void printSeqInfo(struct sqlConnection* conn, struct trackDb* tdb, - char* docId, char* item, char* seqName, int start) +struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, char* docId, char *item, char* seqName, int start) { - /* print table of sequences */ - - /* get all sequences for paper identified by docId*/ - char query[512]; - char* sequenceTable = hashMustFindVal(tdb->settingsHash, "sequenceTable"); - safef(query, sizeof(query), "SELECT concat_ws('|',seqId, sequence) FROM %s WHERE pmcId='%s'", sequenceTable, docId); - struct slName *seqList = sqlQuickList(conn, query); - - /* get sequence-Ids for feature that was clicked on (item&startPos are unique) and put into hash */ + /* get sequence-Ids for feature that was clicked (item&startPos are unique) and return as hash */ // there must be an easier way to do this... // couldn't find a function that splits a string and converts it to a list - safef(query, sizeof(query), "SELECT seqIds,'' FROM t2g WHERE name='%s' " - "and chrom='%s' and chromStart=%d", item, seqName, start); - char* seqIdsString = sqlQuickString(conn, query); - char* seqIds[1024]; - int partCount = chopString(seqIdsString, ",", seqIds, ArraySize(seqIds)); + char query[512]; + safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' " + "and chrom='%s' and chromStart=%d", trackTable, item, seqName, start); + if (debug) + printf(query); + char* seqIdCoordString = sqlQuickString(conn, query); + char* seqIdCoords[1024]; + int partCount = chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); int i; struct hash *seqIdHash = NULL; seqIdHash = newHash(0); for (i=0; i", seqId[0], seqId[1]); + hashAdd(seqIdHash, seqId[0], seqId[1]); + } + freeMem(seqIdCoordString); + return seqIdHash; +} - // output table - webNewSection("Sequences in article"); - printf("Sequences that map to the feature that was clicked " - "are highlighted in bold"); - webPrintLinkTableStart(); +void printSeqHeaders(bool debug, bool showDesc, bool isClickedSection) +{ + printf("\n", HG_COL_BORDER); + printf("\n", HG_COL_TABLE_LABEL); + if (showDesc) + puts(" \n"); + puts(" \n"); + if (debug) + puts(" \n"); + + if (!isClickedSection && !debug) + puts(" \n"); + puts("\n"); +} - struct slName *listEl = seqList; - while (listEl != NULL) +bool printSeqSection(char* docId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* filterIdHash, bool isClickedSection, bool fasta) +/* print a table of sequences, show only sequences with IDs in hash, + * */ { - char* parts[2]; - chopString(listEl->name, "|", parts, 2); - char* seqId = parts[0]; - char* seq = parts[1]; + // get data from mysql + char query[4096]; + safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId,fileId, seqId, sequence FROM %s WHERE articleId='%s';", sequenceTable, docId); + if (debug) + printf(query); + struct sqlResult *sr = sqlGetResult(conn, query); + + // construct title for section + char fullTitle[5000]; + safef(fullTitle, sizeof(fullTitle), + "%s (switch fasta format)", + title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"), + !fasta); + + webNewSection(fullTitle); + + if (!fasta) + printSeqHeaders(debug, showDesc, isClickedSection); - if (hashLookup(seqIdHash, seqId)) - printf("", - HG_COL_TABLE, seq); + char **row; + bool foundSkippedRows = false; + while ((row = sqlNextRow(sr)) != NULL) + { + char* fileDesc = row[0]; + char* snippet = row[1]; + char* locList = row[2]; + char* artId = row[3]; + char* fileId = row[4]; + char* seqId = row[5]; + char* seq = row[6]; + + // annotation (=sequence) ID is a 64 bit int with 10 digits for + // article, 3 digits for file, 5 for annotation + char annotId[100]; + safef(annotId, 100, "%010d%03d%05d", atoi(artId), atoi(fileId), atoi(seqId)); + + // only display this sequence if we're in the right section + if ((hashLookup(filterIdHash, annotId)==0) ^ !isClickedSection) { + foundSkippedRows = true; + continue; + } + + if (fasta) + { + printf(">%s
%s
", annotId, seq); + } else - printf("\n", HG_COL_TABLE, seq); - webPrintLinkTableNewRow(); - listEl=listEl->next; + { + printf("\n", HG_COL_LOCAL_TABLE); + if (showDesc) + printf("\n", snippet); + printf("\n", snippet); + if (debug) + { + printf("\n"); + } + printf("\n"); + } + } + printf("\n"); + webEndSectionTables(); + sqlFreeResult(&sr); + return foundSkippedRows; } -void doT2gDetails(struct trackDb *tdb, char *item) -/* text2genome.org custom display */ +void printSeqInfo(struct sqlConnection* conn, char* trackTable, + char* docId, char* item, char* seqName, int start, bool fileDesc, bool fasta) + /* print sequences, split into two sections + * two sections: one for sequences that were clicked, one for all others*/ +{ + struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, docId, item, seqName, start); + + bool skippedRows; + skippedRows = printSeqSection(docId, "Sequences used to construct this feature", fileDesc, conn, clickedSeqs, 1, fasta); + if (skippedRows) + printSeqSection(docId, "Other Sequences in this article", fileDesc, conn, clickedSeqs, 0, fasta); + else + printf("

No more sequences found in this article

"); + freeHash(&clickedSeqs); + +} + +void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) { -int start = cgiInt("o"); -int end = cgiInt("t"); char versionString[256]; char dateReference[256]; char headerTitle[512]; -struct sqlConnection *conn = hAllocConn(database); - /* see if hgFixed.trackVersion exists */ boolean trackVersionExists = hTableExists("hgFixed", "trackVersion"); if (trackVersionExists) { char query[256]; safef(query, sizeof(query), "select version,dateReference from hgFixed.trackVersion where db = '%s' AND name = 't2g' order by updateTime DESC limit 1", database); struct sqlResult *sr = sqlGetResult(conn, query); char **row; /* in case of NULL result from the table */ versionString[0] = 0; while ((row = sqlNextRow(sr)) != NULL) { safef(versionString, sizeof(versionString), "version %s", row[0]); safef(dateReference, sizeof(dateReference), "%s", row[1]); } sqlFreeResult(&sr); } else { versionString[0] = 0; dateReference[0] = 0; } if (versionString[0]) safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString); else safef(headerTitle, sizeof(headerTitle), "%s", item); genericHeader(tdb, headerTitle); +} +void printPositionAndSize(int start, int end) +{ printf("Position: " - "", + "", hgTracksPathAndSettings(), database, seqName, start+1, end); char startBuf[64], endBuf[64]; sprintLongWithCommas(startBuf, start + 1); sprintLongWithCommas(endBuf, end); printf("%s:%s-%s
\n", seqName, startBuf, endBuf); long size = end - start; sprintLongWithCommas(startBuf, size); printf("Genomic Size: %s
\n", startBuf); +} -char* docId = printArticleInfo(conn, tdb, item); +void doT2gDetails(struct trackDb *tdb, char *item) +/* text2genome.org custom display */ +{ +int start = cgiInt("o"); +int end = cgiInt("t"); +char* trackTable = cgiString("g"); +debug = cgiOptionalInt("debug", 0); +fasta = cgiOptionalInt("fasta", 0); + +struct sqlConnection *conn = hAllocConn(database); +printTrackVersion(tdb, conn, item); +printPositionAndSize(start, end); + +sequenceTable = hashMustFindVal(tdb->settingsHash, "sequenceTable"); +articleTable = hashMustFindVal(tdb->settingsHash, "articleTable"); + +char* docId = printArticleInfo(conn, item); if (docId!=0) - printSeqInfo(conn, tdb, docId, item, seqName, start); +{ + bool showDesc; + showDesc = (! endsWith(trackTable, "Elsevier")); + printSeqInfo(conn, trackTable, docId, item, seqName, start, showDesc, fasta); +} + +printTrackHtml(tdb); hFreeConn(&conn); }

Article fileSequence (in bold) with flanking textIdentifiersMatches
%s%s" - "
%s\n", fileDesc); + //printf("%s%sarticle %s, file %s, seq %s, annotId %s", artId, fileId, seqId, annotId); } - webPrintLinkTableEnd(); - printTrackHtml(tdb); - slFreeList(seqList); - freeHash(&seqIdHash); + // print links to locations + if (!isClickedSection && !debug) { + struct slName *locs; + // format: hg19/chr1:300-400,mm9/chr1:60006-23234 + // split on "," then split on "/" + locs = charSepToSlNames(locList, ','); + printf(""); + if (locs==NULL) + printf("No matches"); + for ( ; locs!=NULL; locs = locs->next) + { + char* locString = locs->name; + char* parts[2]; + int partCount; + partCount = chopString(locString, "/", parts, ArraySize(parts)); + assert(partCount==2); + char* db = parts[0]; + char* pos = parts[1]; + printf("%s (%s)", cartSidUrlString(cart), db, pos, pos, db); + printf("
"); + } + printf("