2b21a977be741c62d8db2fa7869f1be0c120cf63
max
  Wed Nov 16 11:40:22 2011 -0800
display code for the publications (t2g) track, most of it by larrym
diff --git src/hg/hgc/t2g.c src/hg/hgc/t2g.c
index 1bcb315..01f912f 100644
--- src/hg/hgc/t2g.c
+++ src/hg/hgc/t2g.c
@@ -1,176 +1,284 @@
-/* t2g.c - display details of text2genome stuff */
+/* t2g.c - display details of text2genome literature track (t2gxxx tables) */
 
 #include "common.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hgc.h"
 #include "hgColors.h"
 #include "trackDb.h"
 #include "web.h"
 #include "hash.h"
 #include "obscure.h"
+//include "hgTrackUi.h"
 
-#define PMCURL "http://www.ncbi.nlm.nih.gov/pmc/articles/PMC"
+// set from the optional "debug" cgi var in doT2gDetails(); non-zero activates debug output
+int debug = 0;
+// set from the optional "fasta" cgi var in doT2gDetails(); non-zero activates fasta output
+int fasta = 0;
 
-void printPubmedLink(char* pmid)
-{
-    printf("<B>PubMed:</B>&nbsp;<A HREF=\"http://www.ncbi.nlm.nih.gov/pubmed/%s\" TARGET=_blank>%s</A><BR>\n", pmid, pmid);
-}
-
-void printPmcLink(char* pmcId)
-{
-    printf("<B>PubMed&nbsp;Central:</B>&nbsp;<A HREF=\"%s%s\" TARGET=_blank>PMC%s</A><BR>\n", PMCURL, pmcId, pmcId);
-}
+char* sequenceTable; // sequence table name, read from the trackDb setting "sequenceTable" in doT2gDetails()
+char* articleTable;  // article table name, read from the trackDb setting "articleTable" in doT2gDetails()
 
-void printT2gLink(char* pmcId)
-{
-    printf("<B>Text2Genome:</B>&nbsp;<A HREF=\"http://kumiho.smith.man.ac.uk/bergman/text2genome/inspector.cgi?pmcId=%s\" TARGET=_blank>%s</A><BR>\n", pmcId, pmcId);
-}
-
-void printLinks(char* pmid, char* pmcId)
-{
-    printPubmedLink(pmid);
-    printPmcLink(pmcId);
-    printT2gLink(pmcId);
-    printf("<BR>\n");
-}
-
-char* printArticleInfo(struct sqlConnection *conn, struct trackDb* tdb, char* item)
+char* printArticleInfo(struct sqlConnection *conn, char* item)
 /* Header with information about paper, return documentId */
 {
     char query[512];
-    char* articleTable = hashMustFindVal(tdb->settingsHash, "articleTable");
 
-    safef(query, sizeof(query), "SELECT pmid, pmcId, title,authors, abstract FROM %s WHERE displayId='%s'", articleTable, item);
+    safef(query, sizeof(query), "SELECT articleId, url, title, authors, citation, abstract FROM %s WHERE displayId='%s'", articleTable, item);
+    // NOTE(review): item goes into the SQL string unescaped - confirm callers only pass trusted values
     struct sqlResult *sr = sqlGetResult(conn, query);
     char **row;
     char *docId=0;
     if ((row = sqlNextRow(sr)) != NULL)
 	{
-	printLinks(row[0], row[1]);
-	printf("<A HREF=\"%s%s\"><b>%s</b></A>\n", PMCURL, row[1], row[2]);
-	printf("<p style=\"width:800px; font-size:96%%\">%s</p>\n", row[3]);
-	printf("<p style=\"width:800px; font-size:92%%\">%s</p>\n", row[4]);
-        docId = row[1];
+        char* abstract = row[5];
+        if (abstract == NULL || abstract[0] == 0) // also covers a NULL column, which strlen() would crash on
+            {
+                abstract = "(No abstract found for this article. Please use the link to the fulltext above.)";
+            }
+        docId = cloneString(row[0]); // returned to the caller as a copy (row is presumably owned by sr - TODO confirm)
+        printf("<P>%s</P>\n", row[3]); // authors
+        printf("<A HREF=\"%s\"><B>%s</B></A>\n", row[1], row[2]); // title, linked to url
+        printf("<P style=\"width:800px; font-size:80%%\">%s</P>\n", row[4]); // citation
+        printf("<P style=\"width:800px; font-size:100%%\">%s</P>\n", abstract);
 	}
     sqlFreeResult(&sr);
     return docId;
 }
 
-void printSeqInfo(struct sqlConnection* conn, struct trackDb* tdb,
-    char* docId, char* item, char* seqName, int start)
+struct hash* getSeqIdHash(struct sqlConnection* conn, char* trackTable, char* docId, char *item, char* seqName, int start)
 {
-    /* print table of sequences */
-
-    /* get all sequences for paper identified by docId*/
-    char query[512];
-    char* sequenceTable = hashMustFindVal(tdb->settingsHash, "sequenceTable");
-    safef(query, sizeof(query), "SELECT concat_ws('|',seqId, sequence) FROM %s WHERE pmcId='%s'", sequenceTable, docId);
-    struct slName *seqList = sqlQuickList(conn, query);
-
-    /* get sequence-Ids for feature that was clicked on (item&startPos are unique) and put into hash */
+    /* Return a hash keyed by the sequence IDs of the clicked feature (item&startPos are unique); seqIds is split on "," and each part on ":" into key and value. docId is unused. Values are heap copies. */
     // there must be an easier way to do this...
     // couldn't find a function that splits a string and converts it to a list
-    safef(query, sizeof(query), "SELECT seqIds,'' FROM t2g WHERE name='%s' "
-	"and chrom='%s' and chromStart=%d", item, seqName, start);
-    char* seqIdsString = sqlQuickString(conn, query);
-    char* seqIds[1024];
-    int partCount = chopString(seqIdsString, ",", seqIds, ArraySize(seqIds));
+    char query[512];
+    safef(query, sizeof(query), "SELECT seqIds,'' FROM %s WHERE name='%s' "
+        "and chrom='%s' and chromStart=%d", trackTable, item, seqName, start); // NOTE(review): values are not SQL-escaped
+    if (debug)
+        printf("%s", query); // never pass data as the format string
+    char* seqIdCoordString = sqlQuickString(conn, query);
+    char* seqIdCoords[1024];
+    int partCount = (seqIdCoordString == NULL) ? 0 : chopString(seqIdCoordString, ",", seqIdCoords, ArraySize(seqIdCoords)); // no result: return an empty hash
     int i;
     struct hash *seqIdHash = NULL;
     seqIdHash = newHash(0);
     for (i=0; i<partCount; i++)
-	hashAdd(seqIdHash, seqIds[i], NULL);
-    freeMem(seqIdsString);
+    {
+        char* seqId[1024] = {NULL, NULL}; // seqId[1] stays NULL when a part has no ":"
+        if (chopString(seqIdCoords[i], ":", seqId, ArraySize(seqId)) == 0) continue; // part held nothing but separators
+        if (debug)
+            printf("%s, %s<br>", seqId[0], seqId[1] ? seqId[1] : "");
+        hashAdd(seqIdHash, seqId[0], cloneString(seqId[1])); // copy the value: it points into the buffer freed below
+    }
+    freeMem(seqIdCoordString);
+    return seqIdHash;
+}
 
-    // output table
-    webNewSection("Sequences in article");
-    printf("<small>Sequences that map to the feature that was clicked "
-	"are highlighted in bold</small>");
-    webPrintLinkTableStart();
+void printSeqHeaders(bool debug, bool showDesc, bool isClickedSection) /* open the sequence table and print its header row; the TABLE element is left open */
+{   // note: the debug parameter shadows the file-level debug flag inside this function
+    printf("<TABLE style=\"background-color: #%s\" WIDTH=\"100%%\" CELLPADDING=\"2\">\n", HG_COL_BORDER);
+    printf("<TR style=\"background-color: #%s; color: #FFFFFF\">\n", HG_COL_TABLE_LABEL);
+    if (showDesc)
+        puts("  <TH style=\"width: 10%\">Article file</TH>\n");
+    puts("  <TH style=\"width: 70%\">Sequence (in bold) with flanking text</TH>\n");
+    if (debug)
+        puts("  <TH style=\"width: 30%\">Identifiers</TH>\n");
+    // the Matches column only appears outside the clicked section and when debug is off
+    if (!isClickedSection && !debug)
+        puts("  <TH style=\"width: 20%\">Matches</TH>\n");
+    puts("</TR>\n");
+}
 
-    struct slName *listEl = seqList;
-    while (listEl != NULL)
+bool printSeqSection(char* docId, char* title, bool showDesc, struct sqlConnection* conn, struct hash* filterIdHash, bool isClickedSection, bool fasta)
+/* Print one web section listing sequences of article docId. If isClickedSection, only rows whose annotation
+ * ID is in filterIdHash are shown, otherwise only the other rows. Returns true if at least one row was skipped. */
         {
-        char* parts[2];
-        chopString(listEl->name, "|", parts, 2);
-        char* seqId    = parts[0];
-        char* seq      = parts[1];
+    // get data from mysql
+    char query[4096];
+    safef(query, sizeof(query), "SELECT fileDesc, snippet, locations, articleId,fileId, seqId, sequence FROM %s WHERE articleId='%s';", sequenceTable, docId);
+    if (debug)
+        printf("%s", query); // never pass data as the format string
+    struct sqlResult *sr = sqlGetResult(conn, query);
+
+    // construct title for section; NOTE(review): the cgi values are not URL-encoded here
+    char fullTitle[5000];
+    safef(fullTitle, sizeof(fullTitle),
+    "%s&nbsp;<A HREF=\"../cgi-bin/hgc?%s&amp;o=%s&amp;t=%s&amp;g=%s&amp;i=%s&amp;fasta=%d\"><SMALL>(switch fasta format)</SMALL></A>",
+    title, cartSidUrlString(cart), cgiString("o"), cgiString("t"), cgiString("g"), cgiString("i"),
+    !fasta);
+
+    webNewSection("%s", fullTitle); // fullTitle contains cgi input, so it must not be used as the format
+
+    if (!fasta)
+        printSeqHeaders(debug, showDesc, isClickedSection);
 
-        if (hashLookup(seqIdHash, seqId))
-            printf("<TD BGCOLOR=\"#%s\"><TT><B>%s</B></TT></TD>",
-		HG_COL_TABLE, seq);
+    char **row;
+    bool foundSkippedRows = false;
+    while ((row = sqlNextRow(sr)) != NULL)
+    {
+        char* fileDesc = row[0];
+        char* snippet  = row[1];
+        char* locList  = row[2];
+        char* artId    = row[3];
+        char* fileId   = row[4];
+        char* seqId    = row[5];
+        char* seq      = row[6];
+
+        // annotation (=sequence) ID is a 64 bit int with 10 digits for
+        // article, 3 digits for file, 5 for annotation; a 10-digit article id can exceed int, hence atoll
+        char annotId[100];
+        safef(annotId, sizeof(annotId), "%010lld%03d%05d", atoll(artId), atoi(fileId), atoi(seqId));
+
+        // only display this sequence if we're in the right section: clicked section = IDs in the hash, other section = the rest
+        if ((hashLookup(filterIdHash, annotId)==0) ^ !isClickedSection) {
+            foundSkippedRows = true;
+            continue;
+        }
+
+        if (fasta)
+        {
+            printf("<TT>&gt;%s<BR>%s<BR></TT>", annotId, seq);
+        }
         else
-            printf("<TD style='background-color:#%s; color:#AAAAAA;'><TT>%s"
-		"</TT></TD>\n", HG_COL_TABLE, seq);
-        webPrintLinkTableNewRow();
-        listEl=listEl->next;
+        {
+            printf("<TR style=\"background-color: #%s\">\n", HG_COL_LOCAL_TABLE);
+            if (showDesc)
+                printf("<TD style=\"word-break: normal\">%s</TD>\n", fileDesc);
+            // snippet is printed unescaped - presumably it already carries the markup that bolds the sequence; TODO confirm
+            printf("<TD style=\"word-break: normal\"><I>%s</I></TD>\n", snippet);
+            if (debug)
+            {
+                printf("<TD>article %s, file %s, seq %s, annotId %s</TD>\n", artId, fileId, seqId, annotId);
         }
-	webPrintLinkTableEnd();
-    printTrackHtml(tdb);
 
-    slFreeList(seqList);
-    freeHash(&seqIdHash);
+            // print links to locations
+            if (!isClickedSection && !debug) {
+                // format: hg19/chr1:300-400,mm9/chr1:60006-23234
+                // split on "," then split on "/"
+                struct slName *loc, *locs = charSepToSlNames(locList, ',');
+                printf("<TD>");
+                if (locs==NULL)
+                    printf("No matches");
+                for (loc = locs; loc!=NULL; loc = loc->next)
+                {
+                    char* parts[2];
+                    int partCount = chopString(loc->name, "/", parts, ArraySize(parts));
+                    assert(partCount==2);
+                    char* db = parts[0];
+                    char* pos = parts[1];
+                    printf("<A HREF=\"../cgi-bin/hgTracks?%s&amp;db=%s&amp;position=%s\">%s (%s)</A>", cartSidUrlString(cart), db, pos, pos, db);
+                    printf("<BR>");
+                }
+                slFreeList(&locs); // the loop walks a separate cursor, so the head is still available to free
+                printf("</TD>\n");
+            }
+        printf("</TR>\n");
+        }
+	}
+    // close the table opened by printSeqHeaders(); in fasta mode no table was opened
+    if (!fasta)
+        printf("</TABLE>\n");
+    webEndSectionTables();
+    sqlFreeResult(&sr);
+    return foundSkippedRows;
 }
 
-void doT2gDetails(struct trackDb *tdb, char *item)
-/* text2genome.org custom display */
+void printSeqInfo(struct sqlConnection* conn, char* trackTable,
+    char* docId, char* item, char* seqName, int start, bool fileDesc, bool fasta)
+    /* Print the sequences of article docId in two sections: first the ones behind
+     * the clicked feature, then (if there are any) all other sequences of the article. */
+{
+    struct hash* clickedSeqs = getSeqIdHash(conn, trackTable, docId, item, seqName, start);
+
+    // the first pass reports whether it skipped rows, i.e. whether a second section is needed
+    bool skippedRows = printSeqSection(docId, "Sequences used to construct this feature", fileDesc, conn, clickedSeqs, 1, fasta);
+    if (skippedRows)
+        printSeqSection(docId, "Other Sequences in this article", fileDesc, conn, clickedSeqs, 0, fasta);
+    else
+        printf("<P>No more sequences found in this article</P>");
+    freeHash(&clickedSeqs);
+
+}
+
+void printTrackVersion(struct trackDb *tdb, struct sqlConnection* conn, char* item) /* Print the page header via genericHeader(): the title is item, followed by " - version X" when hgFixed.trackVersion has a row for this db. NOTE(review): the lookup is hard-coded to name = 't2g'. */
 {
-int start = cgiInt("o");
-int end = cgiInt("t");
 char versionString[256];
 char dateReference[256];
 char headerTitle[512];
-struct sqlConnection *conn = hAllocConn(database);
-
 /* see if hgFixed.trackVersion exists */
 boolean trackVersionExists = hTableExists("hgFixed", "trackVersion");
 
 if (trackVersionExists)
     {
     char query[256];
     safef(query, sizeof(query), "select version,dateReference from hgFixed.trackVersion where db = '%s' AND name = 't2g' order by updateTime DESC limit 1", database);
     struct sqlResult *sr = sqlGetResult(conn, query);
     char **row;
 
     /* in case of NULL result from the table */
     versionString[0] = 0;
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	safef(versionString, sizeof(versionString), "version %s",
 		row[0]);
 	safef(dateReference, sizeof(dateReference), "%s",
 		row[1]);
 	}
     sqlFreeResult(&sr);
     }
 else
     {
     versionString[0] = 0;
     dateReference[0] = 0;
     }
 
 if (versionString[0])
     safef(headerTitle, sizeof(headerTitle), "%s - %s", item, versionString);
 else
     safef(headerTitle, sizeof(headerTitle), "%s", item);
 
 genericHeader(tdb, headerTitle);
+}
 
+void printPositionAndSize(int start, int end) /* Print a link to the position in hgTracks and the genomic size (end - start); the start is displayed as start+1. */
+{   // seqName and database are not parameters; they are names defined outside this function
 printf("<B>Position:</B>&nbsp;"
-           "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
+               "<A HREF=\"%s&amp;db=%s&amp;position=%s%%3A%d-%d\">",
                   hgTracksPathAndSettings(), database, seqName, start+1, end);
 char startBuf[64], endBuf[64];
 sprintLongWithCommas(startBuf, start + 1);
 sprintLongWithCommas(endBuf, end);
 printf("%s:%s-%s</A><BR>\n", seqName, startBuf, endBuf);
 long size = end - start;
 sprintLongWithCommas(startBuf, size);
 printf("<B>Genomic Size:</B>&nbsp;%s<BR>\n", startBuf);
+}
 
-char* docId = printArticleInfo(conn, tdb, item);
+void doT2gDetails(struct trackDb *tdb, char *item)
+/* text2genome.org custom display: prints header, position, article info, the article's sequences and the track html */
+{
 
+int start = cgiInt("o");
+int end = cgiInt("t");
+char* trackTable = cgiString("g"); // NOTE(review): taken from the cgi and later placed unescaped into SQL by getSeqIdHash()
+debug = cgiOptionalInt("debug", 0);
+fasta = cgiOptionalInt("fasta", 0);
+
+struct sqlConnection *conn = hAllocConn(database);
+printTrackVersion(tdb, conn, item);
+printPositionAndSize(start, end);
+// the table-name globals must be set before printArticleInfo() and printSeqInfo() run, as those read them
+sequenceTable = hashMustFindVal(tdb->settingsHash, "sequenceTable");
+articleTable = hashMustFindVal(tdb->settingsHash, "articleTable");
+
+char* docId = printArticleInfo(conn, item); // a cloneString() copy, or 0 if no article matched; it is not freed here
 if (docId!=0)
-    printSeqInfo(conn, tdb, docId, item, seqName, start);
+{
+    bool showDesc; 
+    showDesc = (! endsWith(trackTable, "Elsevier")); // no file description column for tables whose name ends in "Elsevier"
+    printSeqInfo(conn, trackTable, docId, item, seqName, start, showDesc, fasta);
+}
+
+printTrackHtml(tdb);
 hFreeConn(&conn);
 }