72f056ceabce7b9778635a48def6ee9be5b29ebe
markd
  Fri Sep 21 03:05:03 2012 -0700
link GENCODE to APPRIS and HGNC
diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index 4dbbbc1..797710b 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -1,48 +1,72 @@
 /* gencodeClick - click handling for GENCODE tracks */
 #include "common.h"
 #include "hgc.h"
 #include "gencodeClick.h"
+#include "ccdsClick.h"
 #include "genePred.h"
 #include "genePredReader.h"
 #include "ensFace.h"
 #include "htmshell.h"
 #include "jksql.h"
 #include "encode/wgEncodeGencodeAttrs.h"
 #include "encode/wgEncodeGencodeGeneSource.h"
 #include "encode/wgEncodeGencodePdb.h"
 #include "encode/wgEncodeGencodePubMed.h"
 #include "encode/wgEncodeGencodeRefSeq.h"
 #include "encode/wgEncodeGencodeTag.h"
 #include "encode/wgEncodeGencodeTranscriptSource.h"
 #include "encode/wgEncodeGencodeTranscriptSupport.h"
 #include "encode/wgEncodeGencodeExonSupport.h"
 #include "encode/wgEncodeGencodeUniProt.h"
 #include "encode/wgEncodeGencodeAnnotationRemark.h"
 #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h"
 
 /*
  * General notes:
  *  - this will be integrated into hgGene at some point, however this was
  *    done as part of hgc for timing reasons and to allow more time to design
  *    the hgGene part.
  *  - Tables below will output at least one row even if no data is available.
  *    
  */
 
-/* size for buffering URL strings */
-static const int urlBufSize = 512;
+/* URLs and single-%s URL templates (the %s receives the id).  These were once
+ * in the ra file, but that did not prove helpful and ended up requiring the ra
+ * files to be updated for every GENCODE version when a URL was added or changed. */
+//FIXME: clean up RA files when CGIs no longer need them
+static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html";
+static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=%s";
+static char *ensemblGeneIdUrl = "http://www.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=%s";  // gene pages take g=, not t=
+static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Transcript/Summary?db=core;t=%s";
+static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Gene/Summary?db=core;g=%s";
+static char *yalePseudoUrl = "http://tables.pseudogene.org/%s";
+static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s";
+static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s";
+static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/";
+static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id";
+static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id";
+
+static char *getBaseAcc(char *acc, char *accBuf, int accBufSize)
+/* Copy acc into accBuf and cut the copy at the first '.', dropping any
+ * version suffix.  Returns accBuf; writes go only through accBuf. */
+{
+safecpy(accBuf, accBufSize, acc);
+size_t baseLen = strcspn(accBuf, ".");  // index of first '.', or the full length if none
+accBuf[baseLen] = '\0';
+return accBuf;
+}
 
 static char *getGencodeTable(struct trackDb *tdb, char *tableBase)
 /* get a table name from the settings. */
 {
 return trackDbRequiredSetting(tdb, tableBase);
 }
 
 static int transAnnoCmp(const void *va, const void *vb)
 /* Compare genePreds, sorting to keep select gene first.  The only cases
  * that annotations will be duplicated is if they are in the PAR and thus
  * on different chroms. */
 {
 const struct genePred *a = *((struct genePred **)va);
 const struct genePred *b = *((struct genePred **)vb);
 if (sameString(a->name, seqName))
@@ -136,116 +160,127 @@
 }
 
 static char *getSupportLevelDesc(struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
 /* return description for level */
 {
 static char buf[32];
 if ((tsl == NULL) || (tsl->level <= 0))
     return "tslNA";
 else
     {
     safef(buf, sizeof(buf), "tsl%d", tsl->level);
     return buf;
     }
 }
 
-
-static char *mkExtIdUrl(struct trackDb *tdb,  char *id, char *settingName, char *urlBuf)
-/* generate a url to a external database given an id and the name of a setting
- * containing the sprintf URL template.*/
-{
-safef(urlBuf, urlBufSize, trackDbRequiredSetting(tdb, settingName), id);
-return urlBuf;
-}
-
-static void prExtIdAnchor(struct trackDb *tdb,  char *id, char *settingName)
+static void prExtIdAnchor(char *id, char *urlTemplate)
 /* if an id to an external database is not empty, print an HTML anchor to it */
 {
-char urlBuf[urlBufSize];
 if (!isEmpty(id))
-    printf("<a href=\"%s\" target=_blank>%s</a>", mkExtIdUrl(tdb, id, settingName, urlBuf), id);
+    {
+    char urlBuf[512];
+    safef(urlBuf, sizeof(urlBuf), urlTemplate, id);  // urlTemplate is used as the format string; id is its only argument
+    printf("<a href=\"%s\" target=_blank>%s</a>", urlBuf, id);  // id doubles as the link text
+    }
 }
 
-static void prTdExtIdAnchor(struct trackDb *tdb,  char *id, char *settingName)
+static void prTdExtIdAnchor(char *id, char *urlTemplate)
 /* print a table data element with an anchor for a id */
 {
 printf("<td>");
-prExtIdAnchor(tdb, id, settingName);
+prExtIdAnchor(id, urlTemplate);  // prints nothing for an empty id, leaving an empty cell
 }
 
-
 static void writePosLink(char *chrom, int chromStart, int chromEnd)
 /* write link to a genomic position */
 {
 printf("<a href=\"%s&db=%s&position=%s%%3A%d-%d\" target=_blank>%s:%d-%d</A>",
        hgTracksPathAndSettings(), database,
        chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd);
 }
 
 static void writeBasicInfoHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs,
                                int geneChromStart, int geneChromEnd,
                                struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource,
                                bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
 /* write basic HTML info for all genes */
 {
 /*
  * notes:
  *   - According to Steve: `status' is not the same for ensembl and havana.  So either avoid displaying it
  *     or display it as `automatic status' or `manual status'.
  */
 
 // basic gene and transcript information
 printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n");
 printf("<tr><th><th>Transcript<th>Gene</tr>\n");
 printf("</thead><tbody>\n");
 
 printf("<tr><th>Gencode id");
-prTdExtIdAnchor(tdb, transAttrs->transcriptId, "ensemblTranscriptIdUrl");
-prTdExtIdAnchor(tdb, transAttrs->geneId, "ensemblGeneIdUrl");
+prTdExtIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl);
+prTdExtIdAnchor(transAttrs->geneId, ensemblGeneIdUrl);
 printf("</tr>\n");
 
 printf("<tr><th>HAVANA manual id");
-prTdExtIdAnchor(tdb, transAttrs->havanaTranscriptId, "vegaTranscriptIdUrl");
-prTdExtIdAnchor(tdb, transAttrs->havanaGeneId, "vegaGeneIdUrl");
+prTdExtIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl);
+prTdExtIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl);
 printf("</tr>\n");
 
 printf("<tr><th>Position");
 printf("<td>");
 writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd);
 printf("<td>");
 writePosLink(transAnno->chrom, geneChromStart, geneChromEnd);
 printf("</tr>\n");
 
 printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand);
 
-printf("<tr><th><a href=\"http://www.gencodegenes.org/gencode_biotypes.html\">Biotype</a><td>%s<td>%s</tr>\n", transAttrs->transcriptType, transAttrs->geneType);
-/* FIXME: add href o */
+printf("<tr><th><a href=\"%s\">Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType);
+
 printf("<tr><th>Status<td>%s<td>%s</tr>\n", transAttrs->transcriptStatus, transAttrs->geneStatus);
+
 printf("<tr><th>Annotation Level<td>%s (%d)<td></tr>\n", getLevelDesc(transAttrs->level), transAttrs->level);
+
 printf("<tr><th>Annotation Method<td>%s<td>%s</tr>\n", getMethodDesc(transcriptSource->source), getMethodDesc(geneSource->source));
+
 if (haveTsl)
     {
     char *tslDesc = getSupportLevelDesc(tsl);
     printf("<tr><th><a href=\"#tsl\">Transcription Support Level</a><td><a href=\"#%s\">%s</a><td></tr>\n", tslDesc, tslDesc);
     }
-printf("<tr><th>HUGO gene<td colspan=2>%s</tr>\n", transAttrs->geneName);
-printf("<tr><th>CCDS<td>%s<td></tr>\n", transAttrs->ccdsId);
+printf("<tr><th>HGNC gene symbol<td colspan=2>");
+prExtIdAnchor(transAttrs->geneName, hgncUrl);
+printf("</tr>\n");
+
+printf("<tr><th>CCDS<td>");
+if (!isEmpty(transAttrs->ccdsId))
+    {
+    printf("<a href=\"");
+    printCcdsExtUrl(transAttrs->ccdsId);
+    printf("\" target=_blank>%s</a>", transAttrs->ccdsId);
+    }
+printf("<td></tr>\n");  // always emit the empty gene cell and close the row, even with no CCDS id
 printf("<tr><th>GeneCards<td colspan=2>");
-if (!isEmpty(transAttrs->geneName))
-    printf("<a href = \"http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s\" TARGET=_blank>%s</A>\n",
-	   transAttrs->geneName, transAttrs->geneName);
+prExtIdAnchor(transAttrs->geneName, geneCardsUrl);
 printf("</tr>\n");
+// APPRIS: ids are passed with the version dropped; transcript column first, then gene
+printf("<tr><th><a href=\"%s\" target=_blank>APPRIS</a>\n", apprisHomeUrl);
+char accBuf[64];  // reused for both ids; each anchor is printed before the next call overwrites it
+prTdExtIdAnchor(getBaseAcc(transAttrs->transcriptId, accBuf, sizeof(accBuf)), apprisTranscriptUrl);
+prTdExtIdAnchor(getBaseAcc(transAttrs->geneId, accBuf, sizeof(accBuf)), apprisGeneUrl);
+printf("</tr>\n");
+
 // FIXME: add sequence here??
 printf("</tbody></table>\n");
 }
 
 static void writeSequenceHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno)
 /* write links to get sequences */
 {
 printf("<table class=\"hgcCcds\"><thead>\n");
 printf("<tr><th colspan=\"2\">Sequences</tr>\n");
 printf("</thead><tbody>\n");
 if (transAnno->cdsStart < transAnno->cdsEnd)
     {
     // protein coding
     printf("<tr><td width=\"50%%\">");
     hgcAnchorSomewhere("htcGeneMrna", gencodeId, tdb->table, seqName);
@@ -642,31 +677,31 @@
 wgEncodeGencodeRefSeqFreeList(&refSeqs);
 wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports);
 wgEncodeGencodeExonSupportFreeList(&exonSupports);
 wgEncodeGencodeUniProtFreeList(&uniProts);
 wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl);
 }
 
 static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno)
 /* Process click on a GENCODE two-way pseudogene annotation track. */
 {
 char header[256];
 safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId);
 cartWebStart(cart, database, "%s", header);
 printf("<H2>%s</H2>\n", header);
 printf("<b>Yale id:</b> ");
-prExtIdAnchor(tdb, gencodeId, "yalePseudoUrl");
+prExtIdAnchor(gencodeId, yalePseudoUrl);  // template is the file-level yalePseudoUrl, formatted with gencodeId
 printf("<br>");
 printPos(pseudoAnno->chrom, pseudoAnno->txStart, pseudoAnno->txEnd, pseudoAnno->strand, FALSE, NULL);
 }
 
 static void doGencodeGenePolyA(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *polyAAnno)
 /* Process click on a GENCODE poly-A annotation track. */
 {
 char header[256];
 safef(header, sizeof(header), "GENCODE PolyA Annotation %s (%s)", polyAAnno->name2, gencodeId);
 cartWebStart(cart, database, "%s", header);
 printf("<H2>%s</H2>\n", header);
 printf("<b>Annotation id:</b> %s<br>", gencodeId);
 printf("<b>Annotation Type:</b> %s<br>",polyAAnno->name2);
 printPos(polyAAnno->chrom, polyAAnno->txStart, polyAAnno->txEnd, polyAAnno->strand, FALSE, NULL);
 }