72f056ceabce7b9778635a48def6ee9be5b29ebe markd Fri Sep 21 03:05:03 2012 -0700 link GENCODE to APPRIS and HGNC diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index 4dbbbc1..797710b 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -1,48 +1,72 @@ /* gencodeClick - click handling for GENCODE tracks */ #include "common.h" #include "hgc.h" #include "gencodeClick.h" +#include "ccdsClick.h" #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "htmshell.h" #include "jksql.h" #include "encode/wgEncodeGencodeAttrs.h" #include "encode/wgEncodeGencodeGeneSource.h" #include "encode/wgEncodeGencodePdb.h" #include "encode/wgEncodeGencodePubMed.h" #include "encode/wgEncodeGencodeRefSeq.h" #include "encode/wgEncodeGencodeTag.h" #include "encode/wgEncodeGencodeTranscriptSource.h" #include "encode/wgEncodeGencodeTranscriptSupport.h" #include "encode/wgEncodeGencodeExonSupport.h" #include "encode/wgEncodeGencodeUniProt.h" #include "encode/wgEncodeGencodeAnnotationRemark.h" #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h" /* * General notes: * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ -/* size for buffering URL strings */ -static const int urlBufSize = 512; +/* Various URLs and URL templates. At one time, these were in the ra file, + * but that didn't prove that helpful and ended up requiring updating the ra + * files for every GENCODE version if a URL was added or changed. 
*/ +//FIXME: clean up RA files when CGIs no longer need them +static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html"; +static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=%s"; +static char *ensemblGeneIdUrl = "http://www.ensembl.org/Homo_sapiens/Gene/Summary?db=core;t=%s"; +static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Transcript/Summary?db=core;t=%s"; +static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Gene/Summary?db=core;g=%s"; +static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; +static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s"; +static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; +static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/"; +static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id"; +static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id"; + +static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) +/* get the accession with version number dropped. */ +{ +safecpy(accBuf, accBufSize, acc); +char *dot = strchr(accBuf, '.'); +if (dot != NULL) + *dot = '\0'; +return accBuf; +} static char *getGencodeTable(struct trackDb *tdb, char *tableBase) /* get a table name from the settings. */ { return trackDbRequiredSetting(tdb, tableBase); } static int transAnnoCmp(const void *va, const void *vb) /* Compare genePreds, sorting to keep select gene first. The only cases * that annotations will be duplicated is if they are in the PAR and thus * on different chroms. 
*/ { const struct genePred *a = *((struct genePred **)va); const struct genePred *b = *((struct genePred **)vb); if (sameString(a->name, seqName)) @@ -136,116 +160,127 @@ } static char *getSupportLevelDesc(struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* return description for level */ { static char buf[32]; if ((tsl == NULL) || (tsl->level <= 0)) return "tslNA"; else { safef(buf, sizeof(buf), "tsl%d", tsl->level); return buf; } } - -static char *mkExtIdUrl(struct trackDb *tdb, char *id, char *settingName, char *urlBuf) -/* generate a url to a external database given an id and the name of a setting - * containing the sprintf URL template.*/ -{ -safef(urlBuf, urlBufSize, trackDbRequiredSetting(tdb, settingName), id); -return urlBuf; -} - -static void prExtIdAnchor(struct trackDb *tdb, char *id, char *settingName) +static void prExtIdAnchor(char *id, char *urlTemplate) /* if an id to an external database is not empty, print an HTML anchor to it */ { -char urlBuf[urlBufSize]; if (!isEmpty(id)) - printf("<a href=\"%s\" target=_blank>%s</a>", mkExtIdUrl(tdb, id, settingName, urlBuf), id); + { + char urlBuf[512]; + safef(urlBuf, sizeof(urlBuf), urlTemplate, id); + printf("<a href=\"%s\" target=_blank>%s</a>", urlBuf, id); + } } -static void prTdExtIdAnchor(struct trackDb *tdb, char *id, char *settingName) +static void prTdExtIdAnchor(char *id, char *urlTemplate) /* print a table data element with an anchor for a id */ { printf("<td>"); -prExtIdAnchor(tdb, id, settingName); +prExtIdAnchor(id, urlTemplate); } - static void writePosLink(char *chrom, int chromStart, int chromEnd) /* write link to a genomic position */ { printf("<a href=\"%s&db=%s&position=%s%%3A%d-%d\" target=_blank>%s:%d-%d</A>", hgTracksPathAndSettings(), database, chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd); } static void writeBasicInfoHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs, int geneChromStart, int 
geneChromEnd, struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* write basic HTML info for all genes */ { /* * notes: * - According to Steve: `status' is not the same for ensembl and havana. So either avoid displaying it * or display it as `automatic status' or `manual status'. */ // basic gene and transcript information printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n"); printf("<tr><th><th>Transcript<th>Gene</tr>\n"); printf("</thead><tbody>\n"); printf("<tr><th>Gencode id"); -prTdExtIdAnchor(tdb, transAttrs->transcriptId, "ensemblTranscriptIdUrl"); -prTdExtIdAnchor(tdb, transAttrs->geneId, "ensemblGeneIdUrl"); +prTdExtIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl); +prTdExtIdAnchor(transAttrs->geneId, ensemblGeneIdUrl); printf("</tr>\n"); printf("<tr><th>HAVANA manual id"); -prTdExtIdAnchor(tdb, transAttrs->havanaTranscriptId, "vegaTranscriptIdUrl"); -prTdExtIdAnchor(tdb, transAttrs->havanaGeneId, "vegaGeneIdUrl"); +prTdExtIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl); +prTdExtIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl); printf("</tr>\n"); printf("<tr><th>Position"); printf("<td>"); writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); printf("<td>"); writePosLink(transAnno->chrom, geneChromStart, geneChromEnd); printf("</tr>\n"); printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand); -printf("<tr><th><a href=\"http://www.gencodegenes.org/gencode_biotypes.html\">Biotype</a><td>%s<td>%s</tr>\n", transAttrs->transcriptType, transAttrs->geneType); -/* FIXME: add href o */ +printf("<tr><th><a href=\"%s\">Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType); + printf("<tr><th>Status<td>%s<td>%s</tr>\n", transAttrs->transcriptStatus, transAttrs->geneStatus); + printf("<tr><th>Annotation Level<td>%s (%d)<td></tr>\n", 
getLevelDesc(transAttrs->level), transAttrs->level); + printf("<tr><th>Annotation Method<td>%s<td>%s</tr>\n", getMethodDesc(transcriptSource->source), getMethodDesc(geneSource->source)); + if (haveTsl) { char *tslDesc = getSupportLevelDesc(tsl); printf("<tr><th><a href=\"#tsl\">Transcription Support Level</a><td><a href=\"#%s\">%s</a><td></tr>\n", tslDesc, tslDesc); } -printf("<tr><th>HUGO gene<td colspan=2>%s</tr>\n", transAttrs->geneName); -printf("<tr><th>CCDS<td>%s<td></tr>\n", transAttrs->ccdsId); +printf("<tr><th>HGNC gene symbol<td colspan=2>"); +prExtIdAnchor(transAttrs->geneName, hgncUrl); +printf("</tr>\n"); + +printf("<tr><th>CCDS<td>"); +if (!isEmpty(transAttrs->ccdsId)) + { + printf("<a href=\""); + printCcdsExtUrl(transAttrs->ccdsId); + printf("\" target=_blank>%s</a><td></tr>\n", transAttrs->ccdsId); + } + printf("<tr><th>GeneCards<td colspan=2>"); -if (!isEmpty(transAttrs->geneName)) - printf("<a href = \"http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s\" TARGET=_blank>%s</A>\n", - transAttrs->geneName, transAttrs->geneName); +prExtIdAnchor(transAttrs->geneName, geneCardsUrl); printf("</tr>\n"); + +printf("<tr><th><a href=\"%s\" target=_blank>APPRIS</a>\n", apprisHomeUrl); +char accBuf[64]; +prTdExtIdAnchor(getBaseAcc(transAttrs->transcriptId, accBuf, sizeof(accBuf)), apprisGeneUrl); +prTdExtIdAnchor(getBaseAcc(transAttrs->geneId, accBuf, sizeof(accBuf)), apprisTranscriptUrl); +printf("</tr>\n"); + // FIXME: add sequence here?? 
printf("</tbody></table>\n"); } static void writeSequenceHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno) /* write links to get sequences */ { printf("<table class=\"hgcCcds\"><thead>\n"); printf("<tr><th colspan=\"2\">Sequences</tr>\n"); printf("</thead><tbody>\n"); if (transAnno->cdsStart < transAnno->cdsEnd) { // protein coding printf("<tr><td width=\"50%%\">"); hgcAnchorSomewhere("htcGeneMrna", gencodeId, tdb->table, seqName); @@ -642,31 +677,31 @@ wgEncodeGencodeRefSeqFreeList(&refSeqs); wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports); wgEncodeGencodeExonSupportFreeList(&exonSupports); wgEncodeGencodeUniProtFreeList(&uniProts); wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl); } static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno) /* Process click on a GENCODE two-way pseudogene annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId); cartWebStart(cart, database, "%s", header); printf("<H2>%s</H2>\n", header); printf("<b>Yale id:</b> "); -prExtIdAnchor(tdb, gencodeId, "yalePseudoUrl"); +prExtIdAnchor(gencodeId, yalePseudoUrl); printf("<br>"); printPos(pseudoAnno->chrom, pseudoAnno->txStart, pseudoAnno->txEnd, pseudoAnno->strand, FALSE, NULL); } static void doGencodeGenePolyA(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *polyAAnno) /* Process click on a GENCODE poly-A annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE PolyA Annotation %s (%s)", polyAAnno->name2, gencodeId); cartWebStart(cart, database, "%s", header); printf("<H2>%s</H2>\n", header); printf("<b>Annotation id:</b> %s<br>", gencodeId); printf("<b>Annotation Type:</b> %s<br>",polyAAnno->name2); printPos(polyAAnno->chrom, polyAAnno->txStart, polyAAnno->txEnd, polyAAnno->strand, FALSE, NULL); }