72f056ceabce7b9778635a48def6ee9be5b29ebe markd Fri Sep 21 03:05:03 2012 -0700 link GENCODE to APPRIS and HGNC diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index 4dbbbc1..797710b 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -1,48 +1,72 @@ /* gencodeClick - click handling for GENCODE tracks */ #include "common.h" #include "hgc.h" #include "gencodeClick.h" +#include "ccdsClick.h" #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "htmshell.h" #include "jksql.h" #include "encode/wgEncodeGencodeAttrs.h" #include "encode/wgEncodeGencodeGeneSource.h" #include "encode/wgEncodeGencodePdb.h" #include "encode/wgEncodeGencodePubMed.h" #include "encode/wgEncodeGencodeRefSeq.h" #include "encode/wgEncodeGencodeTag.h" #include "encode/wgEncodeGencodeTranscriptSource.h" #include "encode/wgEncodeGencodeTranscriptSupport.h" #include "encode/wgEncodeGencodeExonSupport.h" #include "encode/wgEncodeGencodeUniProt.h" #include "encode/wgEncodeGencodeAnnotationRemark.h" #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h" /* * General notes: * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ -/* size for buffering URL strings */ -static const int urlBufSize = 512; +/* Various URLs and URL templates. At one time, these were in the ra file, + * but that didn't prove that helpful and end up requiring updated the ra + * files for every GENCODE version if a URL was added or changed. */ +//FIXME: clean up RA files when CGIs no longer need them +static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html"; +static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=%s"; +static char *ensemblGeneIdUrl = "http://www.ensembl.org/Homo_sapiens/Gene/Summary?db=core;t=%s"; +static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Transcript/Summary?db=core;t=%s"; +static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/Homo_sapiens/Gene/Summary?db=core;g=%s"; +static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; +static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s"; +static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; +static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/"; +static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id"; +static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id"; + +static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) +/* get the accession with version number dropped. */ +{ +safecpy(accBuf, accBufSize, acc); +char *dot = strchr(accBuf, '.'); +if (dot != NULL) + *dot = '\0'; +return accBuf; +} static char *getGencodeTable(struct trackDb *tdb, char *tableBase) /* get a table name from the settings. */ { return trackDbRequiredSetting(tdb, tableBase); } static int transAnnoCmp(const void *va, const void *vb) /* Compare genePreds, sorting to keep select gene first. The only cases * that annotations will be duplicated is if they are in the PAR and thus * on different chroms. */ { const struct genePred *a = *((struct genePred **)va); const struct genePred *b = *((struct genePred **)vb); if (sameString(a->name, seqName)) @@ -136,116 +160,127 @@ } static char *getSupportLevelDesc(struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* return description for level */ { static char buf[32]; if ((tsl == NULL) || (tsl->level <= 0)) return "tslNA"; else { safef(buf, sizeof(buf), "tsl%d", tsl->level); return buf; } } - -static char *mkExtIdUrl(struct trackDb *tdb, char *id, char *settingName, char *urlBuf) -/* generate a url to a external database given an id and the name of a setting - * containing the sprintf URL template.*/ -{ -safef(urlBuf, urlBufSize, trackDbRequiredSetting(tdb, settingName), id); -return urlBuf; -} - -static void prExtIdAnchor(struct trackDb *tdb, char *id, char *settingName) +static void prExtIdAnchor(char *id, char *urlTemplate) /* if an id to an external database is not empty, print an HTML anchor to it */ { -char urlBuf[urlBufSize]; if (!isEmpty(id)) - printf("%s", mkExtIdUrl(tdb, id, settingName, urlBuf), id); + { + char urlBuf[512]; + safef(urlBuf, sizeof(urlBuf), urlTemplate, id); + printf("%s", urlBuf, id); + } } -static void prTdExtIdAnchor(struct trackDb *tdb, char *id, char *settingName) +static void prTdExtIdAnchor(char *id, char *urlTemplate) /* print a table data element with an anchor for a id */ { printf(""); -prExtIdAnchor(tdb, id, settingName); +prExtIdAnchor(id, urlTemplate); } - static void writePosLink(char *chrom, int chromStart, int chromEnd) /* write link to a genomic position */ { printf("%s:%d-%d", hgTracksPathAndSettings(), database, chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd); } static void writeBasicInfoHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs, int geneChromStart, int geneChromEnd, struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* write basic HTML info for all genes */ { /* * notes: * - According to Steve: `status' is not the same for ensembl and havana. So either avoid displaying it * or display it as `automatic status' or `manual status'. */ // basic gene and transcript information printf("\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n", transAnno->strand); -printf("\n", transAttrs->transcriptType, transAttrs->geneType); -/* FIXME: add href o */ +printf("\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType); + printf("\n", transAttrs->transcriptStatus, transAttrs->geneStatus); + printf("\n", getLevelDesc(transAttrs->level), transAttrs->level); + printf("\n", getMethodDesc(transcriptSource->source), getMethodDesc(geneSource->source)); + if (haveTsl) { char *tslDesc = getSupportLevelDesc(tsl); printf("\n", tslDesc, tslDesc); } -printf("\n", transAttrs->geneName); -printf("\n", transAttrs->ccdsId); +printf("\n"); + +printf("\n", transAttrs->ccdsId); + } + printf("\n"); + +printf("\n"); + // FIXME: add sequence here?? printf("
TranscriptGene
Gencode id"); -prTdExtIdAnchor(tdb, transAttrs->transcriptId, "ensemblTranscriptIdUrl"); -prTdExtIdAnchor(tdb, transAttrs->geneId, "ensemblGeneIdUrl"); +prTdExtIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl); +prTdExtIdAnchor(transAttrs->geneId, ensemblGeneIdUrl); printf("
HAVANA manual id"); -prTdExtIdAnchor(tdb, transAttrs->havanaTranscriptId, "vegaTranscriptIdUrl"); -prTdExtIdAnchor(tdb, transAttrs->havanaGeneId, "vegaGeneIdUrl"); +prTdExtIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl); +prTdExtIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl); printf("
Position"); printf(""); writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); printf(""); writePosLink(transAnno->chrom, geneChromStart, geneChromEnd); printf("
Strand%s
Biotype%s%s
Biotype%s%s
Status%s%s
Annotation Level%s (%d)
Annotation Method%s%s
Transcription Support Level%s
HUGO gene%s
CCDS%s
HGNC gene symbol"); +prExtIdAnchor(transAttrs->geneName, hgncUrl); +printf("
CCDS"); +if (!isEmpty(transAttrs->ccdsId)) + { + printf("ccdsId); + printf("\" target=_blank>%s
GeneCards"); -if (!isEmpty(transAttrs->geneName)) - printf("%s\n", - transAttrs->geneName, transAttrs->geneName); +prExtIdAnchor(transAttrs->geneName, geneCardsUrl); printf("
APPRIS\n", apprisHomeUrl); +char accBuf[64]; +prTdExtIdAnchor(getBaseAcc(transAttrs->transcriptId, accBuf, sizeof(accBuf)), apprisGeneUrl); +prTdExtIdAnchor(getBaseAcc(transAttrs->geneId, accBuf, sizeof(accBuf)), apprisTranscriptUrl); +printf("
\n"); } static void writeSequenceHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno) /* write links to get sequences */ { printf("\n"); printf("\n"); printf("\n"); if (transAnno->cdsStart < transAnno->cdsEnd) { // protein coding printf("
Sequences
"); hgcAnchorSomewhere("htcGeneMrna", gencodeId, tdb->table, seqName); @@ -642,31 +677,31 @@ wgEncodeGencodeRefSeqFreeList(&refSeqs); wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports); wgEncodeGencodeExonSupportFreeList(&exonSupports); wgEncodeGencodeUniProtFreeList(&uniProts); wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl); } static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno) /* Process click on a GENCODE two-way pseudogene annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); printf("Yale id: "); -prExtIdAnchor(tdb, gencodeId, "yalePseudoUrl"); +prExtIdAnchor(gencodeId, yalePseudoUrl); printf("
"); printPos(pseudoAnno->chrom, pseudoAnno->txStart, pseudoAnno->txEnd, pseudoAnno->strand, FALSE, NULL); } static void doGencodeGenePolyA(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *polyAAnno) /* Process click on a GENCODE poly-A annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE PolyA Annotation %s (%s)", polyAAnno->name2, gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); printf("Annotation id: %s
", gencodeId); printf("Annotation Type: %s
",polyAAnno->name2); printPos(polyAAnno->chrom, polyAAnno->txStart, polyAAnno->txEnd, polyAAnno->strand, FALSE, NULL); }