5899cbd4120fc82ba0939dce519582841db961f7 markd Thu Mar 8 11:38:07 2018 -0800 Add support for GENCODE protein id to wgEncodeGencodeAttrs* table. For backwards compatibility with existing tables, this was added as the last column of the table and the code checks for its presence. RM 21077 diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index 634dc46..2496784 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -30,30 +30,31 @@ * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ /* Various URLs and URL templates. At one time, these were in the ra file, * but that didn't prove that helpful and end up requiring updated the ra * files for every GENCODE version if a URL was added or changed. */ //FIXME: clean up RA files when CGIs no longer need them static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html"; static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html"; static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s"; +static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/%s/Transcript/Summary?db=core;t=%s"; static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/%s/Gene/Summary?db=core;g=%s"; static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s"; static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; static char *apprisHomeUrl = 
"http://appris.bioinfo.cnio.es/"; static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id&specie=%s"; static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id&specie=%s"; static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) /* get the accession with version number dropped. */ { safecpy(accBuf, accBufSize, acc); char *dot = strchr(accBuf, '.'); @@ -103,35 +104,44 @@ } static struct genePred *transAnnoLoad(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId) /* load the gencode annotations and sort the one corresponding to the one that was clicked on is * first. Should only have one or two. */ { // must check chrom due to PAR char where[256]; sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name = \"%s\")", seqName, gencodeId); struct genePred *transAnno = genePredReaderLoadQuery(conn, tdb->track, where); slSort(&transAnno, transAnnoCmp); return transAnno; } static struct wgEncodeGencodeAttrs *transAttrsLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId) -/* load the gencode class information */ +/* load the gencode attributes */ { -return sqlQueryObjs(conn, (sqlLoadFunc)wgEncodeGencodeAttrsLoad, sqlQuerySingle|sqlQueryMust, - "select * from %s where transcriptId = \"%s\"", +char query[1024]; +sqlSafef(query, sizeof(query), "select * from %s where transcriptId = \"%s\"", getGencodeTable(tdb, "wgEncodeGencodeAttrs"), gencodeId); +struct sqlResult *sr = sqlGetResult(conn, query); +char **row = sqlNextRow(sr); +if (row == NULL) + errAbort("gencode transcript %s not found in %s", gencodeId, + getGencodeTable(tdb, "wgEncodeGencodeAttrs")); +// older version don't have proteinId column. 
+struct wgEncodeGencodeAttrs *transAttrs = wgEncodeGencodeAttrsLoad(row, sqlCountColumns(sr)); +sqlFreeResult(&sr); +return transAttrs; } static void getGeneBounds(struct trackDb *tdb, struct sqlConnection *conn, struct genePred *transAnno, int *geneChromStart, int *geneChromEnd) /* find bounds for the gene */ { // must check chrom due to PAR char where[256]; sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name2 = \"%s\")", seqName, transAnno->name2); struct genePred *geneAnnos = genePredReaderLoadQuery(conn, tdb->track, where); struct genePred *geneAnno; *geneChromStart = transAnno->txStart; *geneChromEnd = transAnno->txEnd; for (geneAnno = geneAnnos; geneAnno != NULL; geneAnno = geneAnno->next) { @@ -339,35 +349,47 @@ printf("</tr>\n"); } static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs, int geneChromStart, int geneChromEnd, struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource, struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* write basic HTML info for all genes */ { // basic gene and transcript information printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n"); printf("<tr><th><th>Transcript<th>Gene</tr>\n"); printf("</thead><tbody>\n"); -printf("<tr><th>Gencode id"); +printf("<tr><th>GENCODE id"); prTdEnsIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl); prTdEnsIdAnchor(transAttrs->geneId, ensemblGeneIdUrl); printf("</tr>\n"); +if (transAttrs->proteinId != NULL) + { + // protein id in database, maybe not this transcript + printf("<tr><th>Protein id"); + if (strlen(transAttrs->proteinId) > 0) + prTdEnsIdAnchor(transAttrs->proteinId, ensemblProteinIdUrl); + else + printf("<td> "); + printf("<td>"); + printf("</tr>\n"); + } + printf("<tr><th>HAVANA manual id"); prTdEnsIdAnchor(transAttrs->havanaTranscriptId, 
vegaTranscriptIdUrl); prTdEnsIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl); printf("</tr>\n"); printf("<tr><th>Position"); printf("<td>"); writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); printf("<td>"); writePosLink(transAnno->chrom, geneChromStart, geneChromEnd); printf("</tr>\n"); printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand); printf("<tr><th><a href=\"%s\" target = _blank>Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType);