f9d47d16a47f128366aeb1deb0575c0cb52aa0f9 markd Thu Nov 29 15:24:08 2018 -0800 For native GENCODE on GRCh37/hg19, link to http://grch37.ensembl.org, not primary GENCODE site. #22549 diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index 746b8f3..ba01314 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -26,38 +26,46 @@ #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h" /* * General notes: * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ /* Various URLs and URL templates. At one time, these were in the ra file, * but that didn't prove that helpful and end up requiring updated the ra * files for every GENCODE version if a URL was added or changed. */ //FIXME: clean up RA files when CGIs no longer need them -static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html"; -static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html"; static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s"; static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; + +static char *ensemblH37TranscriptIdUrl = "http://grch37.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; +static char *ensemblH37GeneIdUrl = "http://grch37.ensembl.org/%s/Gene/Summary?db=core;t=%s"; +static char *ensemblH37ProteinIdUrl = "http://grch37.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; +static char *ensemblH37SupportingEvidUrl = "http://grch37.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; + static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/%s/Transcript/Summary?db=core;t=%s"; static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/%s/Gene/Summary?db=core;g=%s"; + +static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html"; +static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html"; + static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s"; static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; static char *apprisHomeUrl = "http://appris-tools.org/"; static char *apprisGeneUrl = "http://appris-tools.org/#/database/id/%s/%s?sc=ensembl"; static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) /* get the accession with version number dropped. */ { safecpy(accBuf, accBufSize, acc); char *dot = strchr(accBuf, '.'); if (dot != NULL) *dot = '\0'; return accBuf; } @@ -69,30 +77,40 @@ } static char *getGencodeTable(struct trackDb *tdb, char *tableBase) /* get a table name from the settings. */ { return trackDbRequiredSetting(tdb, tableBase); } static char* getGencodeVersion(struct trackDb *tdb) /* get the GENCODE version or NULL for < V7, which is not supported * by this module. */ { return trackDbSetting(tdb, "wgEncodeGencodeVersion"); } +static boolean isGrcH37Native(struct trackDb *tdb) +/* Is this GENCODE GRCh37 native build, which requires a different Ensembl site. */ +{ +// check for non-lifted GENCODE on GRCh37/hg19 +if (sameString(database, "hg19")) + return stringIn("lift37", getGencodeVersion(tdb)) == NULL; +else + return FALSE; +} + static int transAnnoCmp(const void *va, const void *vb) /* Compare genePreds, sorting to keep select gene first. The only cases * that annotations will be duplicated is if they are in the PAR and thus * on different chroms. */ { const struct genePred *a = *((struct genePred **)va); const struct genePred *b = *((struct genePred **)vb); if (sameString(a->name, seqName)) return -1; else if (sameString(b->name, seqName)) return 1; else return strcmp(a->name, b->name); } @@ -350,40 +368,43 @@ } static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs, int geneChromStart, int geneChromEnd, struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource, struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* write basic HTML info for all genes */ { // basic gene and transcript information printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n"); printf("<tr><th><th>Transcript<th>Gene</tr>\n"); printf("</thead><tbody>\n"); printf("<tr><th>GENCODE id"); -prTdEnsIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl); -prTdEnsIdAnchor(transAttrs->geneId, ensemblGeneIdUrl); +prTdEnsIdAnchor(transAttrs->transcriptId, + (isGrcH37Native(tdb) ? ensemblH37TranscriptIdUrl: ensemblTranscriptIdUrl)); +prTdEnsIdAnchor(transAttrs->geneId, + (isGrcH37Native(tdb) ? ensemblH37GeneIdUrl : ensemblGeneIdUrl)); printf("</tr>\n"); if (transAttrs->proteinId != NULL) { - // protein id in database, maybe not this transcript + // protein id in database, maybe not for this transcript printf("<tr><th>Protein id"); if (strlen(transAttrs->proteinId) > 0) - prTdEnsIdAnchor(transAttrs->proteinId, ensemblProteinIdUrl); + prTdEnsIdAnchor(transAttrs->proteinId, + (isGrcH37Native(tdb) ? ensemblH37ProteinIdUrl: ensemblProteinIdUrl)); else printf("<td> "); printf("<td>"); printf("</tr>\n"); } printf("<tr><th>HAVANA manual id"); prTdEnsIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl); prTdEnsIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl); printf("</tr>\n"); printf("<tr><th>Position"); printf("<td>"); writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); printf("<td>"); @@ -719,39 +740,41 @@ struct supportEvid *supportEvids = NULL; transcriptSupportToSupportEvid(&supportEvids, transcriptSupports); exonSupportToSupportEvid(&supportEvids, exonSupports); sortUniqSupportExidence(&supportEvids); return supportEvids; } static void writeSupportEvidenceEntry(struct supportEvid *supportEvid) /* write HTML table entry for a supporting evidence */ { // FIXME: should link to sources when possible printf("<td width=\"25%%\">%s", supportEvid->seqSrc); printf("<td width=\"25%%\">%s", supportEvid->seqId); } -static void writeSupportingEvidenceLinkHtml(char *gencodeId, struct wgEncodeGencodeTranscriptSupport *transcriptSupports, +static void writeSupportingEvidenceLinkHtml(struct trackDb *tdb, char *gencodeId, + struct wgEncodeGencodeTranscriptSupport *transcriptSupports, struct wgEncodeGencodeExonSupport *exonSupports) /* write HTML links to supporting evidence */ { struct supportEvid *supportEvids = loadSupportEvid(transcriptSupports, exonSupports); printf("<table class=\"hgcCcds\"><thead>\n"); printf("<tr><th colspan=\"4\">Supporting Evidence ("); -prEnsIdAnchor(gencodeId, ensemblSupportingEvidUrl); +prEnsIdAnchor(gencodeId, + (isGrcH37Native(tdb) ? ensemblH37SupportingEvidUrl: ensemblSupportingEvidUrl)); printf(")</tr>\n"); printf("<tr class=\"hgcCcdsSub\"><th>Source<th>Sequence<th>Source<th>Sequence</tr>\n"); printf("</thead><tbody>\n"); struct supportEvid *supportEvid = supportEvids; int i, rowCnt = 0; while ((supportEvid != NULL) || (rowCnt == 0)) { printf("<tr>"); for (i = 0; i < 2; i++) { if (supportEvid != NULL) { writeSupportEvidenceEntry(supportEvid); supportEvid = supportEvid->next; } @@ -839,31 +862,31 @@ printf("<H2>%s</H2>\n", header); writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, tags, haveTsl, tsl); writeTagLinkHtml(tags); writeSequenceHtml(tdb, gencodeId, transAnno); if (haveRemarks) writeAnnotationRemarkHtml(remarks); if (isProteinCodingTrans(transAttrs)) writePdbLinkHtml(pdbs); writePubMedLinkHtml(pubMeds); if (haveEntrezGene) writeEntrezGeneLinkHtml(entrezGenes); writeRefSeqLinkHtml(refSeqs); if (isProteinCodingTrans(transAttrs)) writeUniProtLinkHtml(uniProts); -writeSupportingEvidenceLinkHtml(gencodeId, transcriptSupports, exonSupports); +writeSupportingEvidenceLinkHtml(tdb, gencodeId, transcriptSupports, exonSupports); wgEncodeGencodeAttrsFree(&transAttrs); wgEncodeGencodeAnnotationRemarkFreeList(&remarks); wgEncodeGencodeGeneSourceFreeList(&geneSource); wgEncodeGencodeTranscriptSourceFreeList(&transcriptSource); wgEncodeGencodePdbFreeList(&pdbs); wgEncodeGencodePubMedFreeList(&pubMeds); wgEncodeGencodeEntrezGeneFreeList(&entrezGenes); wgEncodeGencodeRefSeqFreeList(&refSeqs); wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports); wgEncodeGencodeExonSupportFreeList(&exonSupports); wgEncodeGencodeUniProtFreeList(&uniProts); wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl); }