e22f12e30ffc45d4b6657aeca878a22eeb239edd braney Thu Jul 2 16:05:42 2020 -0700 add gencode transcript details to hgGene diff --git src/hg/hgGene/gencodeSection.c src/hg/hgGene/gencodeSection.c new file mode 100644 index 0000000..d8d1d99 --- /dev/null +++ src/hg/hgGene/gencodeSection.c @@ -0,0 +1,868 @@ +/* gencodeSection - click handling for GENCODE tracks */ + +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ +#include "common.h" +#include "hdb.h" +#include "hCommon.h" +#include "hgGene.h" +#include "gencodeSection.h" +#include "genePred.h" +#include "genePredReader.h" +#include "ensFace.h" +#include "htmshell.h" +#include "jksql.h" +#include "regexHelper.h" +#include "gencodeAttrs.h" +#include "gencodeToUniProt.h" +#include "gencodeTranscriptionSupportLevel.h" +#include "gencodeTag.h" +#include "gencodeGeneSource.h" +#include "gencodeToPdb.h" +#include "gencodeToPubMed.h" +#include "gencodeToRefSeq.h" +#include "gencodeTranscriptSource.h" +#include "gencodeTranscriptSupport.h" +#include "gencodeExonSupport.h" +#include "gencodeToUniProt.h" +#include "gencodeToEntrezGene.h" +#include "gencodeAnnotationRemark.h" +#include "gencodeTranscriptionSupportLevel.h" + +extern struct trackDb *globalTdb; + +char *entrezUidFormat = "https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=%s&list_uids=%d&dopt=%s&tool=genome.ucsc.edu"; + +static void printEntrezPubMedUidUrl(FILE *f, int pmid) +/* Print URL for Entrez browser on a PubMed search. */ +{ +fprintf(f, entrezUidFormat, "PubMed", pmid, "Summary"); +} + +static void printEntrezGeneUrl(FILE *f, int geneid) +/* Print URL for Entrez browser on a gene details page. */ +{ +fprintf(f, entrezUidFormat, "gene", geneid, "Graphics"); +} + +static void printCcdsExtUrl(char *ccdsId) +/* Print out URL to link to CCDS database at NCBI */ +{ +printf("https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&BUILDS=ALLBUILDS&DATA=%s", ccdsId); +} + +static char *hgTracksPathAndSettings() +/* Return path with hgTracks CGI path and session state variable. */ +{ +static struct dyString *dy = NULL; +if (dy == NULL) + { + dy = newDyString(128); + dyStringPrintf(dy, "%s?%s", hgTracksName(), cartSidUrlString(cart)); + } +return dy->string; +} + +/* + * General notes: + * - this will be integrated into hgGene at some point, however this was + * done as part of hgc for timing reasons and to allow more time to design + * the hgGene part. + * - Tables below will output at least one row even if no data is available. + * + */ + +/* Various URLs and URL templates. At one time, these were in the ra file, + * but that didn't prove that helpful and end up requiring updated the ra + * files for every GENCODE version if a URL was added or changed. */ +//FIXME: clean up RA files when CGIs no longer need them +static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; +static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s"; +static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; +static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; + +static char *ensemblH37TranscriptIdUrl = "http://grch37.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; +static char *ensemblH37GeneIdUrl = "http://grch37.ensembl.org/%s/Gene/Summary?db=core;t=%s"; +static char *ensemblH37ProteinIdUrl = "http://grch37.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; +static char *ensemblH37SupportingEvidUrl = "http://grch37.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; + +static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/pages/biotypes.html"; +static char *gencodeTagsUrl = "http://www.gencodegenes.org/pages/tags.html"; + +//static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; +static char *hgncUrl = " https://www.genenames.org/data/gene-symbol-report/#!/symbol/%s"; +static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; +static char *apprisHomeUrl = "http://appris-tools.org/"; +static char *apprisGeneUrl = "http://appris-tools.org/#/database/id/%s/%s?sc=ensembl"; + +static char* UNKNOWN = "unknown"; + +static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) +/* get the accession with version number dropped. */ +{ +safecpy(accBuf, accBufSize, acc); +char *dot = strchr(accBuf, '.'); +if (dot != NULL) + *dot = '\0'; +return accBuf; +} + +static bool haveGencodeTable(struct trackDb *tdb, char *tableBase) +/* determine if table is in settings and thus in this gencode release */ +{ +return trackDbSetting(tdb, tableBase) != NULL; +} + +static char *getGencodeTable(struct trackDb *tdb, char *tableBase) +/* get a table name from the settings. */ +{ +char buffer[strlen(tableBase) + 10]; +safef(buffer, sizeof buffer, "%sV35",tableBase); +return cloneString(buffer); +//return trackDbRequiredSetting(tdb, tableBase); +} + +static char* getGencodeVersion(struct trackDb *tdb) +/* get the GENCODE version or NULL for < V7, which is not supported + * by this module. */ +{ +return trackDbSetting(tdb, "gencodeVersion"); +} + +static boolean isGrcH37Native(struct trackDb *tdb) +/* Is this GENCODE GRCh37 native build, which requires a different Ensembl site. */ +{ +// check for non-lifted GENCODE on GRCh37/hg19 +if (sameString(database, "hg19")) + return stringIn("lift37", getGencodeVersion(tdb)) == NULL; +else + return FALSE; +} + +static boolean isFakeGeneSymbol(char* sym) +/* is this a static gene symbol? */ +{ +static const char *regexp = "^AC[0-9]+\\.[0-9]+$"; +return regexMatch(sym, regexp); +} + +static int transAnnoCmp(const void *va, const void *vb) +/* Compare genePreds, sorting to keep select gene first. The only cases + * that annotations will be duplicated is if they are in the PAR and thus + * on different chroms. */ +{ +const struct genePred *a = *((struct genePred **)va); +const struct genePred *b = *((struct genePred **)vb); +if (sameString(a->name, curGeneChrom)) + return -1; +else if (sameString(b->name, curGeneChrom)) + return 1; +else + return strcmp(a->name, b->name); +} + +static bool isProteinCodingTrans(struct gencodeAttrs *transAttrs) +/* is a transcript protein coding? */ +{ +return sameString(transAttrs->transcriptClass, "coding"); +} + +static struct genePred *transAnnoLoad(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId) +/* load the gencode annotations and sort the one corresponding to the one that was clicked on is + * first. Should only have one or two. */ +{ +// must check chrom due to PAR +char where[256]; +sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name = \"%s\")", curGeneChrom, gencodeId); +struct genePred *transAnno = genePredReaderLoadQuery(conn, tdb->table, where); +slSort(&transAnno, transAnnoCmp); +return transAnno; +} + +static struct gencodeAttrs *transAttrsLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId) +/* load the gencode attributes */ +{ +char query[1024]; +sqlSafef(query, sizeof(query), "select * from %s where transcriptId = \"%s\"", + getGencodeTable(tdb, "gencodeAttrs"), gencodeId); +struct sqlResult *sr = sqlGetResult(conn, query); +char **row = sqlNextRow(sr); +if (row == NULL) + errAbort("gencode transcript %s not found in %s", gencodeId, + getGencodeTable(tdb, "gencodeAttrs")); +// older version don't have proteinId column. +//struct gencodeAttrs *transAttrs = gencodeAttrsLoad(row, sqlCountColumns(sr)); +struct gencodeAttrs *transAttrs = gencodeAttrsLoad(row); +sqlFreeResult(&sr); +return transAttrs; +} + +static void getGeneBounds(struct trackDb *tdb, struct sqlConnection *conn, struct genePred *transAnno, + int *geneChromStart, int *geneChromEnd) +/* find bounds for the gene */ +{ +// must check chrom due to PAR +char where[256]; +sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name2 = \"%s\")", curGeneChrom, transAnno->name2); +struct genePred *geneAnnos = genePredReaderLoadQuery(conn, tdb->table, where); +struct genePred *geneAnno; +*geneChromStart = transAnno->txStart; +*geneChromEnd = transAnno->txEnd; +for (geneAnno = geneAnnos; geneAnno != NULL; geneAnno = geneAnno->next) + { + *geneChromStart = min(*geneChromStart, geneAnno->txStart); + *geneChromEnd = max(*geneChromEnd, transAnno->txEnd); + } +genePredFreeList(&geneAnnos); +} + +static void *metaDataLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId, char *tableBase, char *keyCol, unsigned queryOpts, sqlLoadFunc loadFunc) +/* load autoSql objects for gencode meta data. */ +{ +return sqlQueryObjs(conn, loadFunc, queryOpts, "select * from %s where %s = \"%s\"", + getGencodeTable(tdb, tableBase), keyCol, gencodeId); +} + +static int uniProtDatasetCmp(const void *va, const void *vb) +/* Compare gencodeToUniProt by dateset */ +{ +const struct gencodeToUniProt *a = *((struct gencodeToUniProt **)va); +const struct gencodeToUniProt *b = *((struct gencodeToUniProt **)vb); +return a->dataset - b->dataset; +} + +static char *getMethodDesc(char *source) +/* return the annotation method name based gene or transcript source */ +{ +// sometimes backmap doesn't get every entry method entry mapped. Until that +// is fixed, allow it to be missing +// looks for being havana and/or ensembl +// classifies other sources as automatic (mt_genbank_import ncrna ncrna_pseudogene) +bool hasHav = containsStringNoCase(source, "havana") != NULL; +bool hasEns = containsStringNoCase(source, "ensembl") != NULL; +if (hasHav && hasEns) + return "manual & automatic"; +else if (hasHav) + return "manual"; +else + return "automatic"; +} + +static char *getLevelDesc(int level) +/* return english description for level */ +{ +if (level == 1) + return "validated"; +else if (level == 2) + return "manual"; +else if (level == 3) + return "automatic"; +else + return "unknown"; +} + +static char *getSupportLevelDesc(struct gencodeTranscriptionSupportLevel *tsl) +/* return description for level */ +{ +static char buf[32]; +if ((tsl == NULL) || (tsl->level <= 0)) + return "tslNA"; +else + { + safef(buf, sizeof(buf), "%s", tsl->level); + return buf; + } +} + +static char* getScientificNameSym(void) +/* get the scientific name of an organism in the form "Homo_sapiens" + * WARNING: static return */ +{ +static char sciNameSym[128]; +char *sciName = hScientificName(database); +if (sciName == NULL) + errAbort("can't get scientific name for %s", database); +safecpy(sciNameSym, sizeof(sciNameSym), sciName); +freeMem(sciName); +subChar(sciNameSym, ' ', '_'); +return sciNameSym; +} + +static void prExtIdAnchor(char *id, char *urlTemplate) +/* if an id to an external database is not empty, print an HTML anchor to it */ +{ +if (!isEmpty(id)) + { + char urlBuf[512]; + safef(urlBuf, sizeof(urlBuf), urlTemplate, id); + printf("<a href=\"%s\" target=_blank>%s</a>", urlBuf, id); + } +} + +#if UNUSED +static void prTdExtIdAnchor(char *id, char *urlTemplate) +/* print a table data element with an anchor for a id */ +{ +printf("<td>"); +prExtIdAnchor(id, urlTemplate); +} +#endif + +static void prEnsIdAnchor(char *id, char *urlTemplate) +/* if an id to an ensembl database is not empty, print an HTML anchor to it */ +{ +if (!isEmpty(id)) + { + char idBuf[64], urlBuf[512]; + /* The lift37 releases append a '_N' modifier to the ids to indicate the are + * mapped. N is an integer mapping version. Don't include this in link if it exists. */ + safecpy(idBuf, sizeof(idBuf), id); + char *p = strchr(idBuf, '_'); + if (p != NULL) + *p = '\0'; + safef(urlBuf, sizeof(urlBuf), urlTemplate, getScientificNameSym(), idBuf); + printf("<a href=\"%s\" target=_blank>%s</a>", urlBuf, id); + } +} + +static void prTdEnsIdAnchor(char *id, char *urlTemplate) +/* print a table data element with an ensembl anchor for a id */ +{ +printf("<td>"); +prEnsIdAnchor(id, urlTemplate); +} + +static void prApprisTdAnchor(char *id, char *label, char *urlTemplate) +/* print a gene or transcript link to APPRIS */ +{ +// under bar separated, lower case species name. +char *speciesArg = hScientificName(database); +toLowerN(speciesArg, strlen(speciesArg)); +subChar(speciesArg, ' ', '_'); + +char accBuf[64]; +printf("<td><a href=\""); +printf(urlTemplate, speciesArg, getBaseAcc(id, accBuf, sizeof(accBuf))); +printf("\" target=_blank>%s</a>", label); + +freeMem(speciesArg); +} + +static void writePosLink(char *chrom, int chromStart, int chromEnd) +/* write link to a genomic position */ +{ +printf("<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A>", + hgTracksPathAndSettings(), database, + chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd); +} + +static bool geneHasApprisTranscripts(struct trackDb *tdb, struct sqlConnection *conn, struct gencodeAttrs *transAttrs) +/* check if any transcript in a gene has an APPRIS tags */ +{ +char query[1024]; +sqlSafefFrag(query, sizeof(query), + "%s tag where tag.tag like 'appris%%' and transcriptId in " + "(select transcriptId from %s where geneId='%s')", + getGencodeTable(tdb, "gencodeTag"), + getGencodeTable(tdb, "gencodeAttrs"), + transAttrs->geneId); +return sqlRowCount(conn, query) > 0; +} + +static char* findApprisTag(struct gencodeTag *tags) +/* search list for APPRIS tag or NULL */ +{ +struct gencodeTag *tag; +for (tag = tags; tag != NULL; tag = tag->next) + { + if (startsWith("appris_", tag->tag)) + return tag->tag; + } +return NULL; +} + +static char* apprisTagToSymbol(char* tag) +/* convert APPRIS tag to the symbol use by APPRIS. WARNING static return. */ +{ +// appris_principal_1 -> PRINCIPAL:1 +static char buf[64]; +safecpy(buf, sizeof(buf), tag+7); +touppers(buf); +subChar(buf, '_', ':'); +return buf; +} + +static void writeAprrisRow(struct sqlConnection *conn, struct trackDb *tdb, + struct gencodeAttrs *transAttrs, + struct gencodeTag *tags) +/* write row for APPRIS */ +{ +// Get labels to use. if transcript has an appris tag, then we link to the transcript. +// if it doesn;t have a appris tag, we can still link to the gene if any of the transcripts +// have appris tags +char* apprisTag = findApprisTag(tags); +char* transLabel = (apprisTag != NULL) ? apprisTagToSymbol(apprisTag) : NULL; +char *geneLabel = ((apprisTag != NULL) || geneHasApprisTranscripts(tdb, conn, transAttrs)) ? transAttrs->geneName : NULL; + +// APPRIS gene and transcript now go to the same location +printf("<tr><th><a href=\"%s\" target=_blank>APPRIS</a>\n", apprisHomeUrl); +if (transLabel != NULL) + prApprisTdAnchor(transAttrs->geneId, transLabel, apprisGeneUrl); +else + printf("<td> "); +if (geneLabel != NULL) + prApprisTdAnchor(transAttrs->geneId, geneLabel, apprisGeneUrl); +else + printf("<td> "); +printf("</tr>\n"); +} + +static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, + struct gencodeAttrs *transAttrs, + int geneChromStart, int geneChromEnd, + struct gencodeGeneSource *geneSource, struct gencodeTranscriptSource *transcriptSource, + struct gencodeTag *tags, bool haveTsl, struct gencodeTranscriptionSupportLevel *tsl) +/* write basic HTML info for all genes */ +{ +// basic gene and transcript information +printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n"); +printf("<tr><th><th>Transcript<th>Gene</tr>\n"); +printf("</thead><tbody>\n"); + +printf("<tr><th>GENCODE id"); +prTdEnsIdAnchor(transAttrs->transcriptId, + (isGrcH37Native(tdb) ? ensemblH37TranscriptIdUrl: ensemblTranscriptIdUrl)); +prTdEnsIdAnchor(transAttrs->geneId, + (isGrcH37Native(tdb) ? ensemblH37GeneIdUrl : ensemblGeneIdUrl)); +printf("</tr>\n"); + +if (transAttrs->proteinId != NULL) + { + // protein id in database, maybe not for this transcript + printf("<tr><th>Protein id"); + if (strlen(transAttrs->proteinId) > 0) + prTdEnsIdAnchor(transAttrs->proteinId, + (isGrcH37Native(tdb) ? ensemblH37ProteinIdUrl: ensemblProteinIdUrl)); + else + printf("<td> "); + printf("<td>"); + printf("</tr>\n"); + } + +#ifdef NOTW +printf("<tr><th>HAVANA manual id"); +printf("<td>%s", transAttrs->havanaTranscriptId); +printf("<td>%s", transAttrs->havanaGeneId); +printf("</tr>\n"); +#endif + +printf("<tr><th>Position"); +printf("<td>"); +writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); +printf("<td>"); +writePosLink(transAnno->chrom, geneChromStart, geneChromEnd); +printf("</tr>\n"); + +printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand); + +printf("<tr><th><a href=\"%s\" target = _blank>Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType); + +printf("<tr><th>Annotation Level<td>%s (%d)<td></tr>\n", getLevelDesc(transAttrs->level), transAttrs->level); + +char *transSrcDesc = (transcriptSource != NULL) ? getMethodDesc(transcriptSource->source) : UNKNOWN; +char *geneSrcDesc = (geneSource != NULL) ? getMethodDesc(geneSource->source) : UNKNOWN; +printf("<tr><th>Annotation Method<td>%s<td>%s</tr>\n", transSrcDesc, geneSrcDesc); + +if (haveTsl) + { + char *tslDesc = getSupportLevelDesc(tsl); + printf("<tr><th><a href=\"#tsl\">Transcription Support Level</a><td><a href=\"#%s\">%s</a><td></tr>\n", tslDesc, tslDesc); + } +printf("<tr><th>HGNC gene symbol<td colspan=2>"); +if (!isFakeGeneSymbol(transAttrs->geneName)) + prExtIdAnchor(transAttrs->geneName, hgncUrl); +printf("</tr>\n"); + +printf("<tr><th>CCDS<td>"); +if (!isEmpty(transAttrs->ccdsId)) + { + printf("<a href=\""); + printCcdsExtUrl(transAttrs->ccdsId); + printf("\" target=_blank>%s</a>", transAttrs->ccdsId); + } +printf("<td></tr>\n"); + +printf("<tr><th>GeneCards<td colspan=2>"); +if (!isFakeGeneSymbol(transAttrs->geneName)) + prExtIdAnchor(transAttrs->geneName, geneCardsUrl); +printf("</tr>\n"); + +if (isProteinCodingTrans(transAttrs)) + writeAprrisRow(conn, tdb, transAttrs, tags); + +// FIXME: add sequence here?? +printf("</tbody></table>\n"); +} + +static void writeAnnotationRemarkHtml(struct gencodeAnnotationRemark *remarks) +/* write HTML links to remarks */ +{ +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=\"1\">Annotation Remarks</tr>\n"); +printf("</thead><tbody>\n"); +// make sure at least one empty row in printed +if (remarks == NULL) + printf("<tr><td></td></tr>\n"); +struct gencodeAnnotationRemark *remark; +for (remark = remarks; remark != NULL; remark = remark->next) + { + char *encRemark = htmlEncode(remark->remark); + printf("<tr><td>%s</td></tr>\n", encRemark); + freeMem(encRemark); + } +printf("</tbody></table>\n"); +} + +static void writePdbLinkHtml(struct gencodeToPdb *pdbs) +/* write HTML links to PDB */ +{ +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=\"3\">Protein Data Bank</tr>\n"); +printf("</thead><tbody>\n"); +struct gencodeToPdb *pdb = pdbs; +int i, rowCnt = 0; +while ((pdb != NULL) || (rowCnt == 0)) + { + printf("<tr>"); + for (i = 0; i < 3; i++) + { + printf("<td width=\"33.33%%\">"); + if (pdb != NULL) + { + printf("<a href=\"http://www.rcsb.org/pdb/cgi/explore.cgi?job=graphics&pdbId=%s\" target=_blank>%s</a>", pdb->pdbId, pdb->pdbId); + pdb = pdb->next; + } + } + printf("</tr>\n"); + rowCnt++; + } +printf("</tbody></table>\n"); +} + +static void writePubMedEntry(struct gencodeToPubMed *pubMed) +/* write HTML table entry for a pubMed */ +{ +printf("<td width=\"33.33%%\"><a href=\""); +printEntrezPubMedUidUrl(stdout, pubMed->pubMedId); +printf("\" target=_blank>%d</a>", pubMed->pubMedId); +} + +static void writePubMedLinkHtml(struct gencodeToPubMed *pubMeds) +/* write HTML links to PubMed */ +{ +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=\"3\">PubMed</tr>\n"); +printf("</thead><tbody>\n"); +struct gencodeToPubMed *pubMed = pubMeds; +int i, rowCnt = 0; +while ((pubMed != NULL) || (rowCnt == 0)) + { + printf("<tr>"); + for (i = 0; i < 3; i++) + { + if (pubMed != NULL) + { + writePubMedEntry(pubMed); + pubMed = pubMed->next; + } + else + printf("<td width=\"33.33%%\">"); + } + printf("</tr>\n"); + rowCnt++; + } +printf("</tbody></table>\n"); +} + +static void writeEntrezGeneEntry(struct gencodeToEntrezGene *entrezGene) +/* write HTML table entry for a entrezGene */ +{ +printf("<td width=\"33.33%%\"><a href=\""); +printEntrezGeneUrl(stdout, entrezGene->entrezGeneId); +printf("\" target=_blank>%d</a>", entrezGene->entrezGeneId); +} + +static void writeEntrezGeneLinkHtml(struct gencodeToEntrezGene *entrezGenes) +/* write HTML links to EntrezGene */ +{ +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=\"3\">Entrez Gene</tr>\n"); +printf("</thead><tbody>\n"); +struct gencodeToEntrezGene *entrezGene = entrezGenes; +int i, rowCnt = 0; +while ((entrezGene != NULL) || (rowCnt == 0)) + { + printf("<tr>"); + for (i = 0; i < 3; i++) + { + if (entrezGene != NULL) + { + writeEntrezGeneEntry(entrezGene); + entrezGene = entrezGene->next; + } + else + printf("<td width=\"33.33%%\">"); + } + printf("</tr>\n"); + rowCnt++; + } +printf("</tbody></table>\n"); +} + +struct supportEvid +/* temporary struct for subset of supporting information displayed */ +{ + struct supportEvid *next; + char *seqId; /* sequence id (memory not owned) */ + char *seqSrc; /* evidence source database (memory not owned) */ +}; + +static int supportEvidCmp(const void *va, const void *vb) +/* Compare two supportEvid objects. */ +{ +const struct supportEvid *a = *((struct supportEvid **)va); +const struct supportEvid *b = *((struct supportEvid **)vb); +int diff = strcmp(a->seqSrc, b->seqSrc); +if (diff == 0) + diff = strcmp(a->seqId, b->seqId); +return diff; +} + +static void transcriptSupportToSupportEvid(struct supportEvid **supportEvids, struct gencodeTranscriptSupport *transcriptSupports) +/* convert transcriptSupport to common structure */ +{ +struct gencodeTranscriptSupport *transcriptSupport; +for (transcriptSupport = transcriptSupports; transcriptSupport != NULL; transcriptSupport = transcriptSupport->next) + { + struct supportEvid *supportEvid; + AllocVar(supportEvid); + supportEvid->seqId = transcriptSupport->seqId; + supportEvid->seqSrc = transcriptSupport->seqSrc; + slAddHead(supportEvids, supportEvid); + } +} + +static void exonSupportToSupportEvid(struct supportEvid **supportEvids, struct gencodeExonSupport *exonSupports) +/* convert exonSupport to common structure */ +{ +struct gencodeExonSupport *exonSupport; +for (exonSupport = exonSupports; exonSupport != NULL; exonSupport = exonSupport->next) + { + struct supportEvid *supportEvid; + AllocVar(supportEvid); + supportEvid->seqId = exonSupport->seqId; + supportEvid->seqSrc = exonSupport->seqSrc; + slAddHead(supportEvids, supportEvid); + } +} + +static void sortUniqSupportExidence(struct supportEvid **supportEvids) +/* sort support evidence and make unique */ +{ +struct supportEvid *supportEvid, *supportEvids2 = NULL; +slSort(supportEvids, supportEvidCmp); +// make unique +while ((supportEvid = slPopHead(supportEvids)) != NULL) + { + if ((supportEvids2 == NULL) || (supportEvidCmp(&supportEvid, &supportEvids2) != 0)) + slAddHead(&supportEvids2, supportEvid); + else + freeMem(supportEvid); + } +slReverse(&supportEvids2); +*supportEvids = supportEvids2; +} + +static struct supportEvid *loadSupportEvid(struct gencodeTranscriptSupport *transcriptSupports, + struct gencodeExonSupport *exonSupports) +/* load transcript and supporting evidence into a common structure */ +{ +struct supportEvid *supportEvids = NULL; +transcriptSupportToSupportEvid(&supportEvids, transcriptSupports); +exonSupportToSupportEvid(&supportEvids, exonSupports); +sortUniqSupportExidence(&supportEvids); +return supportEvids; +} + +static void writeSupportEvidenceEntry(struct supportEvid *supportEvid) +/* write HTML table entry for a supporting evidence */ +{ +// FIXME: should link to sources when possible +printf("<td width=\"25%%\">%s", supportEvid->seqSrc); +printf("<td width=\"25%%\">%s", supportEvid->seqId); +} + +static void writeSupportingEvidenceLinkHtml(struct trackDb *tdb, char *gencodeId, + struct gencodeTranscriptSupport *transcriptSupports, + struct gencodeExonSupport *exonSupports) +/* write HTML links to supporting evidence */ +{ +struct supportEvid *supportEvids = loadSupportEvid(transcriptSupports, exonSupports); + +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=\"4\">Supporting Evidence ("); +prEnsIdAnchor(gencodeId, + (isGrcH37Native(tdb) ? ensemblH37SupportingEvidUrl: ensemblSupportingEvidUrl)); +printf(")</tr>\n"); +printf("<tr class=\"hgcCcdsSub\"><th>Source<th>Sequence<th>Source<th>Sequence</tr>\n"); +printf("</thead><tbody>\n"); +struct supportEvid *supportEvid = supportEvids; +int i, rowCnt = 0; +while ((supportEvid != NULL) || (rowCnt == 0)) + { + printf("<tr>"); + for (i = 0; i < 2; i++) + { + if (supportEvid != NULL) + { + writeSupportEvidenceEntry(supportEvid); + supportEvid = supportEvid->next; + } + else + printf("<td colspan=\"2\" width=\"50%%\">"); + } + printf("</tr>\n"); + rowCnt++; + } +printf("</tbody></table>\n"); +slFreeList(&supportEvids); +} + +static void writeTagEntry(struct gencodeTag *tag) +/* write HTML table entry for a Tag */ +{ +// FIXME: link to help once gencodegenes.org has it +printf("<td width=\"33.33%%\">%s", tag->tag); +} + +static void writeTagLinkHtml(struct gencodeTag *tags) +/* write HTML links to Tag */ +{ +printf("<table class=\"hgcCcds\"><thead>\n"); +printf("<tr><th colspan=3><a href=\"%s\" target=_blank>Tags</a></tr>\n", gencodeTagsUrl); +printf("</thead><tbody>\n"); +int i, rowCnt = 0; +struct gencodeTag *tag = tags; +while ((tag != NULL) || (rowCnt == 0)) + { + printf("<tr>"); + for (i = 0; i < 3; i++) + { + if (tag != NULL) + { + writeTagEntry(tag); + tag = tag->next; + } + else + printf("<td width=\"33.33%%\">"); + } + printf("</tr>\n"); + rowCnt++; + } +printf("</tbody></table>\n"); +} + +static void doGencodeGeneTrack(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *transAnno) +/* Process click on a GENCODE gene annotation track. */ +{ +struct gencodeAttrs *transAttrs = transAttrsLoad(tdb, conn, gencodeId); +char *gencodeGeneId = transAttrs->geneId; +struct gencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "gencodeGeneSource", "geneId", sqlQuerySingle, (sqlLoadFunc)gencodeGeneSourceLoad); +struct gencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "gencodeTranscriptSource", "transcriptId", sqlQuerySingle, (sqlLoadFunc)gencodeTranscriptSourceLoad); +bool haveRemarks = haveGencodeTable(tdb, "gencodeAnnotationRemark"); +struct gencodeAnnotationRemark *remarks = haveRemarks ? metaDataLoad(tdb, conn, gencodeId, "gencodeAnnotationRemark", "transcriptId", 0, (sqlLoadFunc)gencodeAnnotationRemarkLoad) : NULL; +struct gencodeToPdb *pdbs = metaDataLoad(tdb, conn, gencodeId, "gencodeToPdb", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeToPdbLoad); +struct gencodeToPubMed *pubMeds = metaDataLoad(tdb, conn, gencodeId, "gencodeToPubMed", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeToPubMedLoad); +bool haveEntrezGene = haveGencodeTable(tdb, "gencodeToEntrezGene"); +struct gencodeToEntrezGene *entrezGenes = haveEntrezGene ? metaDataLoad(tdb, conn, gencodeId, "gencodeToEntrezGene", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeToEntrezGeneLoad) : NULL; +struct gencodeToRefSeq *refSeqs = metaDataLoad(tdb, conn, gencodeId, "gencodeToRefSeq", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeToRefSeqLoad); +struct gencodeTag *tags = metaDataLoad(tdb, conn, gencodeId, "gencodeTag", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeTagLoad); +struct gencodeTranscriptSupport *transcriptSupports = metaDataLoad(tdb, conn, gencodeId, "gencodeTranscriptSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeTranscriptSupportLoad); +struct gencodeExonSupport *exonSupports = NULL; +// exonSupports not available in back mapped GENCODE releases +if (haveGencodeTable(tdb, "gencodeExonSupport")) + exonSupports = metaDataLoad(tdb, conn, gencodeId, "gencodeExonSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeExonSupportLoad); +struct gencodeToUniProt *uniProts = metaDataLoad(tdb, conn, gencodeId, "gencodeToUniProt", "transcriptId", sqlQueryMulti, (sqlLoadFunc)gencodeToUniProtLoad); +slSort(&uniProts, uniProtDatasetCmp); +bool haveTsl = haveGencodeTable(tdb, "gencodeTranscriptionSupportLevel"); +struct gencodeTranscriptionSupportLevel *tsl = haveTsl ? metaDataLoad(tdb, conn, gencodeId, "gencodeTranscriptionSupportLevel", "transcriptId", 0, (sqlLoadFunc)gencodeTranscriptionSupportLevelLoad) : NULL; + +int geneChromStart, geneChromEnd; +getGeneBounds(tdb, conn, transAnno, &geneChromStart, &geneChromEnd); + +writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, tags, haveTsl, tsl); +writeTagLinkHtml(tags); +if (haveRemarks) + writeAnnotationRemarkHtml(remarks); +if (isProteinCodingTrans(transAttrs)) + writePdbLinkHtml(pdbs); +writePubMedLinkHtml(pubMeds); +if (haveEntrezGene) + writeEntrezGeneLinkHtml(entrezGenes); +writeSupportingEvidenceLinkHtml(tdb, gencodeId, transcriptSupports, exonSupports); + +gencodeAttrsFree(&transAttrs); +gencodeAnnotationRemarkFreeList(&remarks); +gencodeGeneSourceFreeList(&geneSource); +gencodeTranscriptSourceFreeList(&transcriptSource); +gencodeToPdbFreeList(&pdbs); +gencodeToPubMedFreeList(&pubMeds); +gencodeToEntrezGeneFreeList(&entrezGenes); +gencodeToRefSeqFreeList(&refSeqs); +gencodeTranscriptSupportFreeList(&transcriptSupports); +gencodeExonSupportFreeList(&exonSupports); +gencodeToUniProtFreeList(&uniProts); +gencodeTranscriptionSupportLevelFreeList(&tsl); +} + +static void gencodePrint(struct section *section, struct sqlConnection *conn, + char *gencodeId) +{ +struct trackDb *tdb = globalTdb; +struct genePred *anno = transAnnoLoad(conn, tdb, gencodeId); + +doGencodeGeneTrack(tdb, gencodeId, conn, anno); + +genePredFreeList(&anno); +} + + +bool isNewGencodeGene(struct trackDb *tdb) +/* is this a new-style gencode (>= V7) track, as indicated by + * the presence of the gencodeVersion setting */ +{ +return getGencodeVersion(tdb) != NULL; +} + +static boolean gencodeExists(struct section *section, struct sqlConnection *conn, char *geneId) +/* Return TRUE if necessary database exists and has something + * * on this one. */ +{ +struct trackDb *tdb = globalTdb; + +if (startsWith("gencodeAnnot", tdb->table)) + return TRUE; +return FALSE; +} + +struct section *gencodeSection(struct sqlConnection *conn, + struct hash *sectionRa) +/* Create links section. */ +{ +struct section *section = sectionNew(sectionRa, "gencode"); +section->exists = gencodeExists; +section->print = gencodePrint; +section->raFile = "gencode.ra"; +return section; +} +