3e38c690e11c057380d833734a7d906464f0e8bb markd Tue Aug 2 09:38:53 2022 -0700 adjusted link to APRIS to avoid broken redirect diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index a3c189b..10e02c5 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -1,960 +1,960 @@ /* gencodeClick - click handling for GENCODE tracks */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hgc.h" #include "gencodeClick.h" #include "ccdsClick.h" #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "htmshell.h" #include "jksql.h" #include "regexHelper.h" #include "encode/wgEncodeGencodeAttrs.h" #include "encode/wgEncodeGencodeGeneSource.h" #include "encode/wgEncodeGencodePdb.h" #include "encode/wgEncodeGencodePubMed.h" #include "encode/wgEncodeGencodeRefSeq.h" #include "encode/wgEncodeGencodeTag.h" #include "encode/wgEncodeGencodeTranscriptSource.h" #include "encode/wgEncodeGencodeTranscriptSupport.h" #include "encode/wgEncodeGencodeExonSupport.h" #include "encode/wgEncodeGencodeUniProt.h" #include "encode/wgEncodeGencodeEntrezGene.h" #include "encode/wgEncodeGencodeAnnotationRemark.h" #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h" /* * General notes: * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ /* Various URLs and URL templates. At one time, these were in the ra file, * but that didn't prove that helpful and end up requiring updated the ra * files for every GENCODE version if a URL was added or changed. */ //FIXME: clean up RA files when CGIs no longer need them static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s"; static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; static char *ensemblH37TranscriptIdUrl = "http://grch37.ensembl.org/%s/Transcript/Summary?db=core;t=%s"; static char *ensemblH37GeneIdUrl = "http://grch37.ensembl.org/%s/Gene/Summary?db=core;t=%s"; static char *ensemblH37ProteinIdUrl = "http://grch37.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s"; static char *ensemblH37SupportingEvidUrl = "http://grch37.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s"; static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/pages/biotypes.html"; static char *gencodeTagsUrl = "http://www.gencodegenes.org/pages/tags.html"; static char *yalePseudoUrl = "http://tables.pseudogene.org/%s"; static char *hgncUrl = " https://www.genenames.org/data/gene-symbol-report/#!/symbol/%s"; static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; -static char *apprisHomeUrl = "http://appris-tools.org/"; -static char *apprisGeneUrl = "http://appris-tools.org/#/database/id/%s/%s?sc=ensembl"; +static char *apprisHomeUrl = "https://appris.bioinfo.cnio.es/#/"; +static char *apprisGeneUrl = "https://appris.bioinfo.cnio.es/#/database/id/%s/%s?sc=ensembl"; static char* UNKNOWN = "unknown"; static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) /* get the accession with version number dropped. */ { safecpy(accBuf, accBufSize, acc); char *dot = strchr(accBuf, '.'); if (dot != NULL) *dot = '\0'; return accBuf; } static bool haveGencodeTable(struct trackDb *tdb, char *tableBase) /* determine if table is in settings and thus in this gencode release */ { return trackDbSetting(tdb, tableBase) != NULL; } static char *getGencodeTable(struct trackDb *tdb, char *tableBase) /* get a table name from the settings. */ { return trackDbRequiredSetting(tdb, tableBase); } static char* getGencodeVersion(struct trackDb *tdb) /* get the GENCODE version or NULL for < V7, which is not supported * by this module. */ { return trackDbSetting(tdb, "wgEncodeGencodeVersion"); } static boolean isGrcH37Native(struct trackDb *tdb) /* Is this GENCODE GRCh37 native build, which requires a different Ensembl site. */ { // check for non-lifted GENCODE on GRCh37/hg19 if (sameString(database, "hg19")) return stringIn("lift37", getGencodeVersion(tdb)) == NULL; else return FALSE; } static boolean isFakeGeneSymbol(char* sym) /* is this a static gene symbol? */ { static const char *regexp = "^AC[0-9]+\\.[0-9]+$"; return regexMatch(sym, regexp); } static int transAnnoCmp(const void *va, const void *vb) /* Compare genePreds, sorting to keep select gene first. The only cases * that annotations will be duplicated is if they are in the PAR and thus * on different chroms. */ { const struct genePred *a = *((struct genePred **)va); const struct genePred *b = *((struct genePred **)vb); if (sameString(a->name, seqName)) return -1; else if (sameString(b->name, seqName)) return 1; else return strcmp(a->name, b->name); } static bool isProteinCodingTrans(struct wgEncodeGencodeAttrs *transAttrs) /* is a transcript protein coding? */ { return sameString(transAttrs->transcriptClass, "coding"); } static struct genePred *transAnnoLoad(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId) /* load the gencode annotations and sort the one corresponding to the one that was clicked on is * first. Should only have one or two. */ { // must check chrom due to PAR char where[256]; sqlSafef(where, sizeof(where), "(chrom = \"%s\") and (name = \"%s\")", seqName, gencodeId); struct genePred *transAnno = genePredReaderLoadQuery(conn, tdb->track, where); slSort(&transAnno, transAnnoCmp); return transAnno; } static struct wgEncodeGencodeAttrs *transAttrsLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId) /* load the gencode attributes */ { char query[1024]; sqlSafef(query, sizeof(query), "select * from %s where transcriptId = \"%s\"", getGencodeTable(tdb, "wgEncodeGencodeAttrs"), gencodeId); struct sqlResult *sr = sqlGetResult(conn, query); char **row = sqlNextRow(sr); if (row == NULL) errAbort("gencode transcript %s not found in %s", gencodeId, getGencodeTable(tdb, "wgEncodeGencodeAttrs")); // older version don't have proteinId column. struct wgEncodeGencodeAttrs *transAttrs = wgEncodeGencodeAttrsLoad(row, sqlCountColumns(sr)); sqlFreeResult(&sr); return transAttrs; } static void getGeneBounds(struct trackDb *tdb, struct sqlConnection *conn, struct genePred *transAnno, int *geneChromStart, int *geneChromEnd) /* find bounds for the gene */ { // must check chrom due to PAR char where[256]; sqlSafef(where, sizeof(where), "(chrom = \"%s\") and (name2 = \"%s\")", seqName, transAnno->name2); struct genePred *geneAnnos = genePredReaderLoadQuery(conn, tdb->track, where); struct genePred *geneAnno; *geneChromStart = transAnno->txStart; *geneChromEnd = transAnno->txEnd; for (geneAnno = geneAnnos; geneAnno != NULL; geneAnno = geneAnno->next) { *geneChromStart = min(*geneChromStart, geneAnno->txStart); *geneChromEnd = max(*geneChromEnd, transAnno->txEnd); } genePredFreeList(&geneAnnos); } static void *metaDataLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId, char *tableBase, char *keyCol, unsigned queryOpts, sqlLoadFunc loadFunc) /* load autoSql objects for gencode meta data. */ { return sqlQueryObjs(conn, loadFunc, queryOpts, "select * from %s where %s = \"%s\"", getGencodeTable(tdb, tableBase), keyCol, gencodeId); } static int uniProtDatasetCmp(const void *va, const void *vb) /* Compare wgEncodeGencodeUniProt by dateset */ { const struct wgEncodeGencodeUniProt *a = *((struct wgEncodeGencodeUniProt **)va); const struct wgEncodeGencodeUniProt *b = *((struct wgEncodeGencodeUniProt **)vb); return a->dataset - b->dataset; } static char *getMethodDesc(char *source) /* return the annotation method name based gene or transcript source */ { // sometimes backmap doesn't get every entry method entry mapped. Until that // is fixed, allow it to be missing // looks for being havana and/or ensembl // classifies other sources as automatic (mt_genbank_import ncrna ncrna_pseudogene) bool hasHav = containsStringNoCase(source, "havana") != NULL; bool hasEns = containsStringNoCase(source, "ensembl") != NULL; if (hasHav && hasEns) return "manual & automatic"; else if (hasHav) return "manual"; else return "automatic"; } static char *getLevelDesc(int level) /* return english description for level */ { if (level == 1) return "validated"; else if (level == 2) return "manual"; else if (level == 3) return "automatic"; else return "unknown"; } static char *getSupportLevelDesc(struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* return description for level */ { static char buf[32]; if ((tsl == NULL) || (tsl->level <= 0)) return "tslNA"; else { safef(buf, sizeof(buf), "tsl%d", tsl->level); return buf; } } static char* getScientificNameSym(void) /* get the scientific name of an organism in the form "Homo_sapiens" * WARNING: static return */ { static char sciNameSym[128]; char *sciName = hScientificName(database); if (sciName == NULL) errAbort("can't get scientific name for %s", database); safecpy(sciNameSym, sizeof(sciNameSym), sciName); freeMem(sciName); subChar(sciNameSym, ' ', '_'); return sciNameSym; } static void prExtIdAnchor(char *id, char *urlTemplate) /* if an id to an external database is not empty, print an HTML anchor to it */ { if (!isEmpty(id)) { char urlBuf[512]; safef(urlBuf, sizeof(urlBuf), urlTemplate, id); printf("%s", urlBuf, id); } } #if UNUSED static void prTdExtIdAnchor(char *id, char *urlTemplate) /* print a table data element with an anchor for a id */ { printf(""); prExtIdAnchor(id, urlTemplate); } #endif static void prEnsIdAnchor(char *id, char *urlTemplate) /* if an id to an ensembl database is not empty, print an HTML anchor to it */ { if (!isEmpty(id)) { char idBuf[64], urlBuf[512]; /* The lift37 releases append a '_N' modifier to the ids to indicate the are * mapped. N is an integer mapping version. Don't include this in link if it exists. */ safecpy(idBuf, sizeof(idBuf), id); char *p = strchr(idBuf, '_'); if (p != NULL) *p = '\0'; safef(urlBuf, sizeof(urlBuf), urlTemplate, getScientificNameSym(), idBuf); printf("%s", urlBuf, id); } } static void prTdEnsIdAnchor(char *id, char *urlTemplate) /* print a table data element with an ensembl anchor for a id */ { printf(""); prEnsIdAnchor(id, urlTemplate); } static void prApprisTdAnchor(char *id, char *label, char *urlTemplate) /* print a gene or transcript link to APPRIS */ { // under bar separated, lower case species name. char *speciesArg = hScientificName(database); toLowerN(speciesArg, strlen(speciesArg)); subChar(speciesArg, ' ', '_'); char accBuf[64]; printf("%s", label); freeMem(speciesArg); } static void writePosLink(char *chrom, int chromStart, int chromEnd) /* write link to a genomic position */ { printf("%s:%d-%d", hgTracksPathAndSettings(), database, chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd); } static bool geneHasApprisTranscripts(struct trackDb *tdb, struct sqlConnection *conn, struct wgEncodeGencodeAttrs *transAttrs) /* check if any transcript in a gene has an APPRIS tags */ { char query[1024]; sqlSafef(query, sizeof(query), "%s tag where tag.tag like 'appris%%' and transcriptId in " "(select transcriptId from %s where geneId='%s')", getGencodeTable(tdb, "wgEncodeGencodeTag"), getGencodeTable(tdb, "wgEncodeGencodeAttrs"), transAttrs->geneId); return sqlRowCount(conn, query) > 0; } static char* findApprisTag(struct wgEncodeGencodeTag *tags) /* search list for APPRIS tag or NULL */ { struct wgEncodeGencodeTag *tag; for (tag = tags; tag != NULL; tag = tag->next) { if (startsWith("appris_", tag->tag)) return tag->tag; } return NULL; } static char* apprisTagToSymbol(char* tag) /* convert APPRIS tag to the symbol use by APPRIS. WARNING static return. */ { // appris_principal_1 -> PRINCIPAL:1 static char buf[64]; safecpy(buf, sizeof(buf), tag+7); touppers(buf); subChar(buf, '_', ':'); return buf; } static void writeAprrisRow(struct sqlConnection *conn, struct trackDb *tdb, struct wgEncodeGencodeAttrs *transAttrs, struct wgEncodeGencodeTag *tags) /* write row for APPRIS */ { // Get labels to use. if transcript has an appris tag, then we link to the transcript. // if it doesn;t have a appris tag, we can still link to the gene if any of the transcripts // have appris tags char* apprisTag = findApprisTag(tags); char* transLabel = (apprisTag != NULL) ? apprisTagToSymbol(apprisTag) : NULL; char *geneLabel = ((apprisTag != NULL) || geneHasApprisTranscripts(tdb, conn, transAttrs)) ? transAttrs->geneName : NULL; // APPRIS gene and transcript now go to the same location printf("APPRIS\n", apprisHomeUrl); if (transLabel != NULL) prApprisTdAnchor(transAttrs->geneId, transLabel, apprisGeneUrl); else printf(" "); if (geneLabel != NULL) prApprisTdAnchor(transAttrs->geneId, geneLabel, apprisGeneUrl); else printf(" "); printf("\n"); } static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs, int geneChromStart, int geneChromEnd, struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource, struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl) /* write basic HTML info for all genes */ { // basic gene and transcript information printf("\n"); printf("\n"); printf("\n"); printf("\n"); if (transAttrs->proteinId != NULL) { // protein id in database, maybe not for this transcript printf("\n"); } printf("\n"); printf("\n"); printf("\n", transAnno->strand); printf("\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType); printf("\n", getLevelDesc(transAttrs->level), transAttrs->level); char *transSrcDesc = (transcriptSource != NULL) ? getMethodDesc(transcriptSource->source) : UNKNOWN; char *geneSrcDesc = (geneSource != NULL) ? getMethodDesc(geneSource->source) : UNKNOWN; printf("\n", transSrcDesc, geneSrcDesc); if (haveTsl) { char *tslDesc = getSupportLevelDesc(tsl); printf("\n", tslDesc, tslDesc); } printf("\n"); printf("\n"); printf("\n"); if (isProteinCodingTrans(transAttrs)) writeAprrisRow(conn, tdb, transAttrs, tags); // FIXME: add sequence here?? printf("
TranscriptGene
GENCODE id"); prTdEnsIdAnchor(transAttrs->transcriptId, (isGrcH37Native(tdb) ? ensemblH37TranscriptIdUrl: ensemblTranscriptIdUrl)); prTdEnsIdAnchor(transAttrs->geneId, (isGrcH37Native(tdb) ? ensemblH37GeneIdUrl : ensemblGeneIdUrl)); printf("
Protein id"); if (strlen(transAttrs->proteinId) > 0) prTdEnsIdAnchor(transAttrs->proteinId, (isGrcH37Native(tdb) ? ensemblH37ProteinIdUrl: ensemblProteinIdUrl)); else printf(" "); printf(""); printf("
HAVANA manual id"); printf("%s", transAttrs->havanaTranscriptId); printf("%s", transAttrs->havanaGeneId); printf("
Position"); printf(""); writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd); printf(""); writePosLink(transAnno->chrom, geneChromStart, geneChromEnd); printf("
Strand%s
Biotype%s%s
Annotation Level%s (%d)
Annotation Method%s%s
Transcription Support Level%s
HGNC gene symbol"); if (!isFakeGeneSymbol(transAttrs->geneName)) prExtIdAnchor(transAttrs->geneName, hgncUrl); printf("
CCDS"); if (!isEmpty(transAttrs->ccdsId)) { printf("ccdsId); printf("\" target=_blank>%s", transAttrs->ccdsId); } printf("
GeneCards"); if (!isFakeGeneSymbol(transAttrs->geneName)) prExtIdAnchor(transAttrs->geneName, geneCardsUrl); printf("
\n"); } static void writeSequenceHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno) /* write links to get sequences */ { printf("\n"); printf("\n"); printf("\n"); if (transAnno->cdsStart < transAnno->cdsEnd) { // protein coding printf("\n"); } else { // non-protein coding printf("\n"); } printf("
Sequences
"); hgcAnchorSomewhere("htcGeneMrna", gencodeId, tdb->table, seqName); printf("Predicted mRNA"); printf(""); hgcAnchorSomewhere("htcTranslatedPredMRna", gencodeId, "translate", seqName); printf("Predicted protein
"); hgcAnchorSomewhere("htcGeneMrna", gencodeId, tdb->table, seqName); printf("Predicted mRNA
\n"); } static void writeAnnotationRemarkHtml(struct wgEncodeGencodeAnnotationRemark *remarks) /* write HTML links to remarks */ { printf("\n"); printf("\n"); printf("\n"); // make sure at least one empty row in printed if (remarks == NULL) printf("\n"); struct wgEncodeGencodeAnnotationRemark *remark; for (remark = remarks; remark != NULL; remark = remark->next) { char *encRemark = htmlEncode(remark->remark); printf("\n", encRemark); freeMem(encRemark); } printf("
Annotation Remarks
%s
\n"); } static void writePdbLinkHtml(struct wgEncodeGencodePdb *pdbs) /* write HTML links to PDB */ { printf("\n"); printf("\n"); printf("\n"); struct wgEncodeGencodePdb *pdb = pdbs; int i, rowCnt = 0; while ((pdb != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 3; i++) { printf("\n"); rowCnt++; } printf("
Protein Data Bank
"); if (pdb != NULL) { printf("%s", pdb->pdbId, pdb->pdbId); pdb = pdb->next; } } printf("
\n"); } static void writePubMedEntry(struct wgEncodeGencodePubMed *pubMed) /* write HTML table entry for a pubMed */ { printf("pubMedId); printf("\" target=_blank>%d", pubMed->pubMedId); } static void writePubMedLinkHtml(struct wgEncodeGencodePubMed *pubMeds) /* write HTML links to PubMed */ { printf("\n"); printf("\n"); printf("\n"); struct wgEncodeGencodePubMed *pubMed = pubMeds; int i, rowCnt = 0; while ((pubMed != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 3; i++) { if (pubMed != NULL) { writePubMedEntry(pubMed); pubMed = pubMed->next; } else printf("\n"); rowCnt++; } printf("
PubMed
"); } printf("
\n"); } static void writeEntrezGeneEntry(struct wgEncodeGencodeEntrezGene *entrezGene) /* write HTML table entry for a entrezGene */ { printf("entrezGeneId); printf("\" target=_blank>%d", entrezGene->entrezGeneId); } static void writeEntrezGeneLinkHtml(struct wgEncodeGencodeEntrezGene *entrezGenes) /* write HTML links to EntrezGene */ { printf("\n"); printf("\n"); printf("\n"); struct wgEncodeGencodeEntrezGene *entrezGene = entrezGenes; int i, rowCnt = 0; while ((entrezGene != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 3; i++) { if (entrezGene != NULL) { writeEntrezGeneEntry(entrezGene); entrezGene = entrezGene->next; } else printf("\n"); rowCnt++; } printf("
Entrez Gene
"); } printf("
\n"); } static void writeRefSeqEntry(struct wgEncodeGencodeRefSeq *refSeq) /* write HTML table entry for a RefSeq */ { printf("rnaAcc); printf("\" target=_blank>%s", refSeq->rnaAcc); printf(""); if (!isEmpty(refSeq->pepAcc)) { printf("pepAcc); printf("\" target=_blank>%s", refSeq->pepAcc); } } static void writeRefSeqLinkHtml(struct wgEncodeGencodeRefSeq *refSeqs) /* write HTML links to RefSeq */ { printf("\n"); printf("\n"); printf("\n"); printf("\n"); struct wgEncodeGencodeRefSeq *refSeq = refSeqs; int rowCnt = 0; while ((refSeq != NULL) || (rowCnt == 0)) { printf(""); if (refSeq != NULL) { writeRefSeqEntry(refSeq); refSeq = refSeq->next; } else printf("\n"); rowCnt++; } printf("
RefSeq
RNAProtein
"); printf("
\n"); } static void writeUniProtEntry(struct wgEncodeGencodeUniProt *uniProt) /* write HTML table entry for a UniProt */ { printf("%s", (uniProt->dataset == wgEncodeGencodeUniProtSwissProt) ? "SwissProt" : "TrEMBL"); printf("acc); printf("\" target=_blank>%s", uniProt->acc); printf("name); printf("\" target=_blank>%s", uniProt->name); } static void writeUniProtLinkHtml(struct wgEncodeGencodeUniProt *uniProts) /* write HTML links to UniProt */ { printf("\n"); printf("\n"); printf("\n"); printf("\n"); int i, rowCnt = 0; struct wgEncodeGencodeUniProt *uniProt = uniProts; while ((uniProt != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 2; i++) { if (uniProt != NULL) { writeUniProtEntry(uniProt); uniProt = uniProt->next; } else printf("\n"); rowCnt++; } printf("
UniProt
Data setAccessionNameData setAccessionName
"); } printf("
\n"); } struct supportEvid /* temporary struct for subset of supporting information displayed */ { struct supportEvid *next; char *seqId; /* sequence id (memory not owned) */ char *seqSrc; /* evidence source database (memory not owned) */ }; static int supportEvidCmp(const void *va, const void *vb) /* Compare two supportEvid objects. */ { const struct supportEvid *a = *((struct supportEvid **)va); const struct supportEvid *b = *((struct supportEvid **)vb); int diff = strcmp(a->seqSrc, b->seqSrc); if (diff == 0) diff = strcmp(a->seqId, b->seqId); return diff; } static void transcriptSupportToSupportEvid(struct supportEvid **supportEvids, struct wgEncodeGencodeTranscriptSupport *transcriptSupports) /* convert transcriptSupport to common structure */ { struct wgEncodeGencodeTranscriptSupport *transcriptSupport; for (transcriptSupport = transcriptSupports; transcriptSupport != NULL; transcriptSupport = transcriptSupport->next) { struct supportEvid *supportEvid; AllocVar(supportEvid); supportEvid->seqId = transcriptSupport->seqId; supportEvid->seqSrc = transcriptSupport->seqSrc; slAddHead(supportEvids, supportEvid); } } static void exonSupportToSupportEvid(struct supportEvid **supportEvids, struct wgEncodeGencodeExonSupport *exonSupports) /* convert exonSupport to common structure */ { struct wgEncodeGencodeExonSupport *exonSupport; for (exonSupport = exonSupports; exonSupport != NULL; exonSupport = exonSupport->next) { struct supportEvid *supportEvid; AllocVar(supportEvid); supportEvid->seqId = exonSupport->seqId; supportEvid->seqSrc = exonSupport->seqSrc; slAddHead(supportEvids, supportEvid); } } static void sortUniqSupportExidence(struct supportEvid **supportEvids) /* sort support evidence and make unique */ { struct supportEvid *supportEvid, *supportEvids2 = NULL; slSort(supportEvids, supportEvidCmp); // make unique while ((supportEvid = slPopHead(supportEvids)) != NULL) { if ((supportEvids2 == NULL) || (supportEvidCmp(&supportEvid, &supportEvids2) != 0)) slAddHead(&supportEvids2, supportEvid); else freeMem(supportEvid); } slReverse(&supportEvids2); *supportEvids = supportEvids2; } static struct supportEvid *loadSupportEvid(struct wgEncodeGencodeTranscriptSupport *transcriptSupports, struct wgEncodeGencodeExonSupport *exonSupports) /* load transcript and supporting evidence into a common structure */ { struct supportEvid *supportEvids = NULL; transcriptSupportToSupportEvid(&supportEvids, transcriptSupports); exonSupportToSupportEvid(&supportEvids, exonSupports); sortUniqSupportExidence(&supportEvids); return supportEvids; } static void writeSupportEvidenceEntry(struct supportEvid *supportEvid) /* write HTML table entry for a supporting evidence */ { // FIXME: should link to sources when possible printf("%s", supportEvid->seqSrc); printf("%s", supportEvid->seqId); } static void writeSupportingEvidenceLinkHtml(struct trackDb *tdb, char *gencodeId, struct wgEncodeGencodeTranscriptSupport *transcriptSupports, struct wgEncodeGencodeExonSupport *exonSupports) /* write HTML links to supporting evidence */ { struct supportEvid *supportEvids = loadSupportEvid(transcriptSupports, exonSupports); printf("\n"); printf("\n"); printf("\n"); printf("\n"); struct supportEvid *supportEvid = supportEvids; int i, rowCnt = 0; while ((supportEvid != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 2; i++) { if (supportEvid != NULL) { writeSupportEvidenceEntry(supportEvid); supportEvid = supportEvid->next; } else printf("\n"); rowCnt++; } printf("
Supporting Evidence ("); prEnsIdAnchor(gencodeId, (isGrcH37Native(tdb) ? ensemblH37SupportingEvidUrl: ensemblSupportingEvidUrl)); printf(")
SourceSequenceSourceSequence
"); } printf("
\n"); slFreeList(&supportEvids); } static void writeTagEntry(struct wgEncodeGencodeTag *tag) /* write HTML table entry for a Tag */ { // FIXME: link to help once gencodegenes.org has it printf("%s", tag->tag); } static void writeTagLinkHtml(struct wgEncodeGencodeTag *tags) /* write HTML links to Tag */ { printf("\n"); printf("\n", gencodeTagsUrl); printf("\n"); int i, rowCnt = 0; struct wgEncodeGencodeTag *tag = tags; while ((tag != NULL) || (rowCnt == 0)) { printf(""); for (i = 0; i < 3; i++) { if (tag != NULL) { writeTagEntry(tag); tag = tag->next; } else printf("\n"); rowCnt++; } printf("
Tags
"); } printf("
\n"); } static void doGencodeGeneTrack(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *transAnno) /* Process click on a GENCODE gene annotation track. */ { struct wgEncodeGencodeAttrs *transAttrs = transAttrsLoad(tdb, conn, gencodeId); char *gencodeGeneId = transAttrs->geneId; struct wgEncodeGencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "wgEncodeGencodeGeneSource", "geneId", sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeGeneSourceLoad); struct wgEncodeGencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSource", "transcriptId", sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeTranscriptSourceLoad); bool haveRemarks = haveGencodeTable(tdb, "wgEncodeGencodeAnnotationRemark"); struct wgEncodeGencodeAnnotationRemark *remarks = haveRemarks ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeAnnotationRemark", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeAnnotationRemarkLoad) : NULL; struct wgEncodeGencodePdb *pdbs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePdb", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePdbLoad); struct wgEncodeGencodePubMed *pubMeds = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePubMed", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePubMedLoad); bool haveEntrezGene = haveGencodeTable(tdb, "wgEncodeGencodeEntrezGene"); struct wgEncodeGencodeEntrezGene *entrezGenes = haveEntrezGene ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeEntrezGene", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeEntrezGeneLoad) : NULL; struct wgEncodeGencodeRefSeq *refSeqs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeRefSeq", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeRefSeqLoad); struct wgEncodeGencodeTag *tags = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTag", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTagLoad); struct wgEncodeGencodeTranscriptSupport *transcriptSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTranscriptSupportLoad); struct wgEncodeGencodeExonSupport *exonSupports = NULL; // exonSupports not available in back mapped GENCODE releases if (haveGencodeTable(tdb, "wgEncodeGencodeExonSupport")) exonSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeExonSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeExonSupportLoad); struct wgEncodeGencodeUniProt *uniProts = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeUniProt", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeUniProtLoad); slSort(&uniProts, uniProtDatasetCmp); bool haveTsl = haveGencodeTable(tdb, "wgEncodeGencodeTranscriptionSupportLevel"); struct wgEncodeGencodeTranscriptionSupportLevel *tsl = haveTsl ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptionSupportLevel", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeTranscriptionSupportLevelLoad) : NULL; int geneChromStart, geneChromEnd; getGeneBounds(tdb, conn, transAnno, &geneChromStart, &geneChromEnd); char title[256]; safef(title, sizeof(title), "GENCODE V%s Transcript Annotation", getGencodeVersion(tdb)); char header[256]; safef(header, sizeof(header), "%s %s", title, gencodeId); if (!isEmpty(transAttrs->geneName)) safef(header, sizeof(header), "%s %s (%s)", title, gencodeId, transAttrs->geneName); else safef(header, sizeof(header), "%s %s", title, gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, tags, haveTsl, tsl); writeTagLinkHtml(tags); writeSequenceHtml(tdb, gencodeId, transAnno); if (haveRemarks) writeAnnotationRemarkHtml(remarks); if (isProteinCodingTrans(transAttrs)) writePdbLinkHtml(pdbs); writePubMedLinkHtml(pubMeds); if (haveEntrezGene) writeEntrezGeneLinkHtml(entrezGenes); writeRefSeqLinkHtml(refSeqs); if (isProteinCodingTrans(transAttrs)) writeUniProtLinkHtml(uniProts); writeSupportingEvidenceLinkHtml(tdb, gencodeId, transcriptSupports, exonSupports); wgEncodeGencodeAttrsFree(&transAttrs); wgEncodeGencodeAnnotationRemarkFreeList(&remarks); wgEncodeGencodeGeneSourceFreeList(&geneSource); wgEncodeGencodeTranscriptSourceFreeList(&transcriptSource); wgEncodeGencodePdbFreeList(&pdbs); wgEncodeGencodePubMedFreeList(&pubMeds); wgEncodeGencodeEntrezGeneFreeList(&entrezGenes); wgEncodeGencodeRefSeqFreeList(&refSeqs); wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports); wgEncodeGencodeExonSupportFreeList(&exonSupports); wgEncodeGencodeUniProtFreeList(&uniProts); wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl); } static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno) /* Process click on a GENCODE two-way pseudogene annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); printf("Yale id: "); prExtIdAnchor(gencodeId, yalePseudoUrl); printf("
"); printPos(pseudoAnno->chrom, pseudoAnno->txStart, pseudoAnno->txEnd, pseudoAnno->strand, FALSE, NULL); } static void doGencodeGenePolyA(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *polyAAnno) /* Process click on a GENCODE poly-A annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE PolyA Annotation %s (%s)", polyAAnno->name2, gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); printf("Annotation id: %s
", gencodeId); printf("Annotation Type: %s
",polyAAnno->name2); printPos(polyAAnno->chrom, polyAAnno->txStart, polyAAnno->txEnd, polyAAnno->strand, FALSE, NULL); } void doGencodeGene(struct trackDb *tdb, char *gencodeId) /* Process click on a GENCODE annotation. */ { struct sqlConnection *conn = hAllocConn(database); struct genePred *anno = transAnnoLoad(conn, tdb, gencodeId); if (startsWith("wgEncodeGencodeBasic", tdb->track) || startsWith("wgEncodeGencodeComp", tdb->track) || startsWith("wgEncodeGencodePseudoGene", tdb->track)) doGencodeGeneTrack(tdb, gencodeId, conn, anno); else if (startsWith("wgEncodeGencode2wayConsPseudo", tdb->track)) doGencodeGene2WayPseudo(tdb, gencodeId, conn, anno); else if (startsWith("wgEncodeGencodePolya", tdb->track)) doGencodeGenePolyA(tdb, gencodeId, conn, anno); else errAbort("doGencodeGene: track not handled: \"%s\"", tdb->track); htmlHorizontalLine(); printTrackHtml(tdb); genePredFreeList(&anno); hFreeConn(&conn); } bool isNewGencodeGene(struct trackDb *tdb) /* is this a new-style gencode (>= V7) track, as indicated by * the presence of the wgEncodeGencodeVersion setting */ { return getGencodeVersion(tdb) != NULL; }