cb2705847f5359843f2b0c975b66692fc3af28ba markd Sat Apr 4 15:10:01 2015 -0700 import of GENCODE V22 diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c index c1f91a6..8e0bac0 100644 --- src/hg/hgc/gencodeClick.c +++ src/hg/hgc/gencodeClick.c @@ -9,30 +9,31 @@ #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "htmshell.h" #include "jksql.h" #include "encode/wgEncodeGencodeAttrs.h" #include "encode/wgEncodeGencodeGeneSource.h" #include "encode/wgEncodeGencodePdb.h" #include "encode/wgEncodeGencodePubMed.h" #include "encode/wgEncodeGencodeRefSeq.h" #include "encode/wgEncodeGencodeTag.h" #include "encode/wgEncodeGencodeTranscriptSource.h" #include "encode/wgEncodeGencodeTranscriptSupport.h" #include "encode/wgEncodeGencodeExonSupport.h" #include "encode/wgEncodeGencodeUniProt.h" +#include "encode/wgEncodeGencodeEntrezGene.h" #include "encode/wgEncodeGencodeAnnotationRemark.h" #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h" /* * General notes: * - this will be integrated into hgGene at some point, however this was * done as part of hgc for timing reasons and to allow more time to design * the hgGene part. * - Tables below will output at least one row even if no data is available. * */ /* Various URLs and URL templates. At one time, these were in the ra file, * but that didn't prove that helpful and end up requiring updated the ra * files for every GENCODE version if a URL was added or changed. */ @@ -49,30 +50,36 @@ static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s"; static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/"; static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id&specie=%s"; static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id&specie=%s"; static char *getBaseAcc(char *acc, char *accBuf, int accBufSize) /* get the accession with version number dropped. */ { safecpy(accBuf, accBufSize, acc); char *dot = strchr(accBuf, '.'); if (dot != NULL) *dot = '\0'; return accBuf; } +static bool haveGencodeTable(struct trackDb *tdb, char *tableBase) +/* determine if table is in settings and thus in this gencode release */ +{ +return trackDbSetting(tdb, tableBase) != NULL; +} + static char *getGencodeTable(struct trackDb *tdb, char *tableBase) /* get a table name from the settings. */ { return trackDbRequiredSetting(tdb, tableBase); } static int transAnnoCmp(const void *va, const void *vb) /* Compare genePreds, sorting to keep select gene first. The only cases * that annotations will be duplicated is if they are in the PAR and thus * on different chroms. */ { const struct genePred *a = *((struct genePred **)va); const struct genePred *b = *((struct genePred **)vb); if (sameString(a->name, seqName)) return -1; @@ -431,30 +438,65 @@ { if (pubMed != NULL) { writePubMedEntry(pubMed); pubMed = pubMed->next; } else printf(""); } printf("\n"); rowCnt++; } printf("\n"); } +static void writeEntrezGeneEntry(struct wgEncodeGencodeEntrezGene *entrezGene) +/* write HTML table entry for a entrezGene */ +{ +printf("entrezGeneId); +printf("\" target=_blank>%d", entrezGene->entrezGeneId); +} + +static void writeEntrezGeneLinkHtml(struct wgEncodeGencodeEntrezGene *entrezGenes) +/* write HTML links to EntrezGene */ +{ +printf("\n"); +printf("\n"); +printf("\n"); +struct wgEncodeGencodeEntrezGene *entrezGene = entrezGenes; +int i, rowCnt = 0; +while ((entrezGene != NULL) || (rowCnt == 0)) + { + printf(""); + for (i = 0; i < 3; i++) + { + if (entrezGene != NULL) + { + writeEntrezGeneEntry(entrezGene); + entrezGene = entrezGene->next; + } + else + printf("\n"); + rowCnt++; + } +printf("
Entrez Gene
"); + } + printf("
\n"); +} + static void writeRefSeqEntry(struct wgEncodeGencodeRefSeq *refSeq) /* write HTML table entry for a RefSeq */ { printf("rnaAcc); printf("\" target=_blank>%s", refSeq->rnaAcc); printf(""); if (!isEmpty(refSeq->pepAcc)) { printf("pepAcc); printf("\" target=_blank>%s", refSeq->pepAcc); } } @@ -669,75 +711,80 @@ printf(""); } printf("\n"); rowCnt++; } printf("\n"); } static void doGencodeGeneTrack(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *transAnno) /* Process click on a GENCODE gene annotation track. */ { struct wgEncodeGencodeAttrs *transAttrs = transAttrsLoad(tdb, conn, gencodeId); char *gencodeGeneId = transAttrs->geneId; struct wgEncodeGencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "wgEncodeGencodeGeneSource", "geneId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeGeneSourceLoad); struct wgEncodeGencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSource", "transcriptId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeTranscriptSourceLoad); -bool haveRemarks = (trackDbSetting(tdb, "wgEncodeGencodeAnnotationRemark") != NULL); +bool haveRemarks = haveGencodeTable(tdb, "wgEncodeGencodeAnnotationRemark"); struct wgEncodeGencodeAnnotationRemark *remarks = haveRemarks ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeAnnotationRemark", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeAnnotationRemarkLoad) : NULL; struct wgEncodeGencodePdb *pdbs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePdb", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePdbLoad); struct wgEncodeGencodePubMed *pubMeds = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePubMed", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePubMedLoad); +bool haveEntrezGene = haveGencodeTable(tdb, "wgEncodeGencodeEntrezGene"); +struct wgEncodeGencodeEntrezGene *entrezGenes = haveEntrezGene ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeEntrezGene", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeEntrezGeneLoad) : NULL; struct wgEncodeGencodeRefSeq *refSeqs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeRefSeq", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeRefSeqLoad); struct wgEncodeGencodeTag *tags = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTag", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTagLoad); struct wgEncodeGencodeTranscriptSupport *transcriptSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTranscriptSupportLoad); struct wgEncodeGencodeExonSupport *exonSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeExonSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeExonSupportLoad); struct wgEncodeGencodeUniProt *uniProts = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeUniProt", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeUniProtLoad); slSort(&uniProts, uniProtDatasetCmp); -bool haveTsl = (trackDbSetting(tdb, "wgEncodeGencodeTranscriptionSupportLevel") != NULL); +bool haveTsl = haveGencodeTable(tdb, "wgEncodeGencodeTranscriptionSupportLevel"); struct wgEncodeGencodeTranscriptionSupportLevel *tsl = haveTsl ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptionSupportLevel", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeTranscriptionSupportLevelLoad) : NULL; int geneChromStart, geneChromEnd; getGeneBounds(tdb, conn, transAnno, &geneChromStart, &geneChromEnd); char *title = "GENCODE Transcript Annotation"; char header[256]; safef(header, sizeof(header), "%s %s", title, gencodeId); if (!isEmpty(transAttrs->geneName)) safef(header, sizeof(header), "%s %s (%s)", title, gencodeId, transAttrs->geneName); else safef(header, sizeof(header), "%s %s", title, gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, haveTsl, tsl); writeTagLinkHtml(tags); writeSequenceHtml(tdb, gencodeId, transAnno); if (haveRemarks) writeAnnotationRemarkHtml(remarks); if (isProteinCodingTrans(transAttrs)) writePdbLinkHtml(pdbs); writePubMedLinkHtml(pubMeds); +if (haveEntrezGene) + writeEntrezGeneLinkHtml(entrezGenes); writeRefSeqLinkHtml(refSeqs); if (isProteinCodingTrans(transAttrs)) writeUniProtLinkHtml(uniProts); writeSupportingEvidenceLinkHtml(gencodeId, transcriptSupports, exonSupports); wgEncodeGencodeAttrsFree(&transAttrs); wgEncodeGencodeAnnotationRemarkFreeList(&remarks); wgEncodeGencodeGeneSourceFreeList(&geneSource); wgEncodeGencodeTranscriptSourceFreeList(&transcriptSource); wgEncodeGencodePdbFreeList(&pdbs); wgEncodeGencodePubMedFreeList(&pubMeds); +wgEncodeGencodeEntrezGeneFreeList(&entrezGenes); wgEncodeGencodeRefSeqFreeList(&refSeqs); wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports); wgEncodeGencodeExonSupportFreeList(&exonSupports); wgEncodeGencodeUniProtFreeList(&uniProts); wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl); } static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno) /* Process click on a GENCODE two-way pseudogene annotation track. */ { char header[256]; safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId); cartWebStart(cart, database, "%s", header); printf("

%s

\n", header); printf("Yale id: ");