72f056ceabce7b9778635a48def6ee9be5b29ebe markd Fri Sep 21 03:05:03 2012 -0700 link GENCODE to APPRIS and HGNC diff --git src/hg/hgc/ccdsClick.c src/hg/hgc/ccdsClick.c index f5fce46..a33d3a0 100644 --- src/hg/hgc/ccdsClick.c +++ src/hg/hgc/ccdsClick.c @@ -1,428 +1,433 @@ /* ccdsClick - click handling for CCDS track and related functions */ #include "common.h" #include "hgc.h" #include "ccdsClick.h" #include "ccdsInfo.h" #include "ccdsNotes.h" #include "ccdsGeneMap.h" #include "geneSimilarities.h" #include "genbank.h" #include "genePred.h" #include "genePredReader.h" #include "ensFace.h" #include "mgcClick.h" #include "htmshell.h" -struct ccdsInfo *getCcdsUrlForSrcDb(struct sqlConnection *conn, char *acc) +static struct ccdsInfo *getCcdsInfoForSrcDb(struct sqlConnection *conn, char *acc) /* Get a ccdsInfo object for a RefSeq, ensembl, or vega gene, if it * exists, otherwise return NULL */ { if (sqlTableExists(conn, "ccdsInfo")) return ccdsInfoSelectByMrna(conn, acc); else return NULL; } -void printCcdsUrlForSrcDb(struct sqlConnection *conn, struct ccdsInfo *ccdsInfo) -/* Print out CCDS URL for a refseq, ensembl, or vega gene, if it +void printCcdsExtUrl(char *ccdsId) +/* Print out URL to link to CCDS database at NCBI */ +{ +printf("http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&BUILDS=ALLBUILDS&DATA=%s", ccdsId); +} + +static void printCcdsUrlForSrcDb(struct sqlConnection *conn, struct ccdsInfo *ccdsInfo) +/* Print out CCDS hgc URL for a refseq, ensembl, or vega gene, if it * exists. */ { printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), ccdsInfo->ccds, seqName, winStart, winStart, winEnd, database); } void printCcdsForSrcDb(struct sqlConnection *conn, char *acc) -/* Print out CCDS link for a refseq, ensembl, or vega gene, if it +/* Print out CCDS hgc link for a refseq, ensembl, or vega gene, if it * exists. */ { -struct ccdsInfo *ccdsInfo = getCcdsUrlForSrcDb(conn, acc);; +struct ccdsInfo *ccdsInfo = getCcdsInfoForSrcDb(conn, acc);; if (ccdsInfo != NULL) { printf("<B>CCDS:</B> <A href=\""); printCcdsUrlForSrcDb(conn, ccdsInfo); printf("\">%s</A><BR>", ccdsInfo->ccds); } } struct ccdsGeneMap *getCcdsGenesForMappedGene(struct sqlConnection *conn, char *acc, char *mapTable) /* get a list of ccds genes associated with a current and window from a * mapping table, or NULL */ { struct ccdsGeneMap *ccdsGenes = NULL; if (sqlTableExists(conn, mapTable) && sqlTableExists(conn, "ccdsInfo")) ccdsGenes = ccdsGeneMapSelectByGeneOver(conn, mapTable, acc, seqName, winStart, winEnd, 0.0); slSort(&ccdsGenes, ccdsGeneMapCcdsIdCmp); return ccdsGenes; } - void printCcdsUrl(struct sqlConnection *conn, char *ccdsId) -/* Print out CCDS url for a gene */ +/* Print out CCDS hgc URL for a gene */ { printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), ccdsId, seqName, winStart, winStart, winEnd, database); } void printCcdsForMappedGene(struct sqlConnection *conn, char *acc, char *mapTable) /* Print out CCDS links for a gene mapped via a cddsGeneMap table */ { struct ccdsGeneMap *ccdsGenes = getCcdsGenesForMappedGene(conn, acc, mapTable); if (ccdsGenes != NULL) { printf("<B>CCDS:</B> "); struct ccdsGeneMap *gene; for (gene = ccdsGenes; gene != NULL; gene = gene->next) { if (gene != ccdsGenes) printf(", "); printf("<A href=\""); printCcdsUrl(conn, gene->ccdsId); printf("\">%s</A>", gene->ccdsId); } printf("<BR>\n"); } } static char *getCcdsGeneSymbol(struct sqlConnection *conn, struct ccdsInfo *rsCcds) /* get the gene name for a CCDS */ { struct ccdsInfo *ci; char accBuf[GENBANK_ACC_BUFSZ], query[256]; char *geneSym = NULL; for (ci = rsCcds; ci != NULL; ci = ci->next) { safef(query, sizeof(query), "select name from refLink where mrnaAcc='%s'", genbankDropVer(accBuf, ci->mrnaAcc)); geneSym = sqlQuickString(conn, query); if (geneSym != NULL) return geneSym; } return NULL; } static char *getCcdsRefSeqSummary(struct sqlConnection *conn, struct ccdsInfo *rsCcds) /* get the refseq summary for a CCDS */ { struct ccdsInfo *ci; char accBuf[GENBANK_ACC_BUFSZ]; char *summary = NULL; for (ci = rsCcds; ci != NULL; ci = ci->next) { summary = getRefSeqSummary(conn, genbankDropVer(accBuf, ci->mrnaAcc)); if (summary != NULL) return summary; } return NULL; } static struct ccdsGeneMap *ccdsGetGenes(struct sqlConnection *conn, char *mapTable, char *ccdsId) /* Get ccdsGeneMap objects for a ccdsId. Returns only * the best overlapping ones (ones with the same cdsSimilariy as * the highest cdsSimilariy. */ { struct ccdsGeneMap *ccdsGenes = NULL, *bestCcdsGenes = NULL, *ccdsGene; /* filter by chrom due to PAR */ ccdsGenes = ccdsGeneMapSelectByCcds(conn, mapTable, ccdsId, seqName, 0.0); if (ccdsGenes == NULL) return NULL; bestCcdsGenes = slPopHead(&ccdsGenes); /* seed with first */ while ((ccdsGene = slPopHead(&ccdsGenes)) != NULL) { if (ccdsGene->cdsSimilarity == bestCcdsGenes->cdsSimilarity) { /* same as best, keep */ slAddHead(&bestCcdsGenes, ccdsGene); } else if (ccdsGene->cdsSimilarity > bestCcdsGenes->cdsSimilarity) { /* new best, replace list */ ccdsGeneMapFreeList(&bestCcdsGenes); bestCcdsGenes = ccdsGene; } else { /* worse, drop */ ccdsGeneMapFree(&ccdsGene); } } /* only keep one of each gene */ slUniqify(&bestCcdsGenes, ccdsGeneMapGeneIdCmp, ccdsGeneMapFree); return bestCcdsGenes; } static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId) /* output a URL to hgGene for a ccds */ { char where[128]; struct genePredReader *gpr; struct genePred *ccdsGene = NULL, *kgGene = NULL; /* get ccds genePred to get location */ safef(where, sizeof(where), "chrom = '%s' and name = '%s'", seqName, ccdsId); gpr = genePredReaderQuery(conn, "ccdsGene", where); ccdsGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (ccdsGene == NULL) errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName); else if (ccdsGene->next != NULL) errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName); /* get KG genePred, as need exact location for link */ safef(where, sizeof(where), "name = '%s' and strand = '%s'", kgId, ccdsGene->strand); gpr = genePredReaderRangeQuery(conn, "knownGene", seqName, ccdsGene->txStart, ccdsGene->txEnd, where); kgGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (kgGene == NULL) errAbort("%s not found in knownGene table for chrom %s", kgId, seqName); else if (kgGene->next != NULL) errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName); printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d", cartSidUrlString(cart), "db", database, "hgg_gene", kgId, "hgg_chrom", seqName, "hgg_start", kgGene->txStart, "hgg_end", kgGene->txEnd); genePredFree(&ccdsGene); genePredFree(&kgGene); } static void ccdsNcbiRows(char *ccdsId, struct ccdsInfo *rsCcds) /* output RefSeq CCDS entries */ { struct ccdsInfo *ci; for (ci = rsCcds; ci != NULL; ci = ci->next) { printf("<TR>"); if (ci == rsCcds) printf("<TH ROWSPAN=%d>RefSeq", slCount(rsCcds)); printf("<TD><A HREF=\""); printEntrezNucleotideUrl(stdout, ci->mrnaAcc); printf("\" TARGET=_blank>%s</A> ", ci->mrnaAcc); printf("<TD><A HREF=\""); printEntrezProteinUrl(stdout, ci->protAcc); printf("\" TARGET=_blank>%s</A>", ci->protAcc); printf("</TR>\n"); } } static void ccdsHinxtonRows(char *ccdsId, bool isVega,struct ccdsInfo *hinCcds) /* output Ensembl or vega CCDS entries */ { struct ccdsInfo *ci; char *dbArg = isVega ? "&db=vega" : ""; char *ensGenome = ensOrgNameFromScientificName(scientificName); if (ensGenome == NULL) errAbort("%s: ensOrgNameFromScientificName failed", ccdsId); for (ci = hinCcds; ci != NULL; ci = ci->next) { printf("<TR>"); if (ci == hinCcds) printf("<TH ROWSPAN=%d>%s", slCount(hinCcds), (isVega ? "Vega" : "Ensembl")); printf("<TD><A HREF=\"http://www.ensembl.org/%s/geneview?transcript=%s%s\"" " TARGET=_blank>%s</A> ", ensGenome, ci->mrnaAcc, dbArg, ci->mrnaAcc); printf("<TD><A HREF=\"http://www.ensembl.org/%s/protview?peptide=%s%s\"" " TARGET=_blank>%s</A>", ensGenome, ci->protAcc, dbArg, ci->protAcc); printf("</TR>\n"); } } static void ccdsKnownGenesRows(struct sqlConnection *conn, char *ccdsId) /* output KnownGenes mapped to CCDS */ { struct ccdsGeneMap *ccdsKgs = ccdsGetGenes(conn, "ccdsKgMap", ccdsId); struct ccdsGeneMap *ccdsKg; for (ccdsKg = ccdsKgs; ccdsKg != NULL; ccdsKg = ccdsKg->next) { char *spId = kgIdToSpId(conn, ccdsKg->geneId); printf("<TR>"); if (ccdsKg == ccdsKgs) printf("<TH ROWSPAN=%d>%s", slCount(ccdsKgs), "UCSC Genes"); printf("<TD><A HREF=\""); printCcdsHgGeneUrl(conn, ccdsId, ccdsKg->geneId); printf("\" TARGET=_blank>%s</A>", ccdsKg->geneId); printf("<TD> "); freez(&spId); printf("</TR>\n"); } } static void ccdsMgcRows(struct sqlConnection *conn, char *ccdsId) /* output MGCs mapped to CCDS */ { // only possible to get multiple CCDS genePreds in PAR, since we are linking // to details, not browser, only use the first set. struct geneSimilarities *geneSims = geneSimilaritiesBuildAll(conn, TRUE, ccdsId, "ccdsGene", "mgcGenes"); struct geneSim *mgc; for (mgc = geneSims->genes; mgc != NULL; mgc = mgc->next) { printf("<TR>"); if (mgc == geneSims->genes) printf("<TH ROWSPAN=%d>MGC", slCount(geneSims->genes)); printf("<TD><A HREF=\""); printMgcDetailsUrl(mgc->gene->name, mgc->gene->txStart); printf("\">%s</A>", mgc->gene->name); printf("<TD> </TR>\n"); } geneSimilaritiesFreeList(&geneSims); } static void writeBasicInfoHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds, struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds) /* write table with basic information about the CCDS (2 columns) */ { { printf("<TABLE class=\"hgcCcds\"><TBODY>\n"); /* gene symbol */ char *geneSym = getCcdsGeneSymbol(conn, rsCcds); if (geneSym != NULL) printf("<TR><TH>Gene<TD>%s</TR>\n", geneSym); freez(&geneSym); /* description */ char *desc = hGenBankGetDesc(database, rsCcds->mrnaAcc, TRUE); if (desc != NULL) printf("<TR><TH>Description<TD>%s</TR>\n", desc); freez(&desc); /* CCDS sequence links */ printf("<TR>\n"); printf("<TH>Sequences"); printf("<TD>"); hgcAnchorSomewhere("htcGeneMrna", ccdsId, "ccdsGene", seqName); printf("CDS</A>, "); hgcAnchorSomewhere("htcTranslatedPredMRna", ccdsId, "translate", seqName); printf("protein</A>, "); hgcAnchorSomewhere( "htcGeneInGenome", ccdsId, "ccdsGene", seqName); printf("genomic</A>"); printf("</TR>\n"); /* CCDS databases */ printf("<TR>\n"); -printf("<TH>CCDS database"); -printf("<TD> <A HREF=\"http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&BUILDS=ALLBUILDS&DATA=%s\" TARGET=_blank>%s</A>", - ccdsId, ccdsId); +printf("<TH>CCDS database<TD> <A HREF=\""); +printCcdsExtUrl(ccdsId); +printf("\" TARGET=_blank>%s</A>", ccdsId); printf("</TR>\n"); printf("</TBODY></TABLE>\n"); printf("<BR>\n"); } } static void writeLinksHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds, struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds) /* write table with links to other browser apps or external databases (3 columns) */ { printf("<H3>Associated Sequences</H3>\n"); printf("<TABLE class=\"hgcCcds\">\n"); printf("<THEAD>\n"); printf("<TR><TH> <TH>mRNA<TH>Protein</TR>\n"); printf("</THEAD><TBODY>\n"); if (sqlTableExists(conn, "ccdsKgMap")) ccdsKnownGenesRows(conn, ccdsId); ccdsNcbiRows(ccdsId, rsCcds); if (vegaCcds != NULL) ccdsHinxtonRows(ccdsId, TRUE, vegaCcds); if (ensCcds != NULL) ccdsHinxtonRows(ccdsId, FALSE, ensCcds); if (sqlTableExists(conn, "mgcGenes")) ccdsMgcRows(conn, ccdsId); printf("</TBODY></TABLE>\n"); printf("<P><EM>Note: mRNA and protein sequences in other gene collections " "may differ from the CCDS sequences.</EM><br>\n"); } static void writePublicNotesHtml(struct sqlConnection *conn, char *ccdsId) /* write public notes if available */ { struct ccdsNotes *ccdsNotes = NULL; if (sqlTableExists(conn, "ccdsNotes")) ccdsNotes = sqlQueryObjs(conn, (sqlLoadFunc)ccdsNotesLoad, sqlQueryMulti, "select * from ccdsNotes where ccds = \"%s\" order by createDate", ccdsId); if (ccdsNotes != NULL) { htmlHorizontalLine(); printf("<H3>Annotator Notes</H3>\n"); printf("<TABLE class=\"hgcCcds\">\n"); printf("<THEAD>\n"); printf("<TR><TH>Date<TH>Note</TR>\n"); printf("</THEAD><TBODY>\n"); struct ccdsNotes *note; for (note = ccdsNotes; note != NULL; note = note->next) printf("<TR><TD style=\"white-space: nowrap;\">%s<TD>%s</TR>", note->createDate, note->note); printf("</TBODY></TABLE>\n"); } } static void writeRefSeqSummaryHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds) /* write refseq summary HTML, if a summary is available */ { char *summary = getCcdsRefSeqSummary(conn, rsCcds); if (summary != NULL) { htmlHorizontalLine(); printf("<H3>RefSeq summary of %s</H3>\n", ccdsId); printf("<P>%s</P>\n", summary); freez(&summary); } } void doCcdsGene(struct trackDb *tdb, char *ccdsId) /* Process click on a CCDS gene. */ { struct sqlConnection *conn = hAllocConn(database); struct ccdsInfo *rsCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoNcbi); struct ccdsInfo *vegaCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoVega); struct ccdsInfo *ensCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoEnsembl); if (rsCcds == NULL) errAbort("database inconsistency: no NCBI ccdsInfo entries found for %s", ccdsId); if ((vegaCcds == NULL) && (ensCcds == NULL)) errAbort("database inconsistency: no Hinxton ccdsInfo entries found for %s", ccdsId); ccdsInfoMRnaSort(&rsCcds); ccdsInfoMRnaSort(&vegaCcds); ccdsInfoMRnaSort(&ensCcds); cartWebStart(cart, database, "CCDS Gene"); printf("<H2>Consensus CDS Gene %s</H2>\n", ccdsId); writeBasicInfoHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds); writeLinksHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds); writePublicNotesHtml(conn, ccdsId); writeRefSeqSummaryHtml(conn, ccdsId, rsCcds); htmlHorizontalLine(); printTrackHtml(tdb); cartWebEnd(); ccdsInfoFreeList(&rsCcds); ccdsInfoFreeList(&vegaCcds); ccdsInfoFreeList(&ensCcds); hFreeConn(&conn); }