e1b46b0188570dc6f03b14520ba708233ec56446 hiram Tue Aug 16 13:59:07 2011 -0700 now showing correctly protein coding vs not for Ensembl genes diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 374e3b4..4681f34 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -2359,66 +2359,85 @@ } void showGenePos(char *name, struct trackDb *tdb) /* Show gene prediction position and other info. */ { char *rootTable = tdb->table; char query[512]; struct sqlConnection *conn = hAllocConn(database); struct genePred *gpList = NULL, *gp = NULL; boolean hasBin; char table[64]; struct sqlResult *sr = NULL; char **row = NULL; char *classTable = trackDbSetting(tdb, GENEPRED_CLASS_TBL); + hFindSplitTable(database, seqName, rootTable, table, &hasBin); safef(query, sizeof(query), "name = \"%s\"", name); gpList = genePredReaderLoadQuery(conn, table, query); for (gp = gpList; gp != NULL; gp = gp->next) { printPos(gp->chrom, gp->txStart, gp->txEnd, gp->strand, FALSE, NULL); if(sameString(tdb->type,"genePred") && startsWith("ENCODE Gencode",tdb->longLabel) && startsWith("ENST",name)) { char *ensemblIdUrl = trackDbSetting(tdb, "ensemblIdUrl"); printf("<b>Ensembl Transcript Id: </b>"); if (ensemblIdUrl != NULL) printf("<a href=\"%s%s\" target=\"_blank\">%s</a><br>", ensemblIdUrl,name,name); else printf("%s<br>",name); } if (gp->name2 != NULL && strlen(trimSpaces(gp->name2))> 0) { /* in Ensembl gene info downloaded from ftp site, sometimes the name2 field is populated with "noXref" because there is no alternate name. Replace this with "none" */ printf("<b>Gene Symbol:"); if (sameString(gp->name2, "noXref")) printf("</b> none<br>\n"); else printf("</b> %s<br>\n",gp->name2); } + char *ensemblSource = NULL; + if (sameString("ensGene", table)) + { + if (hTableExists(database, "ensemblSource")) + { + safef(query, sizeof(query), + "select source from ensemblSource where name='%s'", name); + ensemblSource = sqlQuickString(conn, query); + } + } if ((gp->exonFrames != NULL) && (!genbankIsRefSeqNonCodingMRnaAcc(gp->name))) { + if (ensemblSource && differentString("protein_coding",ensemblSource)) + { + printf("<b>CDS Start: </b> none (non-coding)<BR>\n"); + printf("<b>CDS End: </b> none (non-coding)<BR>\n"); + } + else + { printf("<b>CDS Start: </b>"); printCdsStatus((gp->strand[0] == '+') ? gp->cdsStartStat : gp->cdsEndStat); printf("<b>CDS End: </b>"); printCdsStatus((gp->strand[0] == '+') ? gp->cdsEndStat : gp->cdsStartStat); } + } /* if a gene class table exists, get gene class and print */ if (classTable != NULL) { if (hTableExists(database, classTable)) { safef(query, sizeof(query), "select class from %s where name = \"%s\"", classTable, name); sr = sqlGetResult(conn, query); /* print class */ if ((row = sqlNextRow(sr)) != NULL) printf("<b>Prediction Class:</b> %s<br>\n", row[0]); sqlFreeResult(&sr); if (sqlFieldIndex(conn, classTable, "level") > 0 ) { safef(query, sizeof(query), @@ -8241,33 +8260,39 @@ warn("Organism %s not found!", organism); fflush(stdout); return; } /* print URL that links to Ensembl transcript details */ if (archive != NULL) safef(ensUrl, sizeof(ensUrl), "http://%s.archive.ensembl.org/%s", archive, genomeStrEnsembl); else safef(ensUrl, sizeof(ensUrl), "http://www.ensembl.org/%s", genomeStrEnsembl); char query[512]; char *geneName = NULL; if (hTableExists(database, "ensemblToGeneName")) { - safef(query, sizeof(query), "select value from ensemblToGeneName where name='%s' limit 1", itemName); + safef(query, sizeof(query), "select value from ensemblToGeneName where name='%s'", itemName); geneName = sqlQuickString(conn, query); } +char *ensemblSource = NULL; +if (hTableExists(database, "ensemblSource")) + { + safef(query, sizeof(query), "select source from ensemblSource where name='%s'", itemName); + ensemblSource = sqlQuickString(conn, query); + } boolean nonCoding = FALSE; safef(query, sizeof(query), "name = \"%s\"", itemName); struct genePred *gpList = genePredReaderLoadQuery(conn, "ensGene", query); if (gpList && gpList->name2) { if (gpList->cdsStart == gpList->cdsEnd) nonCoding = TRUE; printf("<B>Ensembl Gene Link: </B>"); if (sameString(gpList->name2, "noXref")) printf("none<BR>\n"); else printf("<A HREF=\"%s/geneview?gene=%s\" " "target=_blank>%s</A><BR>", ensUrl, gpList->name2, gpList->name2); } @@ -8385,30 +8410,35 @@ printf("<B>Ensembl Protein: </B>"); printf("<A HREF=\"%s/protview?peptide=%s\" target=_blank>", ensUrl,proteinID); printf("%s</A><BR>\n", proteinID); } else { printf("<B>Ensembl Protein: </B>none (non-coding)<BR>\n"); } } if (geneName) { printf("<B>Gene Name: </B>%s<BR>\n", geneName); freeMem(geneName); } +if (ensemblSource) + { + printf("<B>Ensembl type: </B>%s<BR>\n", ensemblSource); + freeMem(ensemblSource); + } freeMem(shortItemName); } void printEnsemblOrVegaCustomUrl(struct trackDb *tdb, char *itemName, boolean encode, char *archive) /* Print Ensembl Gene URL. */ { boolean isEnsembl = FALSE; boolean isVega = FALSE; boolean hasEnsGtp = FALSE; boolean hasVegaGtp = FALSE; char *shortItemName; char *genomeStrEnsembl = ""; struct sqlConnection *conn = hAllocConn(database); char cond_str[256], cond_str2[256]; char *geneID = NULL;