src/hg/hgc/lowelab.c 1.45
1.45 2010/03/21 01:02:55 pchan
rework refseq track detail page; add gene tree
Index: src/hg/hgc/lowelab.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/lowelab.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -b -B -U 4 -r1.44 -r1.45
--- src/hg/hgc/lowelab.c 5 Mar 2010 00:13:40 -0000 1.44
+++ src/hg/hgc/lowelab.c 21 Mar 2010 01:02:55 -0000 1.45
@@ -92,8 +92,9 @@
#include "cddDesc.h"
#include "arCOGs.h"
#include "arcogdesc.h"
#include "megablastInfo.h"
+#include "geneTree.h"
#define LISTUI
static char const rcsid[] = "$Id$";
@@ -113,8 +114,9 @@
"Biological Process",
"Cellular Component",
};
int aspectIx;
+int termCount = 0;
if (sqlTableExists(goConn,"goaPart") &&
sqlTableExists(goConn,"term"))
for (aspectIx = 0; aspectIx < ArraySize(aspects); ++aspectIx)
@@ -139,14 +141,16 @@
}
printf("<A HREF = \"");
printf("http://godatabase.org/cgi-bin/go.cgi?view=details&depth=1&query=%s", goID);
printf("\" TARGET=_blank>%s</A> %s<BR>\n", goID, goTermName);
+ termCount++;
}
if (hasFirst)
printf("<BR>");
sqlFreeResult(&sr);
}
-sqlDisconnect(&goConn);
+ if (termCount == 0) printf("Not available<BR>\n");
+ sqlDisconnect(&goConn);
}
void keggOtherGenes(struct sqlConnection *conn, char *geneId,
char *table, char *mapId)
@@ -239,15 +243,53 @@
percent = (float) count / (float) length * 100.0f;
return percent;
}
+int getGeneTree(struct sqlConnection *conn, char *geneId, char *treeFileName)
+{
+ int success = 0;
+ char query[256];
+ char *geneTreeTable = "geneTree";
+ struct sqlResult *sr;
+ char **row;
+ struct geneTree *genetree;
+
+ if (!hTableExists(database, geneTreeTable))
+ return 0;
+ safef(query, sizeof(query),
+ "select * from %s where name = '%s'", geneTreeTable, geneId);
+ sr = sqlGetResult(conn, query);
+ while ((row = sqlNextRow(sr)) != NULL)
+ {
+ genetree = geneTreeLoad(row);
+ if (!fileExists(treeFileName))
+ {
+ FILE *f;
+ f = fopen(treeFileName, "w");
+ if (f != NULL)
+ {
+ fprintf(f, "%s\n", genetree->tree);
+ fclose(f);
+ success = 1;
+ }
+ }
+ else
+ success = 1;
+ }
+ sqlFreeResult(&sr);
+ geneTreeFree(&genetree);
+
+ return success;
+}
+
void doRefSeq(struct trackDb *tdb, char *item,
char *pepTable, char *extraTable)
/* Handle click on gene track. */
{
struct minGeneInfo ginfo;
char query[256];
char query2[256];
+char *gi = NULL;
struct sqlResult *sr, *sr2;
struct dnaSeq *sequence;
char **row, **row2;
char *words[16], *dupe = cloneString(tdb->type);
@@ -267,8 +309,18 @@
char *pdb = hPdbFromGdb(database);
struct genePred *gpList = NULL, *gp = NULL;
char tableName[64];
boolean hasBin;
+int itemCount = 0;
+
+char treeFileName[256];
+char treeTmpPsFileName[256];
+char treePsFileName[256];
+char treePngFileName[256];
+char treePdfFileName[256];
+char command[512];
+char buffer[512];
+char searchTerm[256];
char pepTableName[64];
char extraTableName[64];
@@ -289,14 +341,15 @@
if (wordCount > 1)
num = atoi(words[1]);
if (num < 3) num = 3;
if (extraTableName != NULL && hTableExists(database, extraTableName))
- {
+{
sprintf(query, "select * from %s where name = '%s'", extraTableName, item);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
{
minGeneInfoStaticLoad(row, &ginfo);
+ gi = cloneString(ginfo.gi);
if (ginfo.gene != NULL && differentString(ginfo.gene,"none"))
printf("<B>Gene: </B>%s<BR>\n", ginfo.gene);
if (ginfo.product != NULL && differentString(ginfo.product,"none"))
medlineLinkedLine("Product", ginfo.product, ginfo.product);
@@ -316,13 +369,13 @@
"TARGET=_BLANK>%s</A><BR>\n", ginfo.ec, ginfo.ec);
}
}
sqlFreeResult(&sr);
- }
+}
/* lookup swissprot acc */
spAcc = uniProtFindPrimAccFromGene(item, database);
if (spAcc != NULL)
- {
+{
printf("<B>UniProtKB:</B> ");
printf("<A HREF=");
printf(uniprotFormat, spAcc);
if (spAcc == NULL)
@@ -332,21 +385,51 @@
else
{
printf(" TARGET=_blank>%s</A></B><BR>\n", spAcc);
}
- }
-/* ncbi blast hits */
- hits = chopString(ginfo.gi,":",giwords,sizeof(giwords));
- if (hits > 0)
- {
- printf("<B>NCBI Blast Hits:</B> "
- "<A HREF=\"http://www.ncbi.nlm.nih.gov/sutils/blink.cgi?pid=%s\" "
- "TARGET=_BLANK>%s</A><BR>\n", (hits == 2) ? giwords[1] : giwords[0],
- (hits == 2) ? giwords[1] : giwords[0]);
- }
+}
+
+/* print table of contents */
+printf("<br><hr style=\"width: 100%%; height: 2px;\"><font size=\"+1\">\n");
+printf("<b>[<a href=\"#positions\">Positions and Sequence</a>] \n");
+printf("[<a href=\"#COG\">COG</a>] \n");
+printf("[<a href=\"#GO\">Gene Ontology</a>] \n");
+printf("[<a href=\"#domain\">Protein Domain and Structure Infomation</a>] \n");
+printf("[<a href=\"#pathway\">Pathway</a>] \n");
+printf("[<a href=\"#homology\">Gene Homology</a>]</b></font> <br>\n");
+printf("<hr style=\"width: 100%%; height: 2px;\"><br>\n");
+
+/* Positions and sequence */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"positions\"></a><b>Positions and Sequence</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
+hFindSplitTable(database, seqName, table, tableName, &hasBin);
+safef(query, sizeof(query), "name = \"%s\"", item);
+gpList = genePredReaderLoadQuery(conn, tableName, query);
+for (gp = gpList; gp != NULL; gp = gp->next)
+{
+ sequence = hDnaFromSeq(database, gp->chrom, gp->txStart, gp->txEnd, dnaUpper);
+ if (sequence != NULL)
+ printf("<B>GC content:</B> %0.2f%%<BR>\n", computeGCContent(sequence->dna, sequence->size));
+}
+geneShowPosAndLinks(item, item, tdb, pepTableName, "htcTranslatedProtein",
+ "htcGeneMrna", "htcGeneInGenome", "Predicted mRNA");
+genePredFreeList(&gpList);
+
+printf("</td></tr></tbody></table><br>\n");
+
+/* COG */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"COG\"></a><b>COG</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
/* cog description */
+itemCount = 0;
if (hTableExists(database, "COG"))
- {
+{
sprintf(query, "select * from COG where name = '%s'", item);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
{
@@ -368,18 +451,20 @@
">%s</A> "
"<A HREF=\"http://www.ncbi.nlm.nih.gov/COG/grace/wiew.cgi?fun=%s\" "
">Code %s</A> \n",
COGXra->name, COGXra->name, COG->code,COG->code);
- printf(" %s<BR><BR>\n", COGXra->info);
+ printf(" %s<BR>\n", COGXra->info);
+ itemCount++;
}
sqlFreeResult(&sr2);
hFreeConn(&conn2);
}
}
}
sqlFreeResult(&sr);
//hFreeConn(&conn2);
- }
+}
+
if (hTableExists(database, "arCOGs"))
{
struct arCOGs *infoload = NULL;
struct arcogdesc *description = NULL;
@@ -402,20 +487,40 @@
description=arcogdescLoad(row2);
if(description!=NULL)
{
printf("<B>arCOG:</B> %s Code %s",infoload->name, description->code);
- printf(" %s<BR/><BR/>\n", description->description);
+ printf(" %s<BR/>\n", description->description);
+ itemCount++;
}
}
sqlFreeResult(&sr2);
hFreeConn(&conn2);
}
}
}
+if (itemCount == 0) printf("Not available\n");
+printf("</td></tr></tbody></table><br>\n");
+
+/* GO */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"GO\"></a><b>Gene Ontology</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
+/* print go terms */
+goPrint( conn, item, spAcc);
+printf("</td></tr></tbody></table><br>\n");
+
+/* Protein domain and structure information */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"domain\"></a><b>Protein Domain and Structure Information</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
/* interpro domains */
list = spExtDbAcc1List(spConn, spAcc, "InterPro");
if (list != NULL)
- {
+{
char query[256], **row, **row2;
struct sqlResult *sr, *sr2;
printf("<B>InterPro Domains: </B> ");
printf("<A HREF=\"http://www.ebi.ac.uk/interpro/ISpy?mode=single&ac=%s\" TARGET=_blank>",
@@ -452,14 +557,14 @@
}
}
printf("<BR>\n");
slFreeList(&list);
- }
+}
/* pfam domains */
list = spExtDbAcc1List(spConn, spAcc, "Pfam");
if (list != NULL)
- {
+{
printf("<B>Pfam Domains:</B><BR>");
for (el = list; el != NULL; el = el->next)
{
char query[256];
@@ -486,13 +591,13 @@
freez(&description);
}
slFreeList(&list);
printf("<BR>\n");
- }
+}
list = spExtDbAcc1List(spConn, spAcc, "PDB");
if (list != NULL)
- {
+{
char query[256], **row;
struct sqlResult *sr;
int column = 0, maxColumn=4, rowCount=0;
printf("<B>Protein Data Bank (PDB) 3-D Structure</B><BR>");
@@ -518,28 +623,17 @@
}
printf("<TD>");
printf("<A HREF=\"http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId=%s\" TARGET=_blank>", row[0]);
if (rowCount < 1)
- printf("<IMG SRC=\"http://www.rcsb.org/pdb/cgi/pdbImage.cgi/%sx150.jpg\"><BR>", row[0]);
+ printf("<IMG SRC=\"http://www.rcsb.org/pdb/images/%s_bio_r_80.jpg\"><BR>", row[0]);
printf("%s</A> - %s<BR>\n", row[0], row[1]);
printf("</TD>");
}
printf("</TR></TABLE>\n");
printf("<BR>\n");
slFreeList(&list);
}
-/* kegg pathway links */
-if (keggCount(conn, item) > 0)
- {
- keggLink(conn, item, table, "<B>Kegg Pathway: </b><BR>");
- }
-/* Do SAM-T02 sub-section */
-//doSamT02(spAcc, database);
-
-/* print go terms */
-goPrint( conn, item, spAcc);
-
/* Do modBase link. */
{
printf("<B>ModBase Predicted Comparative 3D Structure on ");
modBaseAnchor(spAcc);
@@ -565,24 +659,114 @@
"frequently covers just a fragment of the protein. You may "
"be asked to log onto ModBase the first time you click on the "
"pictures. It is simplest after logging in to just click on "
"the picture again to get to the specific info on that model.</I><p>");
- }
+}
+printf("</td></tr></tbody></table><br>\n");
-hFindSplitTable(database, seqName, table, tableName, &hasBin);
-safef(query, sizeof(query), "name = \"%s\"", item);
-gpList = genePredReaderLoadQuery(conn, tableName, query);
-for (gp = gpList; gp != NULL; gp = gp->next)
+/* Pathway */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"pathway\"></a><b>Pathway</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
+/* kegg pathway links */
+if (keggCount(conn, item) > 0)
{
- sequence = hDnaFromSeq(database, gp->chrom, gp->txStart, gp->txEnd, dnaUpper);
- if (sequence != NULL)
- printf("<B>GC content:</B> %0.2f%%<BR>\n", computeGCContent(sequence->dna, sequence->size));
+ keggLink(conn, item, table, "<B>Kegg Pathway: </b><BR>");
}
+else
+ printf("Not available\n");
+printf("</td></tr></tbody></table><br>\n");
-geneShowPosAndLinks(item, item, tdb, pepTableName, "htcTranslatedProtein",
- "htcGeneMrna", "htcGeneInGenome", "Predicted mRNA");
+/* Do SAM-T02 sub-section */
+//doSamT02(spAcc, database);
-genePredFreeList(&gpList);
+/* Gene Homology */
+printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
+printf("<tbody><tr><td style=\"background-color:#eee9e9;\">\n");
+printf("<a name=\"homology\"></a><b>Gene Homology</b><br></td></tr>\n");
+printf("<tr><td>\n");
+
+/* ncbi blast hits */
+hits = chopString(gi,":",giwords,sizeof(giwords));
+if (hits > 0)
+{
+ printf("<B>NCBI Blast Hits:</B> "
+ "<A HREF=\"http://www.ncbi.nlm.nih.gov/sutils/blink.cgi?pid=%s\" "
+ "TARGET=_BLANK>%s</A><BR>\n", (hits == 2) ? giwords[1] : giwords[0],
+ (hits == 2) ? giwords[1] : giwords[0]);
+}
+
+/* Gene tree */
+safef(treeFileName, sizeof(treeFileName), "../trash/geneTree/geneTree_%s.txt", item);
+if (getGeneTree(conn, item, treeFileName))
+{
+ safef(treeTmpPsFileName, sizeof(treeTmpPsFileName), "../trash/geneTree/tmp_geneTree_%s.ps", item);
+ safef(treePsFileName, sizeof(treePsFileName), "../trash/geneTree/geneTree_%s.ps", item);
+ safef(treePngFileName, sizeof(treePngFileName), "../trash/geneTree/geneTree_%s.png", item);
+ safef(treePdfFileName, sizeof(treePdfFileName), "../trash/geneTree/geneTree_%s.pdf", item);
+ safef(searchTerm, sizeof(searchTerm), ".%s) show\n", item);
+
+ if (!fileExists(treePsFileName))
+ {
+ safef(command, sizeof(command), "../bin/draw_tree -b %s | grep -v translate | sed '5 i 10 530 translate' | sed '6 i 0.33 0.33 scale' | sed 's/findfont 12/findfont 24/' > %s",
+ treeFileName, treeTmpPsFileName);
+ mustSystem(command);
+
+ FILE *fi = mustOpen(treeTmpPsFileName, "r");
+ FILE *fo = mustOpen(treePsFileName, "w");
+ if ((fi != NULL) && (fo != NULL))
+ {
+ mustGetLine(fi, buffer, 512);
+ while (!sameWord(buffer, ""))
+ {
+ if (endsWith(buffer, searchTerm))
+ {
+ fprintf(fo, "255 0 0 setrgbcolor\n");
+ fprintf(fo, "%s", buffer);
+ fprintf(fo, "0 0 0 setrgbcolor\n");
+ }
+ else
+ fprintf(fo, "%s", buffer);
+ mustGetLine(fi, buffer, 512);
+ }
+ fclose(fo);
+ fclose(fi);
+ }
+ safef(command, sizeof(command), "rm -f %s", treeTmpPsFileName);
+ mustSystem(command);
+ }
+
+ if (!fileExists(treePngFileName))
+ {
+ safef(command, sizeof(command), "pstoimg -scale 0.4 -aaliastext -type png -density 300 -out %s %s",
+ treePngFileName, treePsFileName);
+ mustSystem(command);
+ }
+
+ if (!fileExists(treePdfFileName))
+ {
+ safef(command, sizeof(command), "ps2pdf %s %s",
+ treePsFileName, treePdfFileName);
+ mustSystem(command);
+ }
+
+ printf("<BR><B>Phylogenetic tree with homologous genes:</B> View as \n");
+ printf("<A HREF=\"%s\" TARGET=_BLANK>Postscript</A> \n", treePsFileName);
+ printf("<A HREF=\"%s\" TARGET=_BLANK>PNG</A> \n", treePngFileName);
+ printf("<A HREF=\"%s\" TARGET=_BLANK>PDF</A> \n", treePdfFileName);
+ printf("<A HREF=\"%s\" TARGET=_BLANK>Text</A> <BR>\n", treeFileName);
+
+ printf("<BR>Homologous genes of this protein were retrieved by BLASTP. ");
+ printf("The alignments of the protein sequences, made by <A HREF=\"http://www.drive5.com/muscle/\" TARGET=_BLANK>MUSCLE</A>, ");
+ printf("were used to construct the phylogenetic tree. This maximum likelihood tree was computed by using ");
+ printf("<A HREF=\"http://www.atgc-montpellier.fr/phyml/\" TARGET=_BLANK>PHYML</A> with the Jones Taylor Thornton model of sequence evolution, ");
+ printf("by including a Gamma-correction (eight categories of evolutionary rates, ");
+ printf("an estimated alpha-parameter and an estimated proportion of invariant sites).<BR>");
+}
+
+printf("</td></tr></tbody></table><br>\n");
printTrackHtml(tdb);
hFreeConn(&conn);
}