src/hg/hgc/lowelab.c 1.47
1.47 2010/03/22 04:18:14 pchan
add selfHomologs track
Index: src/hg/hgc/lowelab.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/lowelab.c,v
retrieving revision 1.46
retrieving revision 1.47
diff -b -B -U 4 -r1.46 -r1.47
--- src/hg/hgc/lowelab.c 21 Mar 2010 02:53:18 -0000 1.46
+++ src/hg/hgc/lowelab.c 22 Mar 2010 04:18:14 -0000 1.47
@@ -243,8 +243,174 @@
percent = (float) count / (float) length * 100.0f;
return percent;
}
+int selfBlastpHitCount(struct sqlConnection *conn, char *geneId)
+/* Count up number of hits. */
+{
+char query[512];
+char *blastpHitsTable = "blastpHits";
+if (!hTableExists(database, blastpHitsTable))
+ return 0;
+safef(query, sizeof(query),
+ "select count(*) from %s where query = '%s' and target like '%s:%%' and target != '%s:%s'",
+ blastpHitsTable, geneId, database, database, geneId);
+return sqlQuickNum(conn, query);
+}
+
+struct blastTab* loadSelfBlastpHits(struct sqlConnection *conn, char* queryName, int self)
+/* Load all blastp hits in the same genome of the given query gene into a list */
+{
+ char query[512];
+ struct sqlResult *srBlastpHits = NULL;
+ struct blastTab *list = NULL;
+ struct blastTab *blastpHits;
+ char **row;
+ char blastpHitsTable[] = "blastpHits";
+
+ if (hTableExists(database, blastpHitsTable))
+ {
+ if (self)
+ {
+ sprintf(query, "select * from %s where query = '%s' and target like '%s:%%'",
+ blastpHitsTable, queryName, database);
+ }
+ else
+ {
+ sprintf(query, "select * from %s where query = '%s' and target like '%s:%%' and target != '%s:%s'",
+ blastpHitsTable, queryName, database, database, queryName);
+ }
+ srBlastpHits = sqlGetResult(conn, query);
+ while ((row = sqlNextRow(srBlastpHits)) != NULL)
+ {
+ blastpHits = blastTabLoad(row);
+ slAddTail(&list, blastpHits);
+ }
+ }
+ if (srBlastpHits != NULL)
+ sqlFreeResult(&srBlastpHits);
+ return list;
+}
+
+void printSelfHomologs(struct sqlConnection *conn, struct blastTab *blastpHitsList)
+/* Print self homologs in refseq */
+{
+ char query[512];
+ struct sqlResult *sr;
+ char **row;
+ char refSeq[50];
+ char xraTable[50];
+ char product[255];
+ struct blastTab *blastpHits;
+ struct minGeneInfo *ginfo;
+ char *blastpTarget[2];
+ char **buffer = NULL;
+ boolean findTable = FALSE;
+ unsigned int cdsStart = 0;
+ unsigned int cdsEnd = 0;
+
+ printf("<br>\n");
+ printf("<b>Homologs within genome</b><BR>\n");
+
+ /* Print table */
+ printf("<table style=\"width: 60%%;\" bgcolor=\"#%s\" border=\"0\" cellpadding=\"1\" cellspacing=\"0\">", HG_COL_BORDER);
+ printf("<tbody><tr><td>\n");
+ printf("<table style=\"width: 100%%; text-align: left;\" bgcolor=\"%s\" border=\"1\" cellpadding=\"2\" cellspacing=\"2\">\n", HG_COL_INSIDE);
+ printf("<tbody>\n");
+
+ /* Print table column heading */
+ printf("<tr style=\"vertical-align: top;\">\n");
+ printf("<td width=\"20%%\"><b>Gene</b></td>\n");
+ printf("<td><b>Product</b></td>\n");
+ printf("<td width=\"30%%\"><b>BlastP E-value</b></td>\n");
+ printf("</tr>\n");
+
+ blastpHits = blastpHitsList;
+ while (blastpHits != NULL)
+ {
+ parseDelimitedString(blastpHits->target, ':', blastpTarget, 2);
+
+ if (hTableExists(blastpTarget[0], "lookup"))
+ {
+ sprintf(query, "select lookupValue from %s.lookup where lookupCode = 'annotRev'", blastpTarget[0]);
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ strcpy(refSeq, row[0]);
+ findTable = TRUE;
+ sqlFreeResult(&sr);
+ }
+ }
+ else if (hTableExists(blastpTarget[0], "refSeq"))
+ {
+ strcpy(refSeq, "refSeq");
+ findTable = TRUE;
+ }
+ if (findTable)
+ {
+ sprintf(query, "select chrom, cdsStart, cdsEnd from %s where name = '%s'",
+ refSeq, blastpTarget[1]);
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ cdsStart = strtoul(row[1], buffer, 10);
+ cdsEnd = strtoul(row[2], buffer, 10);
+ printf("<tr style=\"vertical-align: top;\">\n");
+ printf("<td><a href=\"hgTracks\?position=%s:%u-%u&db=%s\" TARGET=_blank>%s</a></td>\n",
+ row[0], cdsStart, cdsEnd, blastpTarget[0], blastpTarget[1]);
+ }
+ else
+ printf("<td>%s</td>\n", blastpTarget[1]);
+ sqlFreeResult(&sr);
+ }
+ else
+ printf("<td>%s</td>\n", blastpTarget[1]);
+
+ if (hTableExists(blastpTarget[0], "lookup"))
+ {
+ sprintf(query, "select lookupValue from %s.lookup where lookupCode = 'annotRevXra'", blastpTarget[0]);
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ strcpy(xraTable, row[0]);
+ sqlFreeResult(&sr);
+ }
+ else
+ strcpy(product, "N/A");
+
+ sprintf(query, "select product from %s where name = '%s'", xraTable, blastpTarget[1]);
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ strcpy(product, row[0]);
+ sqlFreeResult(&sr);
+ }
+ else
+ strcpy(product, "N/A");
+ }
+ else
+ {
+ ginfo = getGbProtCodeInfo(conn, blastpTarget[0], blastpTarget[1]);
+ if (ginfo != NULL && ginfo->product != NULL && differentString(ginfo->product,"none"))
+ strcpy(product, ginfo->product);
+ else
+ strcpy(product, "N/A");
+ }
+ printf("<td>%s</td>\n", product);
+ printf("<td style=\"text-align: right;\">%0.0e</td>\n", blastpHits->eValue);
+ printf("</tr>\n");
+
+ free(blastpTarget[0]);
+ free(blastpTarget[1]);
+ blastpHits = blastpHits->next;
+ }
+ /* Close table */
+ printf("</tbody>\n");
+ printf("</table>\n");
+ printf("</td></tr></tbody>\n");
+ printf("</table>\n");
+}
+
int getGeneTree(struct sqlConnection *conn, char *geneId, char *treeFileName)
{
int success = 0;
char query[256];
@@ -320,8 +486,10 @@
char command[512];
char buffer[512];
char searchTerm[256];
+struct blastTab *blastpHitsList;
+
char pepTableName[64];
char extraTableName[64];
if (startsWith("annotRev", table))
@@ -746,8 +914,14 @@
printf("by including a Gamma-correction (eight categories of evolutionary rates, ");
printf("an estimated alpha-parameter and an estimated proportion of invariant sites).<BR>");
}
+/* Self Homologs */
+if (selfBlastpHitCount(conn, item) > 0)
+{
+ blastpHitsList = loadSelfBlastpHits(conn, item, 0);
+ printSelfHomologs(conn, blastpHitsList);
+}
printf("</td></tr></tbody></table><br>\n");
/* Pathway */
printf("<table style=\"text-align: left; width: 99%%;\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n");
@@ -2425,8 +2599,49 @@
hFreeConn(&conn);
printTrackHtml(tdb);
}
+void doSelfHomologs(struct trackDb *tdb, char *targetName)
+/* Handle the Self Homologs tracks. */
+{
+ char queryName[50];
+ char queryTable[50];
+ unsigned int querySeqLength = 0;
+ struct sqlConnection *conn = hAllocConn(database);
+ struct bed *blastpTrack;
+ struct blastTab *blastpHitsList;
+ char query[512];
+ struct sqlResult *sr;
+ char **row;
+
+ cartWebStart(cart, database, "%s", "Homologs Within Genome by BlastP Search");
+
+ blastpTrack = getBlastpTrackRecord(conn, tdb, targetName);
+
+ if (hTableExists(database, "lookup"))
+ {
+ sprintf(query, "select lookupValue from lookup where lookupCode = 'annotRev'");
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ strcpy(queryTable, row[0]);
+ sqlFreeResult(&sr);
+ }
+ }
+ else
+ strcpy(queryTable, "refSeq");
+ printQueryGeneInfo(conn, blastpTrack, queryName, &querySeqLength, queryTable);
+
+ blastpHitsList = loadSelfBlastpHits(conn, queryName, 1);
+
+ printBlastpResult(conn, blastpHitsList, querySeqLength);
+
+ printf("<BR>Note: All measurements are counted by the number of amino acids.<BR>\n");
+ printf("<hr>\n");
+ hFreeConn(&conn);
+ printTrackHtml(tdb);
+}
+
void printQuerySequenceInfo(struct sqlConnection *conn, struct bed *blastxTrack, char *queryName, unsigned int *querySeqLength, char *queryTable)
/* Get and print blastx query sequence info */
{
char query[512];
@@ -3671,8 +3886,12 @@
else if (sameWord(track,"loweOrthologs"))
{
doloweOrthologs(tdb, item);
}
+else if (sameWord(track,"selfHomologs"))
+ {
+ doSelfHomologs(tdb, item);
+ }
else if (sameWord(track,"CRISPRs"))
{
doCRISPRs(tdb, item);
}