src/hg/hgc/lowelab.c 1.32
1.32 2009/05/08 21:15:26 pchan
fix hTableExist call for blastp track; change code to use centraldb name from hg.conf; add code to support blastx track
Index: src/hg/hgc/lowelab.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/lowelab.c,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -B -U 4 -r1.31 -r1.32
--- src/hg/hgc/lowelab.c 30 Jan 2009 23:47:45 -0000 1.31
+++ src/hg/hgc/lowelab.c 8 May 2009 21:15:26 -0000 1.32
@@ -407,18 +407,21 @@
{
char query[256];
char *description;
safef(query, sizeof(query), "select description from proteome.pfamDesc where pfamAC='%s'", el->name);
- if (!sqlTableExists(spConn,"pfamDesc"))
+ description = sqlQuickString(spConn, query);
+ if (description == NULL)
{
safef(query, sizeof(query),
"select extDbRef.extAcc1 from extDbRef,extDb "
"where extDbRef.acc = '%s' "
"and extDbRef.extDb = extDb.id "
"and extDb.val = '%s'"
, spAcc,el->name);
- }
+
+ printf("%s\n", query);
description = sqlQuickString(spConn, query);
+ }
if (description == NULL)
description = cloneString("n/a");
printf("<A HREF=\"http://pfam.sanger.ac.uk/family?acc=%s\" TARGET=_blank>",
el->name);
@@ -1556,9 +1559,9 @@
pfamHit = lowelabPfamHitsLoad(row+rowOffset);
safef(query, sizeof(query), "select description from proteome.pfamDesc where pfamAC='%s'", pfamHit->pfamAC);
- if (!sqlTableExists(spConn,"pfamDesc"))
+ if (!sqlTableExists(spConn,"proteome.pfamDesc"))
{
safef(query, sizeof(query),
"select extDbRef.extAcc1 from extDbRef,extDb "
"where extDbRef.acc = '%s' "
@@ -1853,23 +1856,29 @@
struct minGeneInfo* getGbProtCodeInfo(struct sqlConnection *conn, char* dbName, char *geneName)
/* Get refseq protein annotation by given gene name in a given database */
{
char query[512];
- struct sqlResult *sr;
+ struct sqlResult *sr = NULL;
char **row;
struct minGeneInfo* ginfo = NULL;
char gbProtCodeXra[50];
+ if (strcmp(database, dbName) == 0)
+ strcpy(gbProtCodeXra, "gbProtCodeXra");
+ else
+ {
strcpy(gbProtCodeXra, dbName);
strcat(gbProtCodeXra, ".gbProtCodeXra");
- if (hTableExists(database, gbProtCodeXra))
+ }
+ if (hTableExists(dbName, "gbProtCodeXra"))
{
sprintf(query, "select * from %s where name = '%s'", gbProtCodeXra, geneName);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
ginfo = minGeneInfoLoad(row);
}
+ if (sr != NULL)
sqlFreeResult(&sr);
return ginfo;
}
@@ -1970,15 +1979,18 @@
{
char query[512];
struct sqlResult *srDb;
char **rowDb;
+ char *centraldb = cfgOption("central.db");
- sprintf(query, "select count(*) from centraldb.genomeClade a, centraldb.dbDb b, centraldb.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'", dbName);
+ sprintf(query, "select count(*) from %s.genomeClade a, %s.dbDb b, %s.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'",
+ centraldb, centraldb, centraldb, dbName);
srDb = sqlGetResult(conn, query);
if ((rowDb = sqlNextRow(srDb)) != NULL)
{
sqlFreeResult(&srDb);
- sprintf(query, "select a.genome, c.label from centraldb.genomeClade a, centraldb.dbDb b, centraldb.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'", dbName);
+ sprintf(query, "select a.genome, c.label from %s.genomeClade a, %s.dbDb b, %s.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'",
+ centraldb, centraldb, centraldb, dbName);
srDb = sqlGetResult(conn, query);
if ((rowDb = sqlNextRow(srDb)) != NULL)
{
strcpy(genome, rowDb[0]);
@@ -1996,9 +2008,9 @@
char **rowDb;
struct slName *list = NULL;
char clade[50];
- sprintf(query, "select label from centraldb.clade");
+ sprintf(query, "select label from %s.clade", cfgOption("central.db"));
srDb = sqlGetResult(conn, query);
while ((rowDb = sqlNextRow(srDb)) != NULL)
{
strcpy(clade, rowDb[0]);
@@ -2012,9 +2024,9 @@
struct blastTab* loadBlastpHits(struct sqlConnection *conn, char* queryName)
/* Load all blastp hits of the given query gene into a list */
{
char query[512];
- struct sqlResult *srBlastpHits;
+ struct sqlResult *srBlastpHits = NULL;
struct blastTab *list = NULL;
struct blastTab *blastpHits;
char **row;
char blastpHitsTable[] = "blastpHits";
@@ -2028,8 +2040,9 @@
blastpHits = blastTabLoad(row);
slAddTail(&list, blastpHits);
}
}
+ if (srBlastpHits != NULL)
sqlFreeResult(&srBlastpHits);
return list;
}
@@ -2075,9 +2088,9 @@
printf("<td width=\"5%%\"><b>E-value</b></td>\n");
printf("<td width=\"5%%\"><b>Log of E-value</b></td>\n");
printf("<td width=\"5%%\"><b>Bit Score</b></td>\n");
printf("<td width=\"6%%\"><b>Protein Alignment Length</b></td>\n");
- printf("<td width=\"5%%\"><b>Protein Mismatchs</b></td>\n");
+ printf("<td width=\"5%%\"><b>Protein Mismatches</b></td>\n");
printf("<td width=\"5%%\"><b>Alignment Gaps</b></td>\n");
printf("</tr>\n");
blastpHits = blastpHitsList;
@@ -2104,9 +2117,9 @@
/* Get target gene position from refSeq */
strcpy(refSeq, blastpTarget[0]);
strcat(refSeq, ".refSeq");
- if (hDbExists(blastpTarget[0]) && hTableExists(database, refSeq))
+ if (hDbExists(blastpTarget[0]) && hTableExists(blastpTarget[0], "refSeq"))
{
sprintf(query, "select chrom, cdsStart, cdsEnd from %s where name = '%s'",
refSeq, blastpTarget[1]);
sr = sqlGetResult(conn, query);
@@ -2182,8 +2195,321 @@
hFreeConn(&conn);
printTrackHtml(tdb);
}
+void printQuerySequenceInfo(struct sqlConnection *conn, struct bed *blastxTrack, char *queryName, unsigned int *querySeqLength, char *queryTable)
+/* Get and print blastx query sequence info */
+{
+ char query[512];
+ struct sqlResult *srQuerySeq;
+ char **row;
+ int seqCount;
+ char **buffer = NULL;
+ char *targetGeneName[2];
+
+ char blastxHits[] = "blastxHits";
+
+ unsigned int queryStart = 0;
+ unsigned int queryEnd = 0;
+
+ parseDelimitedString(blastxTrack->name, ':', targetGeneName, 2);
+
+ if (hTableExists(database, queryTable) && hTableExists(database, blastxHits))
+ {
+ /* Get query sequence from query table */
+ sprintf(query, "select count(*) from %s where chrom = '%s' and chromStart <= %u and chromEnd >= %u",
+ queryTable, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+ srQuerySeq = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(srQuerySeq)) != NULL)
+ {
+ seqCount = atoi(row[0]);
+ sqlFreeResult(&srQuerySeq);
+
+ if (seqCount == 1)
+ {
+ sprintf(query, "select name, chromStart, chromEnd from %s where chrom = '%s' and chromStart <= %u and chromEnd >= %u",
+ queryTable, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+ srQuerySeq = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(srQuerySeq)) != NULL)
+ {
+ strcpy(queryName, row[0]);
+ queryStart = strtoul(row[1], buffer, 10);
+ queryEnd = strtoul(row[2], buffer, 10);
+ }
+ sqlFreeResult(&srQuerySeq);
+ }
+ else
+ {
+ /* Check blastxHits if more than 1 query sequence is found within the region */
+ sprintf(query, "select a.name, a.chromStart, a.chromEnd from %s a, %s b where a.chrom = '%s' and a.chromStart <= %u and a.chromEnd >= %u and a.name = b.query and b.target like '%%%s'",
+ queryTable, blastxHits,
+ blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd, targetGeneName[0]);
+ srQuerySeq = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(srQuerySeq)) != NULL)
+ {
+ strcpy(queryName, row[0]);
+ queryStart = strtoul(row[1], buffer, 10);
+ queryEnd = strtoul(row[2], buffer, 10);
+ }
+ sqlFreeResult(&srQuerySeq);
+ }
+
+ if ((queryStart == 0) && (queryEnd == 0))
+ printf("Query sequence not found for %s at %s:%u-%u\n", blastxTrack->name, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+ else
+ {
+ *querySeqLength = (blastxTrack->chromEnd - blastxTrack->chromStart);
+
+ /* Print query sequence info */
+ printf("<B>Query sequence: </B>%s<BR>\n", queryName);
+ printf("<B>Position:</B> "
+ "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
+ hgTracksPathAndSettings(), database, blastxTrack->chrom, queryStart + 1, queryEnd);
+ printf("%s:%d-%d</A><BR>\n", blastxTrack->chrom, queryStart + 1, queryEnd);
+ printf("<B>Genomic size: </B> %d nt<BR><BR>\n", (queryEnd - queryStart));
+
+ printf("<B>BlastX hit position:</B> "
+ "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
+ hgTracksPathAndSettings(), database, blastxTrack->chrom, blastxTrack->chromStart + 1, blastxTrack->chromEnd);
+ printf("%s:%d-%d</A><BR>\n", blastxTrack->chrom, blastxTrack->chromStart + 1, blastxTrack->chromEnd);
+ printf("<B>BlastX hit position relative to query sequence: </B> %d-%d<BR>\n", blastxTrack->chromStart - queryStart + 1, blastxTrack->chromEnd - queryStart);
+ printf("<B>BlastX hit genomic size: </B> %d nt<BR>\n", (blastxTrack->chromEnd - blastxTrack->chromStart));
+ }
+ }
+ else
+ printf("<BR>Query sequence not found for %s at %s:%u-%u<BR>\n", blastxTrack->name, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+ }
+
+ sqlFreeResult(&srQuerySeq);
+ free(targetGeneName[0]);
+ free(targetGeneName[1]);
+}
+
+struct blastTab* loadBlastxHits(struct sqlConnection *conn, char* queryName, char* queryTable, struct bed *blastxTrack)
+/* Load all blastx hits of the given query region into a list */
+{
+ char query[512];
+ struct sqlResult *srBlastxHits = NULL;
+ struct blastTab *list = NULL;
+ struct blastTab *blastxHits;
+ struct sqlResult *srQuery = NULL;
+ struct bed *queryTrack = NULL;
+ char **rowQuery;
+ int rowOffset;
+ char **row;
+ char blastxHitsTable[] = "blastxHits";
+ unsigned int queryStart = 0;
+ unsigned int queryEnd = 0;
+ unsigned int qStart = 0;
+ unsigned int qEnd = 0;
+
+ if (hTableExists(database, queryTable) && hTableExists(database, blastxHitsTable))
+ {
+ rowOffset = hOffsetPastBin(database, seqName, queryTable);
+ sprintf(query, "select * from %s where name = '%s'", queryTable, queryName);
+ srQuery = sqlGetResult(conn, query);
+ if ((rowQuery = sqlNextRow(srQuery)) != NULL)
+ {
+ queryTrack = bedLoadN(rowQuery+rowOffset, 6);
+ queryStart = blastxTrack->chromStart - queryTrack->chromStart + 1;
+ queryEnd = blastxTrack->chromEnd - queryTrack->chromStart;
+ }
+ sqlFreeResult(&srQuery);
+ srQuery = NULL;
+
+ sprintf(query, "select * from %s where query = '%s'", blastxHitsTable, queryName);
+ srBlastxHits = sqlGetResult(conn, query);
+ while ((row = sqlNextRow(srBlastxHits)) != NULL)
+ {
+ blastxHits = blastTabLoad(row);
+ if (blastxHits->qStart < blastxHits->qEnd)
+ {
+ qStart = blastxHits->qStart;
+ qEnd = blastxHits->qEnd;
+ }
+ else
+ {
+ qStart = blastxHits->qEnd;
+ qEnd = blastxHits->qStart;
+ }
+ if (((qStart <= queryStart) && (qEnd >= queryEnd)) ||
+ ((qStart >= queryStart) && (qStart <= queryEnd)) ||
+ ((qEnd >= queryStart) && (qEnd <= queryEnd)))
+ slAddTail(&list, blastxHits);
+ }
+ }
+ if (srQuery != NULL)
+ sqlFreeResult(&srQuery);
+ if (srBlastxHits != NULL)
+ sqlFreeResult(&srBlastxHits);
+ return list;
+}
+
+void printBlastxResult(struct sqlConnection *conn, struct blastTab *blastxHitsList, unsigned int querySeqLength)
+/* Print Blastx result of given clade */
+{
+ char query[512];
+ struct sqlResult *sr;
+ char **row;
+ char refSeq[50];
+ struct blastTab *blastxHits;
+ struct minGeneInfo *ginfo;
+ char *blastxTarget[2];
+ char *clades[2];
+ int cladePortionCount = 0;
+ char genome[50] = "";
+ char clade[50] = "";
+ unsigned int targetProteinStart = 0;
+ unsigned int targetProteinEnd = 0;
+ unsigned int hitStart = 0;
+ unsigned int hitEnd = 0;
+ char **buffer = NULL;
+
+ int tStart = cartInt(cart, "o");
+ int tEnd = cartInt(cart, "t");
+ char *tChrom = cartString(cart, "c");
+
+ printf("<br>\n");
+
+ /* Print table */
+ printf("<table style=\"width: 100%%;\" bgcolor=\"#%s\" border=\"0\" cellpadding=\"1\" cellspacing=\"0\">", HG_COL_BORDER);
+ printf("<tbody><tr><td>\n");
+ printf("<table style=\"width: 100%%; text-align: left;\" bgcolor=\"%s\" border=\"1\" cellpadding=\"2\" cellspacing=\"2\">\n", HG_COL_INSIDE);
+ printf("<tbody>\n");
+
+ /* Print table column heading */
+ printf("<tr style=\"vertical-align: top;\">\n");
+ printf("<td width=\"10%%\"><b>Organism</b></td>\n");
+ printf("<td width=\"7%%\"><b>Clade</b></td>\n");
+ printf("<td width=\"7%%\"><b>Gene</b></td>\n");
+ printf("<td><b>Product</b></td>\n");
+ printf("<td width=\"7%%\"><b>Position in Query Genomic Sequence</b></td>\n");
+ printf("<td width=\"7%%\"><b>Position in Target Protein</b></td>\n");
+ printf("<td width=\"7%%\"><b>Percent Length of Target Protein</b></td>\n");
+ printf("<td width=\"5%%\"><b>Protein Identity (%%)</b></td>\n");
+ printf("<td width=\"5%%\"><b>E-value</b></td>\n");
+ printf("<td width=\"5%%\"><b>Log of E-value</b></td>\n");
+ printf("<td width=\"5%%\"><b>Bit Score</b></td>\n");
+ printf("<td width=\"8%%\"><b>Protein Alignment Length</b></td>\n");
+ printf("<td width=\"8%%\"><b>Protein Mismatches</b></td>\n");
+ printf("<td width=\"8%%\"><b>Alignment Gaps</b></td>\n");
+ printf("</tr>\n");
+
+ blastxHits = blastxHitsList;
+ while (blastxHits != NULL)
+ {
+ parseDelimitedString(blastxHits->target, ':', blastxTarget, 2);
+
+ /* Get species info */
+ memset(genome, 0, 50);
+ memset(clade, 0, 50);
+ getGenomeClade(conn, blastxTarget[0], genome, clade);
+
+ if ((strcmp(genome , "") != 0) && (strcmp(clade, "") != 0))
+ {
+ cladePortionCount = parseDelimitedString(clade, '-', clades, 2);
+
+ printf("<tr style=\"vertical-align: top;\">\n");
+
+ printf("<td><a name=\"%s:%s:%u-%u\"><i>%s</i></td>\n", blastxTarget[1], tChrom, tStart, tEnd, genome);
+ if (cladePortionCount == 1)
+ printf("<td>%s</td>\n", clades[0]);
+ else if (cladePortionCount == 2)
+ printf("<td>%s<br>%s</td>\n", clades[0], clades[1]);
+
+ /* Get target gene position from refSeq */
+ strcpy(refSeq, blastxTarget[0]);
+ strcat(refSeq, ".refSeq");
+ if (hDbExists(blastxTarget[0]) && hTableExists(blastxTarget[0], "refSeq"))
+ {
+ sprintf(query, "select chrom, cdsStart, cdsEnd from %s where name = '%s'",
+ refSeq, blastxTarget[1]);
+ sr = sqlGetResult(conn, query);
+ if ((row = sqlNextRow(sr)) != NULL)
+ {
+ targetProteinStart = strtoul(row[1], buffer, 10);
+ targetProteinEnd = strtoul(row[2], buffer, 10);
+ hitStart = targetProteinStart + blastxHits->tStart * 3 + 1;
+ hitEnd = targetProteinStart + blastxHits->tEnd * 3;
+ printf("<td><a href=\"hgTracks\?position=%s:%u-%u&db=%s\" TARGET=_blank>%s</a></td>\n",
+ row[0], hitStart, hitEnd, blastxTarget[0], blastxTarget[1]);
+ }
+ else
+ printf("<td>%s</td>\n", blastxTarget[1]);
+ sqlFreeResult(&sr);
+ }
+ else
+ printf("<td>%s</td>\n", blastxTarget[1]);
+
+ /* Get target gene product annotation */
+ if (hDbExists(blastxTarget[0]))
+ {
+ ginfo = getGbProtCodeInfo(conn, blastxTarget[0], blastxTarget[1]);
+ if (ginfo != NULL && ginfo->product != NULL && differentString(ginfo->product,"none"))
+ printf("<td>%s</td>\n", ginfo->product);
+ else
+ printf("<td>%s</td>\n", "N/A");
+ }
+ else
+ printf("<td>%s</td>\n", "N/A");
+
+ printf("<td style=\"text-align: center;\">%u - %u</td>\n", blastxHits->qStart + 1, blastxHits->qEnd);
+ printf("<td style=\"text-align: center;\">%u - %u</td>\n", blastxHits->tStart + 1, blastxHits->tEnd);
+ if ((targetProteinStart == 0) || (targetProteinEnd == 0))
+ printf("<td style=\"text-align: center;\">N/A</td>\n");
+ else
+ printf("<td style=\"text-align: center;\">%0.f</td>\n", ((double) (blastxHits->tEnd - blastxHits->tStart) /
+ ((double) (targetProteinEnd-targetProteinStart-3) / 3.0f)) * 100.0f);
+ printf("<td style=\"text-align: right;\">%0.1f</td>\n", blastxHits->identity);
+ printf("<td style=\"text-align: right;\">%0.0e</td>\n", blastxHits->eValue);
+ printf("<td style=\"text-align: right;\">e%0.0f</td>\n", (blastxHits->eValue == 0)? 0 : log(blastxHits->eValue) / log(10));
+ printf("<td style=\"text-align: right;\">%0.1f</td>\n", blastxHits->bitScore);
+ printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->aliLength);
+ printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->mismatch);
+ printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->gapOpen);
+
+ printf("</tr>\n");
+ }
+ free(blastxTarget[0]);
+ free(blastxTarget[1]);
+ blastxHits = blastxHits->next;
+ }
+ /* Close table */
+ printf("</tbody>\n");
+ printf("</table>\n");
+ printf("</td></tr></tbody>\n");
+ printf("</table>\n");
+}
+
+void doBlastX(struct trackDb *tdb, char *targetName)
+/* Handle the BlastX Archaea and BlastX Bacteria tracks. */
+{
+ char queryName[50];
+ char queryTable[50];
+ unsigned int querySeqLength = 0;
+ struct sqlConnection *conn = hAllocConn(database);
+ struct bed *blastxTrack;
+ struct blastTab *blastxHitsList;
+
+ cartWebStart(cart, database, "%s", "BlastX Alignment Hits");
+
+ blastxTrack = getBlastpTrackRecord(conn, tdb, targetName);
+ if (!differentWord(blastxTrack->chrom, "assembly"))
+ strcpy(queryTable, "scaffolds");
+ else
+ strcpy(queryTable, "igenics");
+ printQuerySequenceInfo(conn, blastxTrack, queryName, &querySeqLength, queryTable);
+
+ blastxHitsList = loadBlastxHits(conn, queryName, queryTable, blastxTrack);
+
+ printBlastxResult(conn, blastxHitsList, querySeqLength);
+ printf("<BR>\n");
+
+ printf("<hr>\n");
+ hFreeConn(&conn);
+ printTrackHtml(tdb);
+}
+
void doUltraConserved(struct trackDb *tdb, char *item)
/* Handle the ultraConserved track. */
{
char tableName[65];
@@ -2649,8 +2975,14 @@
&& differentWord(track, "BlastP_Bacteria"))
{
doBlastP(tdb, item);
}
+else if (startsWith("BlastX_", track)
+ && differentWord(track, "BlastX_Crenarchaea") && differentWord(track, "BlastX_Nanoarch") && differentWord(track, "BlastX_Euryarch")
+ && differentWord(track, "BlastX_Bacteria"))
+ {
+ doBlastX(tdb, item);
+ }
else if (sameWord(track,"ultraConserved"))
{
doUltraConserved(tdb, item);
}