src/hg/hgc/lowelab.c 1.32

1.32 2009/05/08 21:15:26 pchan
fix hTableExist call for blastp track; change code to use centraldb name from hg.conf; add code to support blastx track
Index: src/hg/hgc/lowelab.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/lowelab.c,v
retrieving revision 1.31
retrieving revision 1.32
diff -b -B -U 4 -r1.31 -r1.32
--- src/hg/hgc/lowelab.c	30 Jan 2009 23:47:45 -0000	1.31
+++ src/hg/hgc/lowelab.c	8 May 2009 21:15:26 -0000	1.32
@@ -407,18 +407,21 @@
 	{
 	char query[256];
 	char *description;
 	safef(query, sizeof(query), "select description from proteome.pfamDesc where pfamAC='%s'", el->name);
-	if (!sqlTableExists(spConn,"pfamDesc")) 
+	description = sqlQuickString(spConn, query);
+	if (description == NULL)
 	    {
 	    safef(query, sizeof(query), 
 	    "select extDbRef.extAcc1 from extDbRef,extDb "
 	    "where extDbRef.acc = '%s' "
 	    "and extDbRef.extDb = extDb.id "
 	    "and extDb.val = '%s'"
 	    , spAcc,el->name);
-	    }
+	    
+	    printf("%s\n", query);
 	description = sqlQuickString(spConn, query);
+	    }
 	if (description == NULL)
 	    description = cloneString("n/a");
 	printf("<A HREF=\"http://pfam.sanger.ac.uk/family?acc=%s\" TARGET=_blank>", 
 	    el->name);
@@ -1556,9 +1559,9 @@
       pfamHit = lowelabPfamHitsLoad(row+rowOffset);
 
       safef(query, sizeof(query), "select description from proteome.pfamDesc where pfamAC='%s'", pfamHit->pfamAC);
       
-	if (!sqlTableExists(spConn,"pfamDesc")) 
+	if (!sqlTableExists(spConn,"proteome.pfamDesc")) 
 	    {
 	    safef(query, sizeof(query), 
 	    "select extDbRef.extAcc1 from extDbRef,extDb "
 	    "where extDbRef.acc = '%s' "
@@ -1853,23 +1856,29 @@
 struct minGeneInfo* getGbProtCodeInfo(struct sqlConnection *conn, char* dbName, char *geneName)
 /* Get refseq protein annotation by given gene name in a given database */
 {
     char query[512];
-    struct sqlResult *sr;
+    struct sqlResult *sr = NULL;
     char **row;
     struct minGeneInfo* ginfo = NULL;
     char gbProtCodeXra[50];
     
+    if (strcmp(database, dbName) == 0)
+        strcpy(gbProtCodeXra, "gbProtCodeXra");
+    else
+    {
     strcpy(gbProtCodeXra, dbName);
     strcat(gbProtCodeXra, ".gbProtCodeXra");
-    if (hTableExists(database, gbProtCodeXra))
+    }
+    if (hTableExists(dbName, "gbProtCodeXra"))
     {
         sprintf(query, "select * from %s where name = '%s'", gbProtCodeXra, geneName);
         sr = sqlGetResult(conn, query);
         if ((row = sqlNextRow(sr)) != NULL) 
             ginfo = minGeneInfoLoad(row);
     }
     
+    if (sr != NULL)
     sqlFreeResult(&sr);
     return ginfo;
 }
 
@@ -1970,15 +1979,18 @@
 {
     char query[512];
     struct sqlResult *srDb;
     char **rowDb;
+    char *centraldb = cfgOption("central.db");
 
-    sprintf(query, "select count(*) from centraldb.genomeClade a, centraldb.dbDb b, centraldb.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'", dbName);
+    sprintf(query, "select count(*) from %s.genomeClade a, %s.dbDb b, %s.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'",
+            centraldb, centraldb, centraldb, dbName);
     srDb = sqlGetResult(conn, query);
     if ((rowDb = sqlNextRow(srDb)) != NULL)
     {
         sqlFreeResult(&srDb);
-        sprintf(query, "select a.genome, c.label from centraldb.genomeClade a, centraldb.dbDb b, centraldb.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'", dbName);
+        sprintf(query, "select a.genome, c.label from %s.genomeClade a, %s.dbDb b, %s.clade c where a.genome = b.genome and a.clade = c.name and b.name = '%s'",
+                centraldb, centraldb, centraldb, dbName);
         srDb = sqlGetResult(conn, query);
         if ((rowDb = sqlNextRow(srDb)) != NULL)
         {
             strcpy(genome, rowDb[0]);
@@ -1996,9 +2008,9 @@
     char **rowDb;
     struct slName *list = NULL;
     char clade[50];
 
-    sprintf(query, "select label from centraldb.clade");
+    sprintf(query, "select label from %s.clade", cfgOption("central.db"));
     srDb = sqlGetResult(conn, query);
     while ((rowDb = sqlNextRow(srDb)) != NULL)
     {
         strcpy(clade, rowDb[0]);
@@ -2012,9 +2024,9 @@
 struct blastTab* loadBlastpHits(struct sqlConnection *conn, char* queryName)
 /* Load all blastp hits of the given query gene into a list */
 {
     char query[512];
-    struct sqlResult *srBlastpHits;
+    struct sqlResult *srBlastpHits = NULL;
     struct blastTab *list = NULL;
     struct blastTab *blastpHits;
     char **row;
     char blastpHitsTable[] = "blastpHits";
@@ -2028,8 +2040,9 @@
             blastpHits = blastTabLoad(row);
             slAddTail(&list, blastpHits);
         }
     }
+    if (srBlastpHits != NULL)
     sqlFreeResult(&srBlastpHits);
     return list;
 }
 
@@ -2075,9 +2088,9 @@
     printf("<td width=\"5%%\"><b>E-value</b></td>\n");
     printf("<td width=\"5%%\"><b>Log of E-value</b></td>\n");
     printf("<td width=\"5%%\"><b>Bit Score</b></td>\n");
     printf("<td width=\"6%%\"><b>Protein Alignment Length</b></td>\n");
-    printf("<td width=\"5%%\"><b>Protein Mismatchs</b></td>\n");
+    printf("<td width=\"5%%\"><b>Protein Mismatches</b></td>\n");
     printf("<td width=\"5%%\"><b>Alignment Gaps</b></td>\n");
     printf("</tr>\n");
     
     blastpHits = blastpHitsList;
@@ -2104,9 +2117,9 @@
             
             /* Get target gene position from refSeq */
             strcpy(refSeq, blastpTarget[0]);
             strcat(refSeq, ".refSeq");
-            if (hDbExists(blastpTarget[0]) && hTableExists(database, refSeq))
+            if (hDbExists(blastpTarget[0]) && hTableExists(blastpTarget[0], "refSeq"))
             {
                 sprintf(query, "select chrom, cdsStart, cdsEnd from %s where name = '%s'",
                         refSeq, blastpTarget[1]);
                 sr = sqlGetResult(conn, query);
@@ -2182,8 +2195,321 @@
     hFreeConn(&conn);
     printTrackHtml(tdb);
 }
 
+void printQuerySequenceInfo(struct sqlConnection *conn, struct bed *blastxTrack, char *queryName, unsigned int *querySeqLength, char *queryTable)
+/* Get and print blastx query sequence info */
+{
+    char query[512];
+    struct sqlResult *srQuerySeq;
+    char **row;
+    int seqCount;
+    char **buffer = NULL;
+    char *targetGeneName[2];
+    
+    char blastxHits[] = "blastxHits";
+    
+    unsigned int queryStart = 0;
+    unsigned int queryEnd = 0;
+    
+    parseDelimitedString(blastxTrack->name, ':', targetGeneName, 2); 
+        
+    if (hTableExists(database, queryTable) && hTableExists(database, blastxHits))
+    {
+        /* Get query sequence from query table */
+        sprintf(query, "select count(*) from %s where chrom = '%s' and chromStart <= %u and chromEnd >= %u",
+                queryTable, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+        srQuerySeq = sqlGetResult(conn, query);
+        if ((row = sqlNextRow(srQuerySeq)) != NULL)
+        {
+            seqCount = atoi(row[0]);
+            sqlFreeResult(&srQuerySeq);
+            
+            if (seqCount == 1)
+            {
+                sprintf(query, "select name, chromStart, chromEnd from %s where chrom = '%s' and chromStart <= %u and chromEnd >= %u",
+                        queryTable, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+                srQuerySeq = sqlGetResult(conn, query);
+                if ((row = sqlNextRow(srQuerySeq)) != NULL)
+                {
+                    strcpy(queryName, row[0]);
+                    queryStart = strtoul(row[1], buffer, 10);
+                    queryEnd = strtoul(row[2], buffer, 10);
+                }
+                sqlFreeResult(&srQuerySeq);
+            }
+            else
+            {
+                /* Check blastxHits if more than 1 query sequence is found within the region */
+                sprintf(query, "select a.name, a.chromStart, a.chromEnd from %s a, %s b where a.chrom = '%s' and a.chromStart <= %u and a.chromEnd >= %u and a.name = b.query and b.target like '%%%s'",
+                        queryTable, blastxHits,
+                        blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd, targetGeneName[0]);
+                srQuerySeq = sqlGetResult(conn, query);
+                if ((row = sqlNextRow(srQuerySeq)) != NULL)
+                {
+                    strcpy(queryName, row[0]);
+                    queryStart = strtoul(row[1], buffer, 10);
+                    queryEnd = strtoul(row[2], buffer, 10);
+                }
+                sqlFreeResult(&srQuerySeq);
+            }
+            
+            if ((queryStart == 0) && (queryEnd == 0))
+                printf("Query sequence not found for %s at %s:%u-%u\n", blastxTrack->name, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);
+            else
+            {
+                *querySeqLength = (blastxTrack->chromEnd - blastxTrack->chromStart);
+		  
+                /* Print query sequence info */
+                printf("<B>Query sequence: </B>%s<BR>\n", queryName);                                
+                printf("<B>Position:</B> "
+                       "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
+                       hgTracksPathAndSettings(), database, blastxTrack->chrom, queryStart + 1, queryEnd);
+                printf("%s:%d-%d</A><BR>\n", blastxTrack->chrom, queryStart + 1, queryEnd);
+                printf("<B>Genomic size: </B> %d nt<BR><BR>\n", (queryEnd - queryStart));
+
+                printf("<B>BlastX hit position:</B> "
+                       "<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
+                       hgTracksPathAndSettings(), database, blastxTrack->chrom, blastxTrack->chromStart + 1, blastxTrack->chromEnd);
+                printf("%s:%d-%d</A><BR>\n", blastxTrack->chrom, blastxTrack->chromStart + 1, blastxTrack->chromEnd);
+                printf("<B>BlastX hit position relative to query sequence: </B> %d-%d<BR>\n", blastxTrack->chromStart - queryStart + 1, blastxTrack->chromEnd - queryStart);
+                printf("<B>BlastX hit genomic size: </B> %d nt<BR>\n", (blastxTrack->chromEnd - blastxTrack->chromStart));
+            }
+        }
+        else
+            printf("<BR>Query sequence not found for %s at %s:%u-%u<BR>\n", blastxTrack->name, blastxTrack->chrom, blastxTrack->chromStart, blastxTrack->chromEnd);           
+    }
+ 
+    sqlFreeResult(&srQuerySeq);
+    free(targetGeneName[0]);
+    free(targetGeneName[1]);
+}
+
+struct blastTab* loadBlastxHits(struct sqlConnection *conn, char* queryName, char* queryTable, struct bed *blastxTrack)
+/* Load all blastx hits of the given query region into a list */
+{
+    char query[512];
+    struct sqlResult *srBlastxHits = NULL;
+    struct blastTab *list = NULL;
+    struct blastTab *blastxHits;
+    struct sqlResult *srQuery = NULL;
+    struct bed *queryTrack = NULL;
+    char **rowQuery;
+    int rowOffset;
+    char **row;
+    char blastxHitsTable[] = "blastxHits";
+    unsigned int queryStart = 0;
+    unsigned int queryEnd = 0;
+    unsigned int qStart = 0;
+    unsigned int qEnd = 0;
+
+    if (hTableExists(database, queryTable) && hTableExists(database, blastxHitsTable))
+    {
+        rowOffset = hOffsetPastBin(database, seqName, queryTable);
+        sprintf(query, "select * from %s where name = '%s'", queryTable, queryName);
+        srQuery = sqlGetResult(conn, query);
+        if ((rowQuery = sqlNextRow(srQuery)) != NULL)
+        {
+            queryTrack = bedLoadN(rowQuery+rowOffset, 6);
+            queryStart = blastxTrack->chromStart - queryTrack->chromStart + 1;
+            queryEnd = blastxTrack->chromEnd - queryTrack->chromStart;
+        }
+        sqlFreeResult(&srQuery);
+        srQuery = NULL;
+
+        sprintf(query, "select * from %s where query = '%s'", blastxHitsTable, queryName);
+        srBlastxHits = sqlGetResult(conn, query);
+        while ((row = sqlNextRow(srBlastxHits)) != NULL)
+        {
+            blastxHits = blastTabLoad(row);
+            if (blastxHits->qStart < blastxHits->qEnd)
+            {
+                qStart = blastxHits->qStart;
+                qEnd = blastxHits->qEnd;
+            }
+            else
+            {
+                qStart = blastxHits->qEnd;
+                qEnd = blastxHits->qStart;                
+            }
+            if (((qStart <= queryStart) && (qEnd >= queryEnd)) ||
+                ((qStart >= queryStart) && (qStart <= queryEnd)) ||
+                ((qEnd >= queryStart) && (qEnd <= queryEnd)))
+                slAddTail(&list, blastxHits);
+        }
+    }
+    if (srQuery != NULL)
+        sqlFreeResult(&srQuery);
+    if (srBlastxHits != NULL)
+        sqlFreeResult(&srBlastxHits);
+    return list;
+}
+
+void printBlastxResult(struct sqlConnection *conn, struct blastTab *blastxHitsList, unsigned int querySeqLength)
+/* Print Blastx result of given clade */
+{
+    char query[512];
+    struct sqlResult *sr;
+    char **row;
+    char refSeq[50];
+    struct blastTab *blastxHits;
+    struct minGeneInfo *ginfo;
+    char *blastxTarget[2];
+    char *clades[2];
+    int cladePortionCount = 0;
+    char genome[50] = "";
+    char clade[50] = "";
+    unsigned int targetProteinStart = 0;
+    unsigned int targetProteinEnd = 0;
+    unsigned int hitStart = 0;
+    unsigned int hitEnd = 0;
+    char **buffer = NULL;
+
+    int tStart = cartInt(cart, "o");
+    int tEnd = cartInt(cart, "t");
+    char *tChrom = cartString(cart, "c");
+
+    printf("<br>\n");
+	
+    /* Print table */
+    printf("<table style=\"width: 100%%;\" bgcolor=\"#%s\" border=\"0\" cellpadding=\"1\" cellspacing=\"0\">", HG_COL_BORDER);
+    printf("<tbody><tr><td>\n");
+    printf("<table style=\"width: 100%%; text-align: left;\" bgcolor=\"%s\" border=\"1\" cellpadding=\"2\" cellspacing=\"2\">\n", HG_COL_INSIDE);
+    printf("<tbody>\n");
+
+    /* Print table column heading */
+    printf("<tr style=\"vertical-align: top;\">\n");
+    printf("<td width=\"10%%\"><b>Organism</b></td>\n");
+    printf("<td width=\"7%%\"><b>Clade</b></td>\n");
+    printf("<td width=\"7%%\"><b>Gene</b></td>\n");
+    printf("<td><b>Product</b></td>\n");
+    printf("<td width=\"7%%\"><b>Position in Query Genomic Sequence</b></td>\n");
+    printf("<td width=\"7%%\"><b>Position in Target Protein</b></td>\n");
+    printf("<td width=\"7%%\"><b>Percent Length of Target Protein</b></td>\n");
+    printf("<td width=\"5%%\"><b>Protein Identity (%%)</b></td>\n");
+    printf("<td width=\"5%%\"><b>E-value</b></td>\n");
+    printf("<td width=\"5%%\"><b>Log of E-value</b></td>\n");
+    printf("<td width=\"5%%\"><b>Bit Score</b></td>\n");
+    printf("<td width=\"8%%\"><b>Protein Alignment Length</b></td>\n");
+    printf("<td width=\"8%%\"><b>Protein Mismatches</b></td>\n");
+    printf("<td width=\"8%%\"><b>Alignment Gaps</b></td>\n");
+    printf("</tr>\n");
+    
+    blastxHits = blastxHitsList;
+    while (blastxHits != NULL)
+    {
+        parseDelimitedString(blastxHits->target, ':', blastxTarget, 2);
+        
+        /* Get species info */
+        memset(genome, 0, 50);
+        memset(clade, 0, 50);
+        getGenomeClade(conn, blastxTarget[0], genome, clade);
+
+        if ((strcmp(genome , "") != 0) && (strcmp(clade, "") != 0))
+        {
+            cladePortionCount = parseDelimitedString(clade, '-', clades, 2);       
+    
+            printf("<tr style=\"vertical-align: top;\">\n");
+           
+            printf("<td><a name=\"%s:%s:%u-%u\"><i>%s</i></td>\n", blastxTarget[1], tChrom, tStart, tEnd, genome);
+            if (cladePortionCount == 1)
+                printf("<td>%s</td>\n", clades[0]);
+            else if (cladePortionCount == 2)
+                printf("<td>%s<br>%s</td>\n", clades[0], clades[1]);
+            
+            /* Get target gene position from refSeq */
+            strcpy(refSeq, blastxTarget[0]);
+            strcat(refSeq, ".refSeq");
+            if (hDbExists(blastxTarget[0]) && hTableExists(blastxTarget[0], "refSeq"))
+            {
+                sprintf(query, "select chrom, cdsStart, cdsEnd from %s where name = '%s'",
+                        refSeq, blastxTarget[1]);
+                sr = sqlGetResult(conn, query);
+                if ((row = sqlNextRow(sr)) != NULL)
+                {
+                    targetProteinStart = strtoul(row[1], buffer, 10);
+                    targetProteinEnd = strtoul(row[2], buffer, 10);
+                    hitStart = targetProteinStart + blastxHits->tStart * 3 + 1;
+                    hitEnd = targetProteinStart + blastxHits->tEnd * 3;
+                    printf("<td><a href=\"hgTracks\?position=%s:%u-%u&db=%s\" TARGET=_blank>%s</a></td>\n",
+                           row[0], hitStart, hitEnd, blastxTarget[0], blastxTarget[1]);
+                }
+                else
+                    printf("<td>%s</td>\n", blastxTarget[1]);
+                sqlFreeResult(&sr);
+            }
+            else
+                printf("<td>%s</td>\n", blastxTarget[1]);
+    
+            /* Get target gene product annotation */
+            if (hDbExists(blastxTarget[0]))
+            {
+                ginfo = getGbProtCodeInfo(conn, blastxTarget[0], blastxTarget[1]);
+                if (ginfo != NULL && ginfo->product != NULL && differentString(ginfo->product,"none"))
+                    printf("<td>%s</td>\n", ginfo->product);
+                else
+                    printf("<td>%s</td>\n", "N/A");
+                }
+            else
+                printf("<td>%s</td>\n", "N/A");        
+            
+            printf("<td style=\"text-align: center;\">%u - %u</td>\n", blastxHits->qStart + 1, blastxHits->qEnd);
+            printf("<td style=\"text-align: center;\">%u - %u</td>\n", blastxHits->tStart + 1, blastxHits->tEnd);
+            if ((targetProteinStart == 0) || (targetProteinEnd == 0))
+                printf("<td style=\"text-align: center;\">N/A</td>\n");
+            else
+                printf("<td style=\"text-align: center;\">%0.f</td>\n", ((double) (blastxHits->tEnd - blastxHits->tStart) /
+                                                                     ((double) (targetProteinEnd-targetProteinStart-3) / 3.0f)) * 100.0f);
+            printf("<td style=\"text-align: right;\">%0.1f</td>\n", blastxHits->identity);
+            printf("<td style=\"text-align: right;\">%0.0e</td>\n", blastxHits->eValue);
+            printf("<td style=\"text-align: right;\">e%0.0f</td>\n", (blastxHits->eValue == 0)? 0 : log(blastxHits->eValue) / log(10));
+            printf("<td style=\"text-align: right;\">%0.1f</td>\n", blastxHits->bitScore);
+            printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->aliLength);
+            printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->mismatch);
+            printf("<td style=\"text-align: right;\">%u</td>\n", blastxHits->gapOpen);
+    
+            printf("</tr>\n");
+        }
+        free(blastxTarget[0]);        
+        free(blastxTarget[1]);
+        blastxHits = blastxHits->next;
+    }
+    /* Close table */
+    printf("</tbody>\n");
+    printf("</table>\n");
+    printf("</td></tr></tbody>\n");
+    printf("</table>\n");
+}
+
+void doBlastX(struct trackDb *tdb, char *targetName)
+/* Handle the BlastX Archaea and BlastX Bacteria tracks. */
+{
+    char queryName[50];
+    char queryTable[50];
+    unsigned int querySeqLength = 0;
+    struct sqlConnection *conn = hAllocConn(database);
+    struct bed *blastxTrack;
+    struct blastTab *blastxHitsList;
+ 
+    cartWebStart(cart, database, "%s", "BlastX Alignment Hits");
+    
+    blastxTrack = getBlastpTrackRecord(conn, tdb, targetName);
+    if (!differentWord(blastxTrack->chrom, "assembly"))
+        strcpy(queryTable, "scaffolds");
+    else
+        strcpy(queryTable, "igenics");
+    printQuerySequenceInfo(conn, blastxTrack, queryName, &querySeqLength, queryTable);
+
+    blastxHitsList = loadBlastxHits(conn, queryName, queryTable, blastxTrack);
+    
+    printBlastxResult(conn, blastxHitsList, querySeqLength);
+    printf("<BR>\n");
+    
+    printf("<hr>\n");
+    hFreeConn(&conn);
+    printTrackHtml(tdb);
+}
+
 void doUltraConserved(struct trackDb *tdb, char *item)
 /* Handle the ultraConserved track. */
 {
     char tableName[65];
@@ -2649,8 +2975,14 @@
         && differentWord(track, "BlastP_Bacteria"))
     {
     doBlastP(tdb, item);
     }
+else if (startsWith("BlastX_", track)
+        && differentWord(track, "BlastX_Crenarchaea") && differentWord(track, "BlastX_Nanoarch") && differentWord(track, "BlastX_Euryarch")
+        && differentWord(track, "BlastX_Bacteria"))
+    {
+    doBlastX(tdb, item);
+    }
 else if (sameWord(track,"ultraConserved"))  
   {
     doUltraConserved(tdb, item);
   }