src/lib/blastParse.c 1.22

1.22 2009/04/12 03:47:20 markd
added support for PSI BLAST madness
Index: src/lib/blastParse.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/blastParse.c,v
retrieving revision 1.21
retrieving revision 1.22
diff -b -B -U 4 -r1.21 -r1.22
--- src/lib/blastParse.c	17 Sep 2008 17:56:37 -0000	1.21
+++ src/lib/blastParse.c	12 Apr 2009 03:47:20 -0000	1.22
@@ -53,14 +53,14 @@
 {
 char *line = NULL;
 if (lineFileNext(bf->lf, &line, NULL))
     {
-    verbose(TRACE_LEVEL, "=> %s\n", line);
+    verbose(TRACE_LEVEL, "    => %s\n", line);
     return line;
     }
 else
     {
-    verbose(TRACE_LEVEL, "=> EOF\n");
+    verbose(TRACE_LEVEL, "    => EOF\n");
     return NULL;
     }
 }
 
@@ -236,8 +236,25 @@
 bq->dbSeqCount = atoi(words[0]);
 bq->dbBaseCount = atoi(words[2]);
 }
 
+static char *roundLinePrefix = "Results from round "; // start of a round line
+
+static boolean isRoundLine(char *line)
+/* check if a line is a PSI round number line */
+{
+return startsWith(roundLinePrefix, line);
+}
+
+static void parseRoundLine(char *line, struct blastQuery *bq)
+/* round line and save current round in query
+ *   Results from round 1
+ */
+{
+char *p = skipLeadingSpaces(line + strlen(roundLinePrefix));
+bq->psiRounds = atoi(p);
+}
+
 struct blastQuery *blastFileNextQuery(struct blastFile *bf)
 /* Read all alignments associated with next query.  Return NULL at EOF. */
 {
 char *line;
@@ -267,9 +284,11 @@
 	{
 	lineFileReuse(bf->lf);
 	break;
 	}
-    if (stringIn("No hits found", line) != NULL)
+    else if (isRoundLine(line))
+        parseRoundLine(line, bq);
+    else if (stringIn("No hits found", line) != NULL)
         break;
     }
 
 /* Read in gapped alignments. */
@@ -297,10 +316,8 @@
 struct blastBlock *bb;
 int lenSearch;
 
 verbose(TRACE_LEVEL, "blastFileNextGapped\n");
-AllocVar(bga);
-bga->query = bq;
 
 /* First line should be query. */
 if (!bfSkipBlankLines(bf))
     return NULL;
@@ -313,9 +330,13 @@
 if (startsWith("  Database:", line) || (stringIn("BLAST", line) != NULL))
     return NULL;
 if (line[0] != '>')
     bfError(bf, "Expecting >target");
+
+AllocVar(bga);
+bga->query = bq;
 bga->targetName = cloneString(line+1); 
+bga->psiRound = bq->psiRounds;
 
 /* Process something like:
  *      Length = 100000
  * however this follows a possible multi-line description, so be specified
@@ -323,8 +344,10 @@
  */
 for (lenSearch=0; lenSearch<25; lenSearch++)
 	{
 	line = bfNeedNextLine(bf);
+        if (isRoundLine(line))
+            parseRoundLine(line, bq);
 	wordCount = chopLine(line, words);
 	if (wordCount == 3 && sameString(words[0], "Length") &&  sameString(words[1], "=")
             && isdigit(words[2][0]))
 		break;
@@ -356,9 +379,9 @@
     return 0;
     }
 }
 
-static boolean nextBlockLine(struct blastFile *bf, char **retLine)
+static boolean nextBlockLine(struct blastFile *bf, struct blastQuery *bq, char **retLine)
 /* Get next block line.  Return FALSE and reuse line if it's
  * an end of block type line. */
 {
 struct lineFile *lf = bf->lf;
@@ -366,8 +389,11 @@
 
 *retLine = line = bfNextLine(bf);
 if (line == NULL)
     return FALSE;
+if (isRoundLine(line))
+    parseRoundLine(line, bq);
+
 /*
 the last condition was added to deal with the new blast output format and is meant to find lines such as this one:
 TBLASTN 2.2.15 [Oct-15-2006]
 I am hoping that by looking for only "BLAST" this will work with things like blastp, blastn, psi-blast, etc
@@ -463,9 +489,9 @@
  * or something that looks like we're done with this gapped
  * alignment. */
 for (;;)
     {
-    if (!nextBlockLine(bf, &line))
+    if (!nextBlockLine(bf, bq, &line))
 	return NULL;
     if (startsWith(" Score", line))
 	break;
     }
@@ -593,9 +619,9 @@
     {
     /* Fetch next first line. */
     for (;;)
 	{
-	if (!nextBlockLine(bf, &line))
+	if (!nextBlockLine(bf, bq, &line))
 	    goto DONE;
 	if (startsWith(" Score", line))
 	    {
 	    lineFileReuse(bf->lf);
@@ -757,9 +783,12 @@
 void blastGappedAliPrint(struct blastGappedAli* ba, FILE* out)
 /* print a BLAST gapped alignment for debugging purposes  */
 {
 struct blastBlock *bb;
-fprintf(out, "%s <=> %s\n", ba->query->query, ba->targetName);
+fprintf(out, "%s <=> %s", ba->query->query, ba->targetName);
+if (ba->psiRound > 0)
+    fprintf(out, " round: %d", ba->psiRound);
+fputc('\n', out);
 for (bb = ba->blocks; bb != NULL; bb = bb->next)
     {
     blastBlockPrint(bb, out);
     }