src/lib/blastParse.c 1.22
1.22 2009/04/12 03:47:20 markd
added support for PSI BLAST madness
Index: src/lib/blastParse.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/blastParse.c,v
retrieving revision 1.21
retrieving revision 1.22
diff -b -B -U 4 -r1.21 -r1.22
--- src/lib/blastParse.c 17 Sep 2008 17:56:37 -0000 1.21
+++ src/lib/blastParse.c 12 Apr 2009 03:47:20 -0000 1.22
@@ -53,14 +53,14 @@
{
char *line = NULL;
if (lineFileNext(bf->lf, &line, NULL))
{
- verbose(TRACE_LEVEL, "=> %s\n", line);
+ verbose(TRACE_LEVEL, " => %s\n", line);
return line;
}
else
{
- verbose(TRACE_LEVEL, "=> EOF\n");
+ verbose(TRACE_LEVEL, " => EOF\n");
return NULL;
}
}
@@ -236,8 +236,25 @@
bq->dbSeqCount = atoi(words[0]);
bq->dbBaseCount = atoi(words[2]);
}
+static char *roundLinePrefix = "Results from round "; // start of a round line
+
+static boolean isRoundLine(char *line)
+/* check if a line is a PSI round number line */
+{
+return startsWith(roundLinePrefix, line);
+}
+
+static void parseRoundLine(char *line, struct blastQuery *bq)
+/* round line and save current round in query
+ * Results from round 1
+ */
+{
+char *p = skipLeadingSpaces(line + strlen(roundLinePrefix));
+bq->psiRounds = atoi(p);
+}
+
struct blastQuery *blastFileNextQuery(struct blastFile *bf)
/* Read all alignments associated with next query. Return NULL at EOF. */
{
char *line;
@@ -267,9 +284,11 @@
{
lineFileReuse(bf->lf);
break;
}
- if (stringIn("No hits found", line) != NULL)
+ else if (isRoundLine(line))
+ parseRoundLine(line, bq);
+ else if (stringIn("No hits found", line) != NULL)
break;
}
/* Read in gapped alignments. */
@@ -297,10 +316,8 @@
struct blastBlock *bb;
int lenSearch;
verbose(TRACE_LEVEL, "blastFileNextGapped\n");
-AllocVar(bga);
-bga->query = bq;
/* First line should be query. */
if (!bfSkipBlankLines(bf))
return NULL;
@@ -313,9 +330,13 @@
if (startsWith(" Database:", line) || (stringIn("BLAST", line) != NULL))
return NULL;
if (line[0] != '>')
bfError(bf, "Expecting >target");
+
+AllocVar(bga);
+bga->query = bq;
bga->targetName = cloneString(line+1);
+bga->psiRound = bq->psiRounds;
/* Process something like:
* Length = 100000
* however this follows a possible multi-line description, so be specified
@@ -323,8 +344,10 @@
*/
for (lenSearch=0; lenSearch<25; lenSearch++)
{
line = bfNeedNextLine(bf);
+ if (isRoundLine(line))
+ parseRoundLine(line, bq);
wordCount = chopLine(line, words);
if (wordCount == 3 && sameString(words[0], "Length") && sameString(words[1], "=")
&& isdigit(words[2][0]))
break;
@@ -356,9 +379,9 @@
return 0;
}
}
-static boolean nextBlockLine(struct blastFile *bf, char **retLine)
+static boolean nextBlockLine(struct blastFile *bf, struct blastQuery *bq, char **retLine)
/* Get next block line. Return FALSE and reuse line if it's
* an end of block type line. */
{
struct lineFile *lf = bf->lf;
@@ -366,8 +389,11 @@
*retLine = line = bfNextLine(bf);
if (line == NULL)
return FALSE;
+if (isRoundLine(line))
+ parseRoundLine(line, bq);
+
/*
the last condition was added to deal with the new blast output format and is meant to find lines such as this one:
TBLASTN 2.2.15 [Oct-15-2006]
I am hoping that by looking for only "BLAST" this will work with things like blastp, blastn, psi-blast, etc
@@ -463,9 +489,9 @@
* or something that looks like we're done with this gapped
* alignment. */
for (;;)
{
- if (!nextBlockLine(bf, &line))
+ if (!nextBlockLine(bf, bq, &line))
return NULL;
if (startsWith(" Score", line))
break;
}
@@ -593,9 +619,9 @@
{
/* Fetch next first line. */
for (;;)
{
- if (!nextBlockLine(bf, &line))
+ if (!nextBlockLine(bf, bq, &line))
goto DONE;
if (startsWith(" Score", line))
{
lineFileReuse(bf->lf);
@@ -757,9 +783,12 @@
void blastGappedAliPrint(struct blastGappedAli* ba, FILE* out)
/* print a BLAST gapped alignment for debugging purposes */
{
struct blastBlock *bb;
-fprintf(out, "%s <=> %s\n", ba->query->query, ba->targetName);
+fprintf(out, "%s <=> %s", ba->query->query, ba->targetName);
+if (ba->psiRound > 0)
+ fprintf(out, " round: %d", ba->psiRound);
+fputc('\n', out);
for (bb = ba->blocks; bb != NULL; bb = bb->next)
{
blastBlockPrint(bb, out);
}