src/lib/blastOut.c 1.28
1.28 2009/02/26 00:05:49 markd
don't go past end of string if code checked out with -kk
Index: src/lib/blastOut.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/blastOut.c,v
retrieving revision 1.27
retrieving revision 1.28
diff -b -B -U 1000000 -r1.27 -r1.28
--- src/lib/blastOut.c 28 Apr 2008 07:30:38 -0000 1.27
+++ src/lib/blastOut.c 26 Feb 2009 00:05:49 -0000 1.28
@@ -1,827 +1,831 @@
/* blastOut.c - stuff to output an alignment in blast format. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "axt.h"
#include "obscure.h"
#include "genoFind.h"
static char const rcsid[] = "$Id$";
struct axtRef
/* A reference to an axt. */
{
struct axtRef *next;
struct axt *axt;
};
static int axtRefCmpScore(const void *va, const void *vb)
/* Compare to sort based on score. */
{
const struct axtRef *a = *((struct axtRef **)va);
const struct axtRef *b = *((struct axtRef **)vb);
return b->axt->score - a->axt->score;
}
struct targetHits
/* Collection of hits to a single target. */
{
struct targetHits *next;
char *name; /* Target name */
int size; /* Target size */
struct axtRef *axtList; /* List of axts, sorted by score. */
int score; /* Score of best element */
};
static void targetHitsFree(struct targetHits **pObj)
/* Free one target hits structure. */
{
struct targetHits *obj = *pObj;
if (obj != NULL)
{
freeMem(obj->name);
slFreeList(&obj->axtList);
freez(pObj);
}
}
static void targetHitsFreeList(struct targetHits **pList)
/* Free a list of dynamically allocated targetHits's */
{
struct targetHits *el, *next;
for (el = *pList; el != NULL; el = next)
{
next = el->next;
targetHitsFree(&el);
}
*pList = NULL;
}
static int targetHitsCmpScore(const void *va, const void *vb)
/* Compare to sort based on score. */
{
const struct targetHits *a = *((struct targetHits **)va);
const struct targetHits *b = *((struct targetHits **)vb);
return b->score - a->score;
}
static int blastzToWublastScore(int bzScore)
/* Convert from 100 points/match blastz score to 5 points/match
* wu-blast score. */
{
return bzScore/19;
}
static double blastzScoreToWuBits(int bzScore, boolean isProt)
/* Convert from blastz score to bit score. The magic number's
* here are taken from comparing wu-blast scores to axtScores on
* a couple of sequences. This doesn't really seem to be a bit
* score. It's not very close to 2 bits per base. */
{
int wuScore = blastzToWublastScore(bzScore);
if (isProt)
return wuScore * 0.3545;
else
return wuScore * 0.1553;
}
static double blastzScoreToWuExpectation(int bzScore, double databaseSize)
/* I'm puzzled by the wu-blast expectation score. I would
* think it would be databaseSize / (2^bitScore) but
* it's not. I think the best I can do is approximate
* it with something scaled to be close to this expectation. */
{
double logProbOne = -log(2) * bzScore / 32.1948;
return databaseSize * exp(logProbOne);
}
static int blastzScoreToNcbiBits(int bzScore)
/* Convert blastz score to bit score in NCBI sense. */
{
return round(bzScore * 0.0205);
}
static int blastzScoreToNcbiScore(int bzScore)
/* Conver blastz score to NCBI matrix score. */
{
return round(bzScore * 0.0529);
}
static double blastzScoreToNcbiExpectation(int bzScore)
/* Convert blastz score to expectation in NCBI sense. */
{
double bits = bzScore * 0.0205;
double logProb = -bits*log(2);
return 3.0e9 * exp(logProb);
}
static double expectationToProbability(double e)
/* Convert expected number of hits to probability of at least
* one hit. This is a crude approximation, but actually pretty precise
* for e < 0.1, which is all that really matters.... */
{
if (e < 0.999)
return e;
else
return 0.999;
}
static int countMatches(char *a, char *b, int size)
/* Count number of characters that match between a and b. */
{
int count = 0;
int i;
for (i=0; i<size; ++i)
if (toupper(a[i]) == toupper(b[i]))
++count;
return count;
}
static int countGaps(char *a, char *b, int size)
/* Count number of inserts in either strand. */
{
int count = 0;
int i;
for (i=0; i<size; ++i)
{
if (a[i] == '-')
++count;
if (b[i] == '-')
++count;
}
return count;
}
static int countGapOpens(char *a, char *b, int size)
/* Count number of inserts in either strand. */
{
int i, count = 0;
boolean inGap = FALSE;
for (i=0; i<size; ++i)
{
if (a[i] == '-' || b[i] == '-')
{
if (!inGap)
{
++count;
inGap = TRUE;
}
}
else
{
inGap = FALSE;
}
}
return count;
}
static int countPositives(char *a, char *b, int size)
/* Count positive (not necessarily identical) protein matches. */
{
int count = 0;
int i;
struct axtScoreScheme *ss = axtScoreSchemeProteinDefault();
for (i=0; i<size; ++i)
{
if (ss->matrix[(int)a[i]][(int)b[i]] > 0)
++count;
}
return count;
}
static int plusStrandPos(int pos, int size, char strand, boolean isEnd)
/* Return position on plus strand, one based. */
{
if (strand == '-')
{
pos = size - pos;
if (isEnd)
++pos;
}
else
{
if (!isEnd)
++pos;
}
return pos;
}
static int calcDigitCount(struct axt *axt, int tSize, int qSize)
/* Figure out how many digits needed for blast position display. */
{
int tDig, qDig;
if (axt->qStrand == '-')
qDig = digitsBaseTen(qSize - axt->qStart + 1);
else
qDig = digitsBaseTen(axt->qEnd);
if (axt->tStrand == '-')
tDig = digitsBaseTen(tSize - axt->tStart + 1);
else
tDig = digitsBaseTen(axt->tEnd);
return max(tDig, qDig);
}
static void blastiodAxtOutput(FILE *f, struct axt *axt, int tSize, int qSize,
int lineSize, boolean isProt, boolean isTranslated)
/* Output base-by-base part of alignment in blast-like fashion. */
{
int tOff = axt->tStart;
int dt = (isTranslated ? 3 : 1);
int qOff = axt->qStart;
int lineStart, lineEnd, i;
int digits = calcDigitCount(axt, tSize, qSize);
struct axtScoreScheme *ss = NULL;
if (isProt)
ss = axtScoreSchemeProteinDefault();
for (lineStart = 0; lineStart < axt->symCount; lineStart = lineEnd)
{
lineEnd = lineStart + lineSize;
if (lineEnd > axt->symCount) lineEnd = axt->symCount;
fprintf(f, "Query: %-*d ", digits, plusStrandPos(qOff, qSize, axt->qStrand, FALSE));
for (i=lineStart; i<lineEnd; ++i)
{
char c = axt->qSym[i];
fputc(c, f);
if (c != '-')
++qOff;
}
fprintf(f, " %-d\n", plusStrandPos(qOff, qSize, axt->qStrand, TRUE));
fprintf(f, " %*s ", digits, " ");
for (i=lineStart; i<lineEnd; ++i)
{
char q, t, c;
q = axt->qSym[i];
t = axt->tSym[i];
if (isProt)
{
if (q == t)
c = q;
else if (ss->matrix[(int)q][(int)t] > 0)
c = '+';
else
c = ' ';
}
else
c = ((toupper(q) == toupper(t)) ? '|' : ' ');
fputc(c, f);
}
fprintf(f, "\n");
fprintf(f, "Sbjct: %-*d ", digits, plusStrandPos(tOff, tSize, axt->tStrand, FALSE));
for (i=lineStart; i<lineEnd; ++i)
{
char c = axt->tSym[i];
fputc(c, f);
if (c != '-')
tOff += dt;
}
fprintf(f, " %-d\n", plusStrandPos(tOff, tSize, axt->tStrand, TRUE));
fprintf(f, "\n");
}
}
static struct targetHits *bundleIntoTargets(struct axtBundle *abList)
/* BLAST typically outputs everything on the same query and target
* in one clump. This routine rearranges axts in abList to do this. */
{
struct targetHits *targetList = NULL, *target;
struct hash *targetHash = newHash(10);
struct axtBundle *ab;
struct axtRef *ref;
/* Build up a list of targets in database hit by query sorted by
* score of hits. */
for (ab = abList; ab != NULL; ab = ab->next)
{
struct axt *axt;
for (axt = ab->axtList; axt != NULL; axt = axt->next)
{
target = hashFindVal(targetHash, axt->tName);
if (target == NULL)
{
AllocVar(target);
slAddHead(&targetList, target);
hashAdd(targetHash, axt->tName, target);
target->name = cloneString(axt->tName);
target->size = ab->tSize;
}
if (axt->score > target->score)
target->score = axt->score;
AllocVar(ref);
ref->axt = axt;
slAddHead(&target->axtList, ref);
}
}
slSort(&targetList, targetHitsCmpScore);
for (target = targetList; target != NULL; target = target->next)
slSort(&target->axtList, axtRefCmpScore);
hashFree(&targetHash);
return targetList;
}
static char *nameForStrand(char strand)
/* Return Plus/Minus for +/- */
{
if (strand == '-')
return "Minus";
else
return "Plus";
}
static char *progType(boolean isProt, struct axtBundle *ab, boolean isUpper)
/* Return blast 'program' */
{
if (!isProt)
return isUpper ? "BLASTN" : "blastn";
else
{
if (ab->axtList->frame != 0)
return isUpper ? "TBLASTN" : "tblastn";
else
return isUpper ? "BLASTP" : "blastp";
}
}
static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
FILE *f,
char *databaseName, int databaseSeqCount, double databaseLetterCount,
char *ourId)
/* Do wublast-like output at end of processing query. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int isRc;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);
/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
ourId = "axtBlastOut";
fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
fprintf(f, "All Rights Reserved\n");
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
if (!isProt)
{
fprintf(f, "Notice: this program and its default parameter settings are optimized to find\n");
fprintf(f, "nearly identical sequences very rapidly. For slower but more sensitive\n");
fprintf(f, "alignments please use other methods.\n");
fprintf(f, "\n");
}
/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters; record %d)\n", abList->qSize, queryIx);
fprintf(f, "\n");
fprintf(f, "Database: %s\n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
fprintf(f, "\n");
targetList = bundleIntoTargets(abList);
/* Print out summary of hits. */
fprintf(f, " Smallest\n");
fprintf(f, " Sum\n");
fprintf(f, " High Probability\n");
fprintf(f, "Sequences producing High-scoring Segment Pairs: Score P(N) N\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
{
double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
double p = expectationToProbability(expectation);
fprintf(f, "%-61s %4d %8.1e %2d\n", target->name,
blastzToWublastScore(target->score), p, slCount(target->axtList));
}
/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
{
fprintf(f, "\n\n>%s\n", target->name);
fprintf(f, " Length = %d\n", target->size);
fprintf(f, "\n");
for (isRc=0; isRc <= 1; ++isRc)
{
boolean saidStrand = FALSE;
char strand = (isRc ? '-' : '+');
char *strandName = nameForStrand(strand);
struct axtRef *ref;
struct axt *axt;
for (ref = target->axtList; ref != NULL; ref = ref->next)
{
axt = ref->axt;
if (axt->qStrand == strand)
{
int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
if (!saidStrand)
{
saidStrand = TRUE;
if (!isProt)
fprintf(f, " %s Strand HSPs:\n\n", strandName);
}
fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
blastzToWublastScore(axt->score),
blastzScoreToWuBits(axt->score, isProt),
blastzScoreToWuExpectation(axt->score, databaseLetterCount),
blastzScoreToWuExpectation(axt->score, databaseLetterCount));
fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
matches, axt->symCount, round(100.0 * matches / axt->symCount),
positives, axt->symCount, round(100.0 * positives / axt->symCount));
if (isProt)
{
if (axt->frame != 0)
fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
fprintf(f, "\n");
}
else
fprintf(f, ", Strand = %s / Plus\n", strandName);
fprintf(f, "\n");
blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
}
}
}
}
/* Cleanup time. */
targetHitsFreeList(&targetList);
}
static void ncbiPrintE(FILE *f, double e)
/* Print a small number NCBI style. */
{
if (e <= 0.0)
fprintf(f, "0.0");
else if (e <= 1.0e-100)
{
char buf[256], *s;
safef(buf, sizeof(buf), "%e", e);
s = strchr(buf, 'e');
if (s == NULL)
fprintf(f, "0.0");
else
fprintf(f, "%s", s);
}
else
fprintf(f, "%1.0e", e);
}
/* special global variable needed for Known Genes track building. Fan 1/21/03 */
int answer_for_kg;
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
FILE *f, char *databaseName, int databaseSeqCount,
double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);
/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");
/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, " (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n", databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, " %d sequences; %s total letters\n", databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");
targetList = bundleIntoTargets(abList);
/* Print out summary of hits. */
fprintf(f, " Score E\n");
fprintf(f, "Sequences producing significant alignments: (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
{
struct axtRef *ref;
struct axt *axt;
int matches;
double identity, expectation;
int bit;
for (ref = target->axtList; ref != NULL; ref = ref->next)
{
axt = ref->axt;
matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
identity = round(100.0 * matches / axt->symCount);
/* skip output if minIdentity not reached */
if (identity < minIdentity) continue;
bit = blastzScoreToNcbiBits(axt->score);
expectation = blastzScoreToNcbiExpectation(axt->score);
fprintf(f, "%-67s %4d ", target->name, bit);
ncbiPrintE(f, expectation);
fprintf(f, "\n");
}
}
fprintf(f, "\n");
/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
{
struct axtRef *ref;
struct axt *axt;
int matches, gaps;
char *oldName;
int ii = 0;
double identity;
oldName = strdup("");
for (ref = target->axtList; ref != NULL; ref = ref->next)
{
ii++;
axt = ref->axt;
matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
identity = round(100.0 * matches / axt->symCount);
/* skip output if minIdentity not reached */
if (identity < minIdentity) continue;
/* print target sequence name and length only once */
if (!sameWord(oldName, target->name))
{
fprintf(f, "\n\n>%s \n", target->name);
fprintf(f, " Length = %d\n", target->size);
oldName = strdup(target->name);
}
fprintf(f, "\n");
fprintf(f, " Score = %d bits (%d), Expect = ",
blastzScoreToNcbiBits(axt->score),
blastzScoreToNcbiScore(axt->score));
ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
fprintf(f, "\n");
if (isProt)
{
int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
fprintf(f, " Identities = %d/%d (%d%%),",
matches, axt->symCount, round(100.0 * matches / axt->symCount));
fprintf(f, " Positives = %d/%d (%d%%),",
positives, axt->symCount, round(100.0 * positives / axt->symCount));
fprintf(f, " Gaps = %d/%d (%d%%)\n",
gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
if (axt->frame != 0)
fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
/* set the special global variable, answer_for_kg.
This is needed for Known Genes track building. Fan 1/21/03 */
answer_for_kg=axt->symCount - matches;
}
else
{
fprintf(f, " Identities = %d/%d (%d%%)\n",
matches, axt->symCount, round(100.0 * matches / axt->symCount));
/* blast displays dna searches as +- instead of blat's default -+ */
if (!isTranslated)
if ((axt->qStrand == '-') && (axt->tStrand == '+'))
{
reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
reverseComplement(axt->qSym, axt->symCount);
reverseComplement(axt->tSym, axt->symCount);
axt->qStrand = '+';
axt->tStrand = '-';
}
fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
nameForStrand(axt->tStrand));
}
fprintf(f, "\n");
blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
}
}
fprintf(f, " Database: %s\n", databaseName);
/* Cleanup time. */
targetHitsFreeList(&targetList);
}
static void xmlBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
FILE *f, char *databaseName, int databaseSeqCount,
double databaseLetterCount, char *ourId)
/* Do ncbi blast xml-like output at end of processing query.
* WARNING - still not completely baked. Format at NCBI seems
* to be missing some end tags actually when I checked. -jk */
{
char *queryName = abList->axtList->qName;
int querySize = abList->qSize;
int hitNum = 0;
struct targetHits *targetList = NULL, *target;
if (ourId == NULL)
ourId = "axtBlastOut";
fprintf(f, "<?xml version=\"1.0\"?>\n");
fprintf(f, "<!DOCTYPE BlastOutput PUBLIC \"-//NCBI//NCBI BlastOutput/EN\" \"NCBI_BlastOutput.dtd\">\n");
fprintf(f, "<BlastOutput>\n");
fprintf(f, " <BlastOutput_program>%s</BlastOutput_program>\n",
progType(isProt, abList, FALSE));
fprintf(f, " <BlastOutput_version>%s 2.2.4 [%s]</BlastOutput_version>\n",
progType(isProt, abList, FALSE), ourId);
fprintf(f, " <BlastOutput_reference>~Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool</BlastOutput_reference>\n");
fprintf(f, " <BlastOutput_db>%s</BlastOutput_db>\n", databaseName);
fprintf(f, " <BlastOutput_query-ID>%d</BlastOutput_query-ID>\n", queryIx);
fprintf(f, " <BlastOutput_query-def>%s</BlastOutput_query-def>\n", queryName);
fprintf(f, " <BlastOutput_query-len>%d</BlastOutput_query-len>\n", querySize);
if (isProt)
{
fprintf(f, " <BlastOutput_param>\n");
fprintf(f, " <Parameters_matrix>BLOSUM62</Parameters_matrix>\n");
fprintf(f, " <Parameters_expect>0.001</Parameters_expect>\n");
fprintf(f, " <Parameters_expect>10</Parameters_expect>\n");
fprintf(f, " <Parameters_gap-extend>1</Parameters_gap-extend>\n");
fprintf(f, " </BlastOutput_param>\n");
}
fprintf(f, " <BlastOutput_iterations>\n");
fprintf(f, " <Iteration>\n");
fprintf(f, " <Iteration_iter-num>1</Iteration_iter-num>\n");
fprintf(f, " <Iteration_hits>\n");
/* Print out details on each target. */
targetList = bundleIntoTargets(abList);
for (target = targetList; target != NULL; target = target->next)
{
struct axtRef *ref;
fprintf(f, " <hit>\n");
fprintf(f, " <Hit_num>%d</Hit_num>\n", hitNum);
fprintf(f, " <Hit_id>%s</Hit_id>\n", target->name);
fprintf(f, " <Hit_accession>%s</Hit_accession>\n", target->name);
fprintf(f, " <Hit_len>%d</Hit_len>\n", target->size);
fprintf(f, " <Hit_hsps>\n");
for (ref = target->axtList; ref != NULL; ref = ref->next)
{
struct axt *axt = ref->axt;
int hspIx = 0;
fprintf(f, " <Hsp>\n");
fprintf(f, " <Hsp_num>%d</Hsp_num>\n", ++hspIx);
fprintf(f, " <Hsp_bit-score>%d</Hsp_bit-score>\n",
blastzScoreToNcbiBits(axt->score));
fprintf(f, " <Hsp_score>%d</Hsp_score>\n",
blastzScoreToNcbiScore(axt->score));
fprintf(f, " <Hsp_evalue>%f</Hsp_evalue>\n",
blastzScoreToNcbiExpectation(axt->score));
fprintf(f, " <Hsp_query-from>%d</Hsp_query-from>\n",
axt->qStart+1);
fprintf(f, " <Hsp_query-to>%d</Hsp_query-to>\n", axt->qEnd);
fprintf(f, " <Hsp_hit-from>%d</Hsp_hit-from>\n",
axt->tStart+1);
fprintf(f, " <Hsp_hit-to>%d</Hsp_hit-to>\n", axt->tEnd);
fprintf(f, " </Hsp>\n");
}
fprintf(f, " </Hit_hsps>\n");
}
fprintf(f, " </Iteration_hits>\n");
fprintf(f, " </Iteration>\n");
fprintf(f, " </BlastOutput_iterations>\n");
fprintf(f, "</BlastOutput>\n");
}
static void printAxtTargetBlastTab(FILE *f, struct axt *axt, int targetSize)
/* Print out target in tabular blast-oriented format. */
{
int s = axt->tStart, e = axt->tEnd;
if (axt->tStrand == '-')
reverseIntRange(&s, &e, targetSize);
if (axt->tStrand == axt->qStrand)
{
fprintf(f, "%d\t", s+1);
fprintf(f, "%d\t", e);
}
else
{
fprintf(f, "%d\t", e);
fprintf(f, "%d\t", s+1);
}
}
static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt,
FILE *f, char *databaseName, int databaseSeqCount,
double databaseLetterCount, char *ourId, boolean withComment)
/* Do NCBI tabular blast output. */
{
char *queryName = abList->axtList->qName;
int querySize = abList->qSize;
struct targetHits *targetList = NULL, *target;
if (withComment)
{
+ // use date from CVS, unless checked out with -kk, then ignore.
char * rcsDate = "$Date$";
char dateStamp[11];
- strncpy (dateStamp, rcsDate+7, 10);
+ if (strlen(rcsDate) > 17)
+ safencpy(dateStamp, sizeof(dateStamp), rcsDate+7, 10);
+ else
+ safecpy(dateStamp, sizeof(dateStamp), "");
dateStamp[10] = 0;
fprintf(f, "# BLAT %s [%s]\n", gfVersion, dateStamp);
fprintf(f, "# Query: %s\n", queryName);
fprintf(f, "# Database: %s\n", databaseName);
fprintf(f, "%s\n",
"# Fields: Query id, Subject id, % identity, alignment length, "
"mismatches, gap openings, q. start, q. end, s. start, s. end, "
"e-value, bit score");
}
/* Print out details on each target. */
targetList = bundleIntoTargets(abList);
for (target = targetList; target != NULL; target = target->next)
{
struct axtRef *ref;
for (ref = target->axtList; ref != NULL; ref = ref->next)
{
struct axt *axt = ref->axt;
int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
int gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
int gapOpens = countGapOpens(axt->qSym, axt->tSym, axt->symCount);
fprintf(f, "%s\t", axt->qName);
fprintf(f, "%s\t", axt->tName);
fprintf(f, "%.2f\t", 100.0 * matches/axt->symCount);
fprintf(f, "%d\t", axt->symCount);
fprintf(f, "%d\t", axt->symCount - matches - gaps);
fprintf(f, "%d\t", gapOpens);
if (axt->qStrand == '-')
{
int s = axt->qStart, e = axt->qEnd;
reverseIntRange(&s, &e, querySize);
fprintf(f, "%d\t", s+1);
fprintf(f, "%d\t", e);
printAxtTargetBlastTab(f, axt, target->size);
}
else
{
fprintf(f, "%d\t", axt->qStart + 1);
fprintf(f, "%d\t", axt->qEnd);
printAxtTargetBlastTab(f, axt, target->size);
}
fprintf(f, "%3.1e\t", blastzScoreToNcbiExpectation(axt->score));
fprintf(f, "%d.0\n", blastzScoreToNcbiBits(axt->score));
}
}
/* Cleanup time. */
targetHitsFreeList(&targetList);
}
void axtBlastOut(struct axtBundle *abList,
int queryIx, boolean isProt, FILE *f,
char *databaseName, int databaseSeqCount, double databaseLetterCount,
char *blastType, char *ourId, double minIdentity)
/* Output a bundle of axt's on the same query sequence in blast format.
* The parameters in detail are:
* ab - the list of bundles of axt's.
* f - output file handle
* databaseSeqCount - number of sequences in database
* databaseLetterCount - number of bases or aa's in database
* blastType - blast/wublast/blast8/blast9/xml
* ourId - optional (may be NULL) thing to put in header
*/
{
if (abList == NULL)
return;
if (sameWord(blastType, "wublast"))
wuBlastOut(abList, queryIx, isProt, f, databaseName,
databaseSeqCount, databaseLetterCount, ourId);
else if (sameWord(blastType, "xml"))
xmlBlastOut(abList, queryIx, isProt, f, databaseName,
databaseSeqCount, databaseLetterCount, ourId);
else if (sameWord(blastType, "blast"))
ncbiBlastOut(abList, queryIx, isProt, f, databaseName,
databaseSeqCount, databaseLetterCount, ourId, minIdentity);
else if (sameWord(blastType, "blast8"))
tabBlastOut(abList, queryIx, isProt, f, databaseName,
databaseSeqCount, databaseLetterCount, ourId, FALSE);
else if (sameWord(blastType, "blast9"))
tabBlastOut(abList, queryIx, isProt, f, databaseName,
databaseSeqCount, databaseLetterCount, ourId, TRUE);
else
errAbort("Unrecognized blastType %s in axtBlastOut", blastType);
}