e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/lib/blastOut.c src/lib/blastOut.c
index 042a3f0..ea36c2c 100644
--- src/lib/blastOut.c
+++ src/lib/blastOut.c
@@ -1,830 +1,833 @@
 /* blastOut.c - stuff to output an alignment in blast format. */
+
+/* Copyright (C) 2011 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "axt.h"
 #include "obscure.h"
 #include "genoFind.h"
 
 
 struct axtRef
 /* A reference to an axt. */
     {
     struct axtRef *next;
     struct axt *axt;
     };
 
 static int axtRefCmpScore(const void *va, const void *vb)
 /* Compare to sort based on score. */
 {
 const struct axtRef *a = *((struct axtRef **)va);
 const struct axtRef *b = *((struct axtRef **)vb);
 return b->axt->score - a->axt->score;
 }
 
 struct targetHits
 /* Collection of hits to a single target. */
     {
     struct targetHits *next;
     char *name;	    	    /* Target name */
     int size;		    /* Target size */
     struct axtRef *axtList; /* List of axts, sorted by score. */
     int score;		    /* Score of best element */
     };
 
 static void targetHitsFree(struct targetHits **pObj)
 /* Free one target hits structure. */
 {
 struct targetHits *obj = *pObj;
 if (obj != NULL)
     {
     freeMem(obj->name);
     slFreeList(&obj->axtList);
     freez(pObj);
     }
 }
 
 static void targetHitsFreeList(struct targetHits **pList)
 /* Free a list of dynamically allocated targetHits's */
 {
 struct targetHits *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     targetHitsFree(&el);
     }
 *pList = NULL;
 }
 
 
 static int targetHitsCmpScore(const void *va, const void *vb)
 /* Compare to sort based on score. */
 {
 const struct targetHits *a = *((struct targetHits **)va);
 const struct targetHits *b = *((struct targetHits **)vb);
 
 return b->score - a->score;
 }
 
 static int blastzToWublastScore(int bzScore)
 /* Convert from 100 points/match blastz score to 5 points/match
  * wu-blast score. */
 {
 return bzScore/19;
 }
 
 static double blastzScoreToWuBits(int bzScore, boolean isProt)
 /* Convert from blastz score to bit score.  The magic number's
  * here are taken from comparing wu-blast scores to axtScores on
  * a couple of sequences.  This doesn't really seem to be a bit
  * score.  It's not very close to 2 bits per base. */
 {
 int wuScore = blastzToWublastScore(bzScore);
 if (isProt)
     return wuScore * 0.3545;
 else
     return wuScore * 0.1553;
 }
 
 static double blastzScoreToWuExpectation(int bzScore, double databaseSize)
 /* I'm puzzled by the wu-blast expectation score.  I would
  * think it would be  databaseSize / (2^bitScore)  but
  * it's not.   I think the best I can do is approximate
  * it with something scaled to be close to this expectation. */
 {
 double logProbOne = -log(2) * bzScore / 32.1948;
 return databaseSize * exp(logProbOne);
 }
 
 static int blastzScoreToNcbiBits(int bzScore)
 /* Convert blastz score to bit score in NCBI sense. */
 {
 return round(bzScore * 0.0205);
 }
 
 static int blastzScoreToNcbiScore(int bzScore)
 /* Conver blastz score to NCBI matrix score. */
 {
 return round(bzScore * 0.0529);
 }
 
 static double blastzScoreToNcbiExpectation(int bzScore)
 /* Convert blastz score to expectation in NCBI sense. */
 {
 double bits = bzScore * 0.0205;
 double logProb = -bits*log(2);
 return 3.0e9 * exp(logProb);
 }
 
 static double expectationToProbability(double e)
 /* Convert expected number of hits to probability of at least
  * one hit.  This is a crude approximation, but actually pretty precise
  * for e < 0.1, which is all that really matters.... */
 {
 if (e < 0.999)
     return e;
 else
     return 0.999;
 }
 
 static int countMatches(char *a, char *b, int size)
 /* Count number of characters that match between a and b. */
 {
 int count = 0;
 int i;
 for (i=0; i<size; ++i)
     if (toupper(a[i]) == toupper(b[i]))
         ++count;
 return count;
 }
 
 static int countGaps(char *a, char *b, int size)
 /* Count number of inserts in either strand. */
 {
 int count = 0;
 int i;
 for (i=0; i<size; ++i)
    {
    if (a[i] == '-')
        ++count;
    if (b[i] == '-')
        ++count;
    }
 return count;
 }
 
 static int countGapOpens(char *a, char *b, int size)
 /* Count number of inserts in either strand. */
 {
 int i, count = 0;
 boolean inGap = FALSE;
 for (i=0; i<size; ++i)
     {
     if (a[i] == '-' || b[i] == '-')
         {
 	if (!inGap)
 	    {
 	    ++count;
 	    inGap = TRUE;
 	    }
 	}
     else
         {
 	inGap = FALSE;
 	}
     }
 return count;
 }
 
 
 static int countPositives(char *a, char *b, int size)
 /* Count positive (not necessarily identical) protein matches. */
 {
 int count = 0;
 int i;
 struct axtScoreScheme *ss = axtScoreSchemeProteinDefault();
 for (i=0; i<size; ++i)
     {
     if (ss->matrix[(int)a[i]][(int)b[i]] > 0)
         ++count;
     }
 return count;
 }
 
 static int plusStrandPos(int pos, int size, char strand, boolean isEnd)
 /* Return position on plus strand, one based. */
 {
 if (strand == '-')
     {
     pos = size - pos;
     if (isEnd)
        ++pos;
     }
 else
     {
     if (!isEnd)
         ++pos;
     }
 return pos;
 }
 
 static int calcDigitCount(struct axt *axt, int tSize, int qSize)
 /* Figure out how many digits needed for blast position display. */
 {
 int tDig, qDig;
 if (axt->qStrand == '-')
     qDig = digitsBaseTen(qSize - axt->qStart + 1);
 else
     qDig = digitsBaseTen(axt->qEnd);
 if (axt->tStrand == '-')
     tDig = digitsBaseTen(tSize - axt->tStart + 1);
 else
     tDig = digitsBaseTen(axt->tEnd);
 return max(tDig, qDig);
 }
 
 static void blastiodAxtOutput(FILE *f, struct axt *axt, int tSize, int qSize, 
 	int lineSize, boolean isProt, boolean isTranslated)
 /* Output base-by-base part of alignment in blast-like fashion. */
 {
 int tOff = axt->tStart;
 int dt = (isTranslated ? 3 : 1);
 int qOff = axt->qStart;
 int lineStart, lineEnd, i;
 int digits = calcDigitCount(axt, tSize, qSize);
 struct axtScoreScheme *ss = NULL;
 
 if (isProt)
     ss = axtScoreSchemeProteinDefault();
 for (lineStart = 0; lineStart < axt->symCount; lineStart = lineEnd)
     {
     lineEnd = lineStart + lineSize;
     if (lineEnd > axt->symCount) lineEnd = axt->symCount;
     fprintf(f, "Query: %-*d ", digits, plusStrandPos(qOff, qSize, axt->qStrand, FALSE));
     for (i=lineStart; i<lineEnd; ++i)
 	{
 	char c = axt->qSym[i];
 	fputc(c, f);
 	if (c != '-')
 	    ++qOff;
 	}
     fprintf(f, " %-d\n", plusStrandPos(qOff, qSize, axt->qStrand, TRUE));
     fprintf(f, "       %*s ", digits, " ");
     for (i=lineStart; i<lineEnd; ++i)
         {
 	char q, t, c;
 	q = axt->qSym[i];
 	t = axt->tSym[i];
 	if (isProt)
 	    {
 	    if (q == t)
 	        c = q;
 	    else if (ss->matrix[(int)q][(int)t] > 0)
 	        c = '+';
 	    else
 	        c = ' ';
 	    }
 	else
 	    c = ((toupper(q) == toupper(t)) ? '|' : ' ');
 
 	fputc(c, f);
 	}
     fprintf(f, "\n");
     fprintf(f, "Sbjct: %-*d ", digits, plusStrandPos(tOff, tSize, axt->tStrand, FALSE));
     for (i=lineStart; i<lineEnd; ++i)
 	{
 	char c = axt->tSym[i];
 	fputc(c, f);
 	if (c != '-')
 	    tOff += dt;
 	}
     fprintf(f, " %-d\n", plusStrandPos(tOff, tSize, axt->tStrand, TRUE));
     fprintf(f, "\n");
     }
 }
 
 static struct targetHits *bundleIntoTargets(struct axtBundle *abList)
 /* BLAST typically outputs everything on the same query and target
  * in one clump.  This routine rearranges axts in abList to do this. */
 {
 struct targetHits *targetList = NULL, *target;
 struct hash *targetHash = newHash(10);
 struct axtBundle *ab;
 struct axtRef *ref;
 
 /* Build up a list of targets in database hit by query sorted by
  * score of hits. */
 for (ab = abList; ab != NULL; ab = ab->next)
     {
     struct axt *axt;
     for (axt = ab->axtList; axt != NULL; axt = axt->next)
 	{
 	target = hashFindVal(targetHash, axt->tName);
 	if (target == NULL)
 	    {
 	    AllocVar(target);
 	    slAddHead(&targetList, target);
 	    hashAdd(targetHash, axt->tName, target);
 	    target->name = cloneString(axt->tName);
 	    target->size = ab->tSize;
 	    }
 	if (axt->score > target->score)
 	    target->score = axt->score;
 	AllocVar(ref);
 	ref->axt = axt;
 	slAddHead(&target->axtList, ref);
 	}
     }
 slSort(&targetList, targetHitsCmpScore);
 for (target = targetList; target != NULL; target = target->next)
     slSort(&target->axtList, axtRefCmpScore);
 
 hashFree(&targetHash);
 return targetList;
 }
 
 static char *nameForStrand(char strand)
 /* Return Plus/Minus for +/- */
 {
 if (strand == '-')
     return "Minus";
 else
     return "Plus";
 }
 
 static char *progType(boolean isProt, struct axtBundle *ab, boolean isUpper)
 /* Return blast 'program' */
 {
 if (!isProt)
     return isUpper ? "BLASTN" : "blastn";
 else
     {
     if (ab->axtList->frame != 0)
         return isUpper ? "TBLASTN" : "tblastn";
     else
         return isUpper ? "BLASTP" : "blastp";
     }
 }
 
 static void wuBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
 	FILE *f, 
 	char *databaseName, int databaseSeqCount, double databaseLetterCount, 
 	char *ourId)
 /* Do wublast-like output at end of processing query. */
 {
 char asciiNum[32];
 struct targetHits *targetList = NULL, *target;
 char *queryName;
 int isRc;
 int querySize = abList->qSize;
 boolean isTranslated = (abList->axtList->frame != 0);
 
 /* Print out stuff that doesn't depend on query or database. */
 if (ourId == NULL)
     ourId = "axtBlastOut";
 fprintf(f, "%s 2.0MP-WashU [%s]\n", progType(isProt, abList, TRUE), ourId);
 fprintf(f, "\n");
 fprintf(f, "Copyright (C) 2000-2002 Jim Kent\n");
 fprintf(f, "All Rights Reserved\n");
 fprintf(f, "\n");
 fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
 fprintf(f, "\n");
 if (!isProt)
     {
     fprintf(f, "Notice:  this program and its default parameter settings are optimized to find\n");
     fprintf(f, "nearly identical sequences very rapidly.  For slower but more sensitive\n");
     fprintf(f, "alignments please use other methods.\n");
     fprintf(f, "\n");
     }
 
 /* Print query and database info. */
 queryName = abList->axtList->qName;
 fprintf(f, "Query=  %s\n", queryName);
 fprintf(f, "        (%d letters; record %d)\n", abList->qSize, queryIx);
 fprintf(f, "\n");
 fprintf(f, "Database:  %s\n",  databaseName);
 sprintLongWithCommas(asciiNum, databaseLetterCount);
 fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
 fprintf(f, "Searching....10....20....30....40....50....60....70....80....90....100%% done\n");
 fprintf(f, "\n");
 
 targetList = bundleIntoTargets(abList);
 
 /* Print out summary of hits. */
 fprintf(f, "                                                                     Smallest\n");
 fprintf(f, "                                                                       Sum\n");
 fprintf(f, "                                                              High  Probability\n");
 fprintf(f, "Sequences producing High-scoring Segment Pairs:              Score  P(N)      N\n");
 fprintf(f, "\n");
 for (target = targetList; target != NULL; target = target->next)
     {
     double expectation = blastzScoreToWuExpectation(target->score, databaseLetterCount);
     double p = expectationToProbability(expectation);
     fprintf(f, "%-61s %4d  %8.1e %2d\n", target->name, 
     	blastzToWublastScore(target->score), p, slCount(target->axtList));
     }
 
 /* Print out details on each target. */
 for (target = targetList; target != NULL; target = target->next)
     {
     fprintf(f, "\n\n>%s\n", target->name);
     fprintf(f, "        Length = %d\n", target->size);
     fprintf(f, "\n");
     for (isRc=0; isRc <= 1; ++isRc)
 	{
 	boolean saidStrand = FALSE;
 	char strand = (isRc ? '-' : '+');
 	char *strandName = nameForStrand(strand);
 	struct axtRef *ref;
 	struct axt *axt;
 	for (ref = target->axtList; ref != NULL; ref = ref->next)
 	    {
 	    axt = ref->axt;
 	    if (axt->qStrand == strand)
 		{
 		int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
 		int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
 		if (!saidStrand)
 		    {
 		    saidStrand = TRUE;
 		    if (!isProt)
 			fprintf(f, "  %s Strand HSPs:\n\n", strandName);
 		    }
 		fprintf(f, " Score = %d (%2.1f bits), Expect = %5.1e, P = %5.1e\n",
 		     blastzToWublastScore(axt->score), 
 		     blastzScoreToWuBits(axt->score, isProt),
 		     blastzScoreToWuExpectation(axt->score, databaseLetterCount),
 		     blastzScoreToWuExpectation(axt->score, databaseLetterCount));
 		fprintf(f, " Identities = %d/%d (%d%%), Positives = %d/%d (%d%%)",
 		     matches, axt->symCount, round(100.0 * matches / axt->symCount),
 		     positives, axt->symCount, round(100.0 * positives / axt->symCount));
 		if (isProt)
 		    {
 		    if (axt->frame != 0)
 		        fprintf(f, ", Frame = %c%d", axt->tStrand, axt->frame);
 		    fprintf(f, "\n");
 		    }
 		else
 		    fprintf(f, ", Strand = %s / Plus\n", strandName);
 		fprintf(f, "\n");
 		blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
 		}
 	    }
 	}
     }
 
 /* Cleanup time. */
 targetHitsFreeList(&targetList);
 }
 
 static void ncbiPrintE(FILE *f, double e)
 /* Print a small number NCBI style. */
 {
 if (e <= 0.0)
     fprintf(f, "0.0");
 else if (e <= 1.0e-100)
     {
     char buf[256], *s;
     safef(buf, sizeof(buf), "%e", e);
     s = strchr(buf, 'e');
     if (s == NULL)
 	fprintf(f, "0.0");
     else
         fprintf(f, "%s", s);
     }
 else
     fprintf(f, "%1.0e", e);
 }
 
 /* special global variable needed for Known Genes track building.  Fan 1/21/03 */
 int answer_for_kg;
 static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
 	FILE *f, char *databaseName, int databaseSeqCount, 
 	double databaseLetterCount, char *ourId, double minIdentity)
 /* Do ncbiblast-like output at end of processing query. */
 {
 char asciiNum[32];
 struct targetHits *targetList = NULL, *target;
 char *queryName;
 int querySize = abList->qSize;
 boolean isTranslated = (abList->axtList->frame != 0);
 
 /* Print out stuff that doesn't depend on query or database. */
 if (ourId == NULL)
     ourId = "axtBlastOut";
 fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
 fprintf(f, "\n");
 fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
 fprintf(f, "\n");
 
 /* Print query and database info. */
 queryName = abList->axtList->qName;
 fprintf(f, "Query= %s\n", queryName);
 fprintf(f, "         (%d letters)\n", abList->qSize);
 fprintf(f, "\n");
 fprintf(f, "Database: %s \n",  databaseName);
 sprintLongWithCommas(asciiNum, databaseLetterCount);
 fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
 fprintf(f, "\n");
 fprintf(f, "Searching.done\n");
 
 targetList = bundleIntoTargets(abList);
 
 /* Print out summary of hits. */
 fprintf(f, "                                                                 Score    E\n");
 fprintf(f, "Sequences producing significant alignments:                      (bits) Value\n");
 fprintf(f, "\n");
 for (target = targetList; target != NULL; target = target->next)
     {
     struct axtRef *ref;
     struct axt *axt;
     int matches;
     double identity, expectation;
     int bit;
     
     for (ref = target->axtList; ref != NULL; ref = ref->next)
 	{
 	axt = ref->axt;
 	
 	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
 	identity = round(100.0 * matches / axt->symCount);
 	/* skip output if minIdentity not reached */
 	if (identity < minIdentity) continue;
     
     	bit = blastzScoreToNcbiBits(axt->score);
         expectation = blastzScoreToNcbiExpectation(axt->score);
     	fprintf(f, "%-67s  %4d   ", target->name, bit);
     	ncbiPrintE(f, expectation);
     	fprintf(f, "\n");
     	}
     }
 fprintf(f, "\n");
 
 /* Print out details on each target. */
 for (target = targetList; target != NULL; target = target->next)
     {
     struct axtRef *ref;
     struct axt *axt;
     int matches, gaps;
     char *oldName;
     
     int ii = 0;
     double identity;
     oldName = strdup("");
 
     for (ref = target->axtList; ref != NULL; ref = ref->next)
 	{
 	ii++;
 	axt = ref->axt;
 	
 	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
 	identity = round(100.0 * matches / axt->symCount);
 	
 	/* skip output if minIdentity not reached */
 	if (identity < minIdentity) continue;
         
 	/* print target sequence name and length only once */ 
 	if (!sameWord(oldName, target->name))
 	    {
 	    fprintf(f, "\n\n>%s \n", target->name);
 	    fprintf(f, "          Length = %d\n", target->size);
 	    oldName = strdup(target->name);
 	    }
 
 	fprintf(f, "\n");
 	fprintf(f, " Score = %d bits (%d), Expect = ",
 	     blastzScoreToNcbiBits(axt->score),
 	     blastzScoreToNcbiScore(axt->score));
 	ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
 	fprintf(f, "\n");
 	
 	if (isProt)
 	    {
 	    int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
 	    gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
 	    fprintf(f, " Identities = %d/%d (%d%%),",
 		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
 	    fprintf(f, " Positives = %d/%d (%d%%),",
 		 positives, axt->symCount, round(100.0 * positives / axt->symCount));
 	    fprintf(f, " Gaps = %d/%d (%d%%)\n",
 		 gaps, axt->symCount, round(100.0 * gaps / axt->symCount));
 	    if (axt->frame != 0) 
 		fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
 	    /* set the special global variable, answer_for_kg.  
    	       This is needed for Known Genes track building.  Fan 1/21/03 */
             answer_for_kg=axt->symCount - matches;
 	    }
 	else
 	    {
 	    fprintf(f, " Identities = %d/%d (%d%%)\n",
 		 matches, axt->symCount, round(100.0 * matches / axt->symCount));
 	    /* blast displays dna searches as +- instead of blat's default -+ */
 	    if (!isTranslated)
 		if ((axt->qStrand == '-') && (axt->tStrand == '+'))
 		    {
 		    reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
 		    reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
 		    reverseComplement(axt->qSym, axt->symCount);
 		    reverseComplement(axt->tSym, axt->symCount);
 		    axt->qStrand = '+';
 		    axt->tStrand = '-';
 		    }
 	    fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
 		nameForStrand(axt->tStrand));
 	    }
 	fprintf(f, "\n");
 	blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
 	}
     }
 
 fprintf(f, "  Database: %s\n", databaseName);
 
 /* Cleanup time. */
 targetHitsFreeList(&targetList);
 }
 
 static void xmlBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
 	FILE *f, char *databaseName, int databaseSeqCount, 
 	double databaseLetterCount, char *ourId)
 /* Do ncbi blast xml-like output at end of processing query. 
  * WARNING - still not completely baked.  Format at NCBI seems
  * to be missing some end tags actually when I checked. -jk */
 {
 char *queryName = abList->axtList->qName;
 int querySize = abList->qSize;
 int hitNum = 0;
 struct targetHits *targetList = NULL, *target;
 
 if (ourId == NULL)
     ourId = "axtBlastOut";
 fprintf(f, "<?xml version=\"1.0\"?>\n");
 fprintf(f, "<!DOCTYPE BlastOutput PUBLIC \"-//NCBI//NCBI BlastOutput/EN\" \"NCBI_BlastOutput.dtd\">\n");
 fprintf(f, "<BlastOutput>\n");
 fprintf(f, "  <BlastOutput_program>%s</BlastOutput_program>\n", 
 	progType(isProt, abList, FALSE));
 fprintf(f, "  <BlastOutput_version>%s 2.2.4 [%s]</BlastOutput_version>\n",
 	progType(isProt, abList, FALSE), ourId);
 fprintf(f, "  <BlastOutput_reference>~Reference: Kent, WJ. (2002) BLAT - The BLAST-like alignment tool</BlastOutput_reference>\n");
 fprintf(f, "  <BlastOutput_db>%s</BlastOutput_db>\n", databaseName);
 fprintf(f, "  <BlastOutput_query-ID>%d</BlastOutput_query-ID>\n", queryIx);
 fprintf(f, "  <BlastOutput_query-def>%s</BlastOutput_query-def>\n", queryName);
 fprintf(f, "  <BlastOutput_query-len>%d</BlastOutput_query-len>\n", querySize);
 
 if (isProt)
     {
     fprintf(f, "  <BlastOutput_param>\n");
     fprintf(f, "    <Parameters_matrix>BLOSUM62</Parameters_matrix>\n");
     fprintf(f, "    <Parameters_expect>0.001</Parameters_expect>\n");
     fprintf(f, "    <Parameters_expect>10</Parameters_expect>\n");
     fprintf(f, "    <Parameters_gap-extend>1</Parameters_gap-extend>\n");
     fprintf(f, "  </BlastOutput_param>\n");
     }
 
 fprintf(f, "  <BlastOutput_iterations>\n");
 fprintf(f, "    <Iteration>\n");
 fprintf(f, "      <Iteration_iter-num>1</Iteration_iter-num>\n");
 fprintf(f, "      <Iteration_hits>\n");
 
 /* Print out details on each target. */
 targetList = bundleIntoTargets(abList);
 for (target = targetList; target != NULL; target = target->next)
     {
     struct axtRef *ref;
     fprintf(f, "      <hit>\n");
     fprintf(f, "        <Hit_num>%d</Hit_num>\n", hitNum);
     fprintf(f, "        <Hit_id>%s</Hit_id>\n", target->name);
     fprintf(f, "        <Hit_accession>%s</Hit_accession>\n", target->name);
     fprintf(f, "        <Hit_len>%d</Hit_len>\n", target->size);
     fprintf(f, "        <Hit_hsps>\n");
     for (ref = target->axtList; ref != NULL; ref = ref->next)
         {
 	struct axt *axt = ref->axt;
 	int hspIx = 0;
 	fprintf(f, "        <Hsp>\n");
 	fprintf(f, "          <Hsp_num>%d</Hsp_num>\n", ++hspIx);
 	fprintf(f, "          <Hsp_bit-score>%d</Hsp_bit-score>\n", 
 		blastzScoreToNcbiBits(axt->score));
 	fprintf(f, "          <Hsp_score>%d</Hsp_score>\n",
 		blastzScoreToNcbiScore(axt->score));
 	fprintf(f, "          <Hsp_evalue>%f</Hsp_evalue>\n",
 	        blastzScoreToNcbiExpectation(axt->score));
 	fprintf(f, "          <Hsp_query-from>%d</Hsp_query-from>\n", 
 		axt->qStart+1);
 	fprintf(f, "          <Hsp_query-to>%d</Hsp_query-to>\n", axt->qEnd);
 	fprintf(f, "          <Hsp_hit-from>%d</Hsp_hit-from>\n", 
 		axt->tStart+1);
 	fprintf(f, "          <Hsp_hit-to>%d</Hsp_hit-to>\n", axt->tEnd);
 	fprintf(f, "        </Hsp>\n");
 	}
 
     fprintf(f, "        </Hit_hsps>\n");
     }
 
 fprintf(f, "      </Iteration_hits>\n");
 fprintf(f, "    </Iteration>\n");
 fprintf(f, "  </BlastOutput_iterations>\n");
 fprintf(f, "</BlastOutput>\n");
 }
 
 static void printAxtTargetBlastTab(FILE *f, struct axt *axt, int targetSize)
 /* Print out target in tabular blast-oriented format. */
 {
 int s = axt->tStart, e = axt->tEnd;
 if (axt->tStrand == '-')
     reverseIntRange(&s, &e, targetSize);
 if (axt->tStrand == axt->qStrand)
     {
     fprintf(f, "%d\t", s+1);
     fprintf(f, "%d\t", e);
     }
 else
     {
     fprintf(f, "%d\t", e);
     fprintf(f, "%d\t", s+1);
     }
 }
 
 static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
 	FILE *f, char *databaseName, int databaseSeqCount, 
 	double databaseLetterCount, char *ourId, boolean withComment)
 /* Do NCBI tabular blast output. */
 {
 char *queryName = abList->axtList->qName;
 int querySize = abList->qSize;
 struct targetHits *targetList = NULL, *target;
 
 if (withComment)
     {
     // use date from CVS, unless checked out with -kk, then ignore.
     char * rcsDate = "$Date: 2009/02/26 00:05:49 $";
     char dateStamp[11];
     if (strlen(rcsDate) > 17)
         safencpy(dateStamp, sizeof(dateStamp), rcsDate+7, 10);
     else
         safecpy(dateStamp, sizeof(dateStamp), "");
     dateStamp[10] = 0;
     fprintf(f, "# BLAT %s [%s]\n", gfVersion, dateStamp);
     fprintf(f, "# Query: %s\n", queryName);
     fprintf(f, "# Database: %s\n", databaseName);
     fprintf(f, "%s\n", 
     	"# Fields: Query id, Subject id, % identity, alignment length, "
 	"mismatches, gap openings, q. start, q. end, s. start, s. end, "
 	"e-value, bit score");
     }
 
 /* Print out details on each target. */
 targetList = bundleIntoTargets(abList);
 for (target = targetList; target != NULL; target = target->next)
     {
     struct axtRef *ref;
     for (ref = target->axtList; ref != NULL; ref = ref->next)
         {
 	struct axt *axt = ref->axt;
 	int matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
 	int gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
 	int gapOpens = countGapOpens(axt->qSym, axt->tSym, axt->symCount);
 	fprintf(f, "%s\t", axt->qName);
 	fprintf(f, "%s\t", axt->tName);
 	fprintf(f, "%.2f\t", 100.0 * matches/axt->symCount);
 	fprintf(f, "%d\t", axt->symCount);
 	fprintf(f, "%d\t", axt->symCount - matches - gaps);
 	fprintf(f, "%d\t", gapOpens);
 	if (axt->qStrand == '-')
 	    {
 	    int s = axt->qStart, e = axt->qEnd;
 	    reverseIntRange(&s, &e, querySize);
 	    fprintf(f, "%d\t", s+1);
 	    fprintf(f, "%d\t", e);
 	    printAxtTargetBlastTab(f, axt, target->size);
 	    }
 	else
 	    {
 	    fprintf(f, "%d\t", axt->qStart + 1);
 	    fprintf(f, "%d\t", axt->qEnd);
 	    printAxtTargetBlastTab(f, axt, target->size);
 	    }
 	fprintf(f, "%3.1e\t", blastzScoreToNcbiExpectation(axt->score));
 	fprintf(f, "%d.0\n", blastzScoreToNcbiBits(axt->score));
 	}
     }
 
 /* Cleanup time. */
 targetHitsFreeList(&targetList);
 }
 
 void axtBlastOut(struct axtBundle *abList, 
 	int queryIx, boolean isProt, FILE *f, 
 	char *databaseName, int databaseSeqCount, double databaseLetterCount, 
 	char *blastType, char *ourId, double minIdentity)
 /* Output a bundle of axt's on the same query sequence in blast format.
  * The parameters in detail are:
  *   ab - the list of bundles of axt's. 
  *   f  - output file handle
  *   databaseSeqCount - number of sequences in database
  *   databaseLetterCount - number of bases or aa's in database
  *   blastType - blast/wublast/blast8/blast9/xml
  *   ourId - optional (may be NULL) thing to put in header
  */
 {
 if (abList == NULL)
     return;
 if (sameWord(blastType, "wublast"))
     wuBlastOut(abList, queryIx, isProt, f, databaseName,
    	databaseSeqCount, databaseLetterCount, ourId);
 else if (sameWord(blastType, "xml"))
     xmlBlastOut(abList, queryIx, isProt, f, databaseName,
         databaseSeqCount, databaseLetterCount, ourId);
 else if (sameWord(blastType, "blast"))
     ncbiBlastOut(abList, queryIx, isProt, f, databaseName,
         databaseSeqCount, databaseLetterCount, ourId, minIdentity);
 else if (sameWord(blastType, "blast8"))
     tabBlastOut(abList, queryIx, isProt, f, databaseName,
     	databaseSeqCount, databaseLetterCount, ourId, FALSE);
 else if (sameWord(blastType, "blast9"))
     tabBlastOut(abList, queryIx, isProt, f, databaseName,
     	databaseSeqCount, databaseLetterCount, ourId, TRUE);
 else
     errAbort("Unrecognized blastType %s in axtBlastOut", blastType);
 }