src/cdnaAli/exonAli/exonAli.c 6e5ee11ca95cd971984038cf65bae00d9c898707

6e5ee11ca95cd971984038cf65bae00d9c898707
galt
  Wed Jun 4 15:40:02 2014 -0700
Since we have git, it is easy to rename errabort.c to errAbort.c without losing any history.
diff --git src/cdnaAli/exonAli/exonAli.c src/cdnaAli/exonAli/exonAli.c
index 2ec9ca7..29d1179 100644
--- src/cdnaAli/exonAli/exonAli.c
+++ src/cdnaAli/exonAli/exonAli.c
@@ -1,970 +1,970 @@
 /* exonAli.c - Allign cDNAs based on assumption that alignments
  * will be clustered into exons. */
 #include "common.h"
 #include "portable.h"
 #include "dnautil.h"
 #include "shaRes.h"
 #include "dnaseq.h"
 #include "nt4.h"
 #include "fa.h"
 #include "wormdna.h"
 #include "htmshell.h"
 #include "cheapcgi.h"
 #include "fuzzyFind.h"
-#include "errabort.h"
+#include "errAbort.h"
 
 long clock1000();
 
 void usage()
 /* Describe how to use program and abort. */
 {
 errAbort("This program aligns cDNA with genomic sequence. "
          "Usage:\n"
          "    exonAli named output cdnaName(s)\n"
          "    exonAli in output listFile\n"
          "    exonAli all output faFile ntDir\n"
          "    exonAli starting output faFile ntDir startingIx [count]\n"
          "    exonAli resume output faFile ntDir\n");
 }
 
 
 FILE *reportFile = NULL;
 boolean isHtml = FALSE;
 
 void reportAndExtra(char *format, va_list args, char *extra)
 /* Print out to stdout and maybe a report file. Possibly
  * include extra string before report. */
 {
 if (reportFile != NULL)
     {
     if (extra != NULL)
         fprintf(reportFile, "%s", extra);
     vfprintf(reportFile, format, args);
     fflush(reportFile);
     }
 if (extra != NULL)
     fprintf(stdout, "%s", extra);
 vfprintf(stdout, format, args);
 if (isHtml)
     fprintf(stdout, "<BR>\n");
 }
 
 void reportWarning(char *format, va_list args)
 /* Print a warning message */
 {
 reportAndExtra(format, args, "###");
 }
 
 void vaReportf(char *format, va_list args)
 /* Print out to stdout and maybe a report file. */
 {
 reportAndExtra(format, args, NULL);
 }
 
 void reportf(char *format, ...)
 /* Print out to stdout and maybe a report file. */
 {
 va_list args;
 va_start(args, format);
 vaReportf(format, args);
 va_end(args);
 }
 
 
 
 #define tileSize 16
 /* # of bases we fit in 32 bits. */
 #define tileSizeShift 4
 /* How far to shift something to multiply it by tile size. */
 
 struct crudeHit
 /* A tileSize exact match between probe and target. */
     {
     int probeOffset;
     int targetOffset;
     boolean isLumped;
     };
 
 
 struct crudeExon
 /* A bunch of hits that hang together on a diagonal. */
     {
     int probeStart;
     int probeEnd;
     int targetStart;
     int targetEnd;
     int hitCount;
     boolean isLumped;
     };
 
 struct crudeGene
 /* A collection of diagonals that seem to hang together
  * like exons. */
     {
     struct nt4Seq *target;
     boolean isRc;
     int probeStart;
     int probeEnd;
     int targetStart;
     int targetEnd;
     int score;
     };
 
 int lumpHitsIntoExons(struct crudeHit *hits, int hitCount, struct crudeExon *exons)
 /* Lump together hits that are within 48 bp of each other and within 2 of
  * the same diagonal into "exons".  The exons array must be hitCount elements
  * in order to accomodate the worst case where no lumping occurs.  */
 {
 int i;
 struct crudeExon *exon = exons;
 int exonCount = 0;
 for (i=0; i<hitCount; ++i)
     hits[i].isLumped = FALSE;
 
 for (;;)
     {
     boolean startedExon = FALSE;
     int diagDiff = 0;
     int tOff = 0;
     int pOff = 0;
     int thisDiff = 0;
     for (i=0; i<hitCount; ++i)
         {
         if (hits[i].isLumped == FALSE)
             {
             pOff = hits[i].probeOffset;
             tOff = hits[i].targetOffset;
             thisDiff = pOff - tOff;
             if (!startedExon)   /* First hit in exon. */
                 {
                 startedExon = TRUE;
                 hits[i].isLumped = TRUE;
                 exon->probeStart = pOff;
                 exon->probeEnd = pOff + tileSize;
                 exon->targetStart = tOff;
                 exon->targetEnd = tOff + tileSize;
                 exon->hitCount = 1;
                 diagDiff = thisDiff;
                 }
             else if (diagDiff - 2 <= thisDiff && thisDiff <= diagDiff + 2
                     && exon->probeEnd - 2 <= pOff && pOff <= exon->probeEnd + 48
                     && exon->targetEnd - 2 <= tOff && tOff <= exon->targetEnd + 48)
                 {
                 hits[i].isLumped = TRUE;
                 exon->probeEnd = pOff + tileSize;
                 exon->targetEnd = tOff + tileSize;
                 exon->hitCount += 1;
                 }
             }
         }
     if (!startedExon)   /* Must be all lumped together. */
         break;
     ++exonCount;
     ++exon;
     }
 
 return exonCount;
 }
 
 int lumpExonsIntoGenes(struct crudeExon *exons, int exonCount, 
     struct crudeGene *genes, struct nt4Seq *target, boolean isRc)
 /* Lump together crude exons into crude genes, and score them. */
 {
 int i;
 struct crudeGene *gene = genes;
 int geneCount = 0;
 
 for (i=0; i<exonCount; ++i)
     exons[i].isLumped = FALSE;
 
 for (;;)
     {
     boolean startedGene = FALSE;
     int lastDiff = 0;
     int tOff = 0;
     int pOff = 0;
     int thisDiff = 0;
     int exonsInThis = 0;
 
     for (i=0; i<exonCount; ++i)
         {
         if (!exons[i].isLumped)
             {
             tOff = exons[i].targetStart;
             pOff = exons[i].probeStart;
             thisDiff = tOff - pOff;
             if (!startedGene)
                 {
                 startedGene = TRUE;
                 exons[i].isLumped = TRUE;
                 lastDiff = thisDiff;
                 exonsInThis = 1;
                 gene->target = target;
                 gene->isRc = isRc;
                 gene->probeStart = exons[i].probeStart;
                 gene->probeEnd = exons[i].probeEnd;
                 gene->targetStart = exons[i].targetStart;
                 gene->targetEnd = exons[i].targetEnd;
                 gene->score = exons[i].hitCount * exons[i].hitCount;
                 }
             else if (gene->targetEnd - 10 <= tOff && tOff <= gene->targetEnd + 25000
                      && gene->probeEnd - 10 <= pOff && pOff <= gene->probeEnd + 64
                      && lastDiff - 2 <= thisDiff)
                 {
                 exons[i].isLumped = TRUE;
                 gene->targetEnd = exons[i].targetEnd;
                 gene->probeEnd = exons[i].probeEnd;
                 gene->score += exons[i].hitCount * exons[i].hitCount;
                 exonsInThis += 1;
                 lastDiff = thisDiff;
                 }
             }
         }
     if (!startedGene)
         break;
     gene->score += exonsInThis;
     ++geneCount;
     ++gene;
     }
 return geneCount;
 }
 
 
 boolean makeGoodTile(DNA *dna, bits32 *retTile)
 /* Turn next 16 base pairs into a tile.  Return FALSE
  * if it wouldn't be a good tile. */
 {
 int i;
 DNA repeater[2*tileSize-1];
 bits32 tile;
 
 /* Make sure that tile isn't part of a pattern. */ 
 memcpy(repeater, dna+1, tileSize-1);
 memcpy(repeater+tileSize-1, dna, tileSize);
 if (memMatch(dna, tileSize, repeater, 2*tileSize-1) != NULL)
     return FALSE;
 
 /* Make sure no N's in tile */
 for (i=0; i<tileSize; ++i)
     {
     if (ntVal[(int)dna[i]] < 0)
         return FALSE;
     }
 
 /* Pack dna into tile, and make sure it's not on our list of
  * bad tiles. */
 tile = packDna16(dna);
 *retTile = tile;
 return TRUE;
 }
 
 struct probeTile
 /* A tile from the probe and the offset where it came from. */
     {
     struct probeTile *next;
     int offset;
     bits32 tile;
     };
 
 #define tileHashBits 16
 #define tileHashSize (1<<tileHashBits)
 #define tileHashMask (tileHashSize-1)
 #define tileHashFunc(tile) ((tile)&tileHashMask)
 
 struct probeTile **makeTileHash()
 {
 struct probeTile **table;
 int tableSize = tileHashSize * sizeof(table[0]);
 
 table = needLargeMem(tableSize);
 memset(table, 0, tableSize);
 return table;
 }
 
 
 struct fastProber
 /* Structure that has hash list of probeSeq tiles for fast searching */
     {
     struct probeTile **hash;
     struct probeTile *probes;
     };
 
 struct fastProber *makeFastProber(struct dnaSeq *probeSeq)
 {
 DNA *probeDna = probeSeq->dna;
 int maxProbeCount = probeSeq->size - tileSize + 1;
 int probeCount = 0;
 struct probeTile *probes;
 struct probeTile *probe;
 struct probeTile **hash;
 int i;
 struct fastProber *fp;
 
 if (maxProbeCount <= 0)
     return NULL;
 
 AllocVar(fp);
 fp->hash = hash = makeTileHash();
 fp->probes = probes = needMem(maxProbeCount * sizeof(probes[0]) );
 for (i=0; i<maxProbeCount; ++i)
     {
     probe = &probes[probeCount];
     if (makeGoodTile(probeDna+i, &probe->tile))
         {
         int hashIx = tileHashFunc(probe->tile);
         probe->offset = i;
         probe->next = hash[hashIx];
         hash[hashIx] = probe;
         ++probeCount;
         }
     }
 return fp;
 }
 
 void freeFastProber(struct fastProber **pFp)
 {
 struct fastProber *fp = *pFp;
 if (fp != NULL)
     {
     freeMem(fp->probes);
     freeMem(fp->hash);
     freez(pFp);
     }
 }
 
 int makeIndividualHits(struct probeTile **hash, bits32 *bases, int baseOffset, int baseWordCount,
     struct crudeHit *hits, int maxHitCount, int hitCount)
 /* Scan a short segment for individual hits. */
 {
 struct probeTile *probe;
 int i;
 for (i=0; i<baseWordCount; ++i)
     {
     if ((probe = hash[tileHashFunc(bases[i])]) != NULL)
         {
         do
             {
             if (bases[i] == probe->tile)
                 {
                 if (hitCount >= maxHitCount)
                     {
                     warn("Too many hits, only taking first %d", maxHitCount);
                     goto END_HIT_LOOP;
                     }
                 hits[hitCount].probeOffset = probe->offset;
                 hits[hitCount].targetOffset = ((i+baseOffset)<<tileSizeShift);
                 ++hitCount;
                 break;
                 }
             probe = probe->next;
             }
         while (probe != NULL);
         }
     }
 END_HIT_LOOP:
 return hitCount;
 }
 
 int makeHits(struct fastProber *fp, struct nt4Seq *target, struct crudeHit *hits,
     int maxHitCount)
 /* Scan entire target for hits to probe. */
 {
 bits32 *bases = target->bases;
 struct probeTile **hash = fp->hash;
 unsigned long acc;
 int hitCount = 0;
 int i;
 int baseWordCount = (target->baseCount>>tileSizeShift);
 bits32 *endBases = bases + baseWordCount;
 int chunkSize = 8;
 int chunkCount = baseWordCount/chunkSize;
 int baseOffset = 0;
 
 for (i=0; i<chunkCount; ++i)
     {
     acc = ((unsigned long)(hash[tileHashFunc(bases[0])])
         |  (unsigned long)(hash[tileHashFunc(bases[1])])
         |  (unsigned long)(hash[tileHashFunc(bases[2])])
         |  (unsigned long)(hash[tileHashFunc(bases[3])])
         |  (unsigned long)(hash[tileHashFunc(bases[4])])
         |  (unsigned long)(hash[tileHashFunc(bases[5])])
         |  (unsigned long)(hash[tileHashFunc(bases[6])])
         |  (unsigned long)(hash[tileHashFunc(bases[7])]));
     if (acc)
         {
         hitCount = makeIndividualHits(hash, bases, baseOffset, chunkSize, hits, maxHitCount, hitCount);
         if (hitCount >= maxHitCount)
             break;
         }
     bases += chunkSize;
     baseOffset += chunkSize;
     }
 if (bases != endBases)
     hitCount = makeIndividualHits(hash, bases, baseOffset, endBases-bases, hits, maxHitCount, hitCount);
 return hitCount;
 }
 
 int cmpHitsTargetFirst(const void *va, const void *vb)
 /* Compare function to sort hit array by ascending
  * target offset followed by ascending probe offset. */
 {
 struct crudeHit *a = (struct crudeHit *)va;
 struct crudeHit *b = (struct crudeHit *)vb;
 long diff;
 if ((diff = a->targetOffset - b->targetOffset) != 0)
     return diff;
 return a->probeOffset - b->probeOffset;
 }
 
 int cmpExonsTargetFirst(const void *va, const void *vb)
 /* Compare function to sort exons by ascending target
  * offset followed by ascending probe offset. */
 {
 struct crudeExon *a = (struct crudeExon *)va;
 struct crudeExon *b = (struct crudeExon *)vb;
 long diff;
 if ((diff = a->targetStart - b->targetStart) != 0)
     return diff;
 return a->probeStart - b->probeStart;
 }
 
 
 #define maxHitsAtOnce 20000
 /* Maximum number of hits to allow on a single scan.
  * If we get more than this need to toss out a bad tile
  * or something... */
 
 
 int findCrudeGenes(struct fastProber *fp, struct nt4Seq *target,
     struct crudeGene *genes, int maxGeneCount, boolean isRc)
 /* Scan target with probe, and arrange hits into crude genes. */
 {
 struct crudeHit *hits;
 struct crudeExon *exons;
 int hitCount, exonCount, geneCount = 0;
 
 if (fp == NULL)
     return 0;
 assert(maxGeneCount >= maxHitsAtOnce);
 hits = needMem(maxHitsAtOnce*sizeof(*hits));
 exons = needMem(maxHitsAtOnce*sizeof(*exons));
 hitCount = makeHits(fp, target, hits, maxHitsAtOnce);
 //hitCount = makeIndividualHits(fp->hash, target->bases, target->baseCount>>tileSizeShift,
 //    hits, maxHitsAtOnce, 0);
 
 if (hitCount > 0)
     {
     qsort(hits, hitCount, sizeof(hits[0]), cmpHitsTargetFirst);
     exonCount = lumpHitsIntoExons(hits, hitCount, exons);
     qsort(exons, exonCount, sizeof(exons[0]), cmpExonsTargetFirst);
     geneCount = lumpExonsIntoGenes(exons, exonCount, genes, target, isRc);
     }
 freeMem(hits);
 freeMem(exons);
 return geneCount;
 }
 
 int cmpGenesByScore(const void *va, const void *vb)
 /* cmp function to sort leaving highest scores first. */
 {
 struct crudeGene *a = (struct crudeGene *)va;
 struct crudeGene *b = (struct crudeGene *)vb;
 return b->score - a->score;
 }
 
 int countGenesBetter(struct crudeGene *genes, int geneCount, int cutoff)
 /* Count number of genes (in sorted list) with scores better than cutoff */
 {
 int i;
 for (i=0; i<geneCount; ++i)
     {
     if (genes[i].score <= cutoff)
         break;
     }
 return i;
 }
 
 int filterPoorGenes(struct crudeGene *genes, int geneCount)
 /* Sort gene list by score and remove bottom scorers. 
  * This is gauranteed to get rid of half of genes on list or 
  * more. */
 {
 int bestScore, worstScore;
 int newGeneCount;
 int halfGeneCount = geneCount/2;
 
 qsort(genes, geneCount, sizeof(genes[0]), cmpGenesByScore);
 bestScore = genes[0].score;
 worstScore = genes[geneCount-1].score;
 
 /* If scores are clustered with half or more at the top,
  * we have to be crude and just lop off bottom half of scores. */
 if (bestScore == genes[halfGeneCount].score)
     {
     warn("Bunches of genes, all scoring the same. Program is "
          "throwing out half out of necessity.\n");
     return geneCount/2;
     }
 
 /* Drop bottom score until have gotten rid of at least half. */
 newGeneCount = geneCount;
 while (newGeneCount > halfGeneCount)
     {
     worstScore = genes[newGeneCount-1].score;
     newGeneCount = countGenesBetter(genes, newGeneCount, worstScore);
     }
 return newGeneCount;
 }
 
 void findAliEnds(struct ffAli *ali, DNA *needle, DNA *hay,
     long *retNeedleStart, long *retNeedleEnd,
     long *retHayStart, long *retHayEnd)
 {
 DNA *hStart = NULL;
 DNA *hEnd = NULL;
 DNA *nStart = NULL;
 DNA *nEnd = NULL;
 boolean first = TRUE;
 
 while (ali->left) ali = ali->left;
 for (;ali != NULL; ali = ali->right)
     {
     if (first)
         {
         hStart = ali->hStart;
         hEnd = ali->hEnd;
         nStart = ali->nStart;
         nEnd = ali->nEnd;
         first = FALSE;
         }
     else
         {
         if (ali->hStart < hStart)
             hStart = ali->hStart;
         if (ali->hEnd > hEnd)
             hEnd = ali->hEnd;
         if (ali->nStart < nStart)
             nStart = ali->nStart;
         if (ali->nEnd > nEnd)
             nEnd = ali->nEnd;
         }
     }
 *retHayStart = hStart - hay;
 *retNeedleStart = nStart - needle;
 *retHayEnd = hEnd - hay;
 *retNeedleEnd = nEnd - needle;
 }
 
 
 
 boolean findFineAlignment(struct dnaSeq *probe, struct crudeGene *crudeGene, 
         struct ffAli **retAli, int *retAliScore, 
         DNA **retUnpacked, int *retUnpackedSize,
         long *retNeedleStart, long *retNeedleEnd,
         long *retHayStart, long *retHayEnd)
 /* Call fuzzy finder on the crude gene alignment to make a
  * complete alignment. */
 {
 struct nt4Seq *target;
 int tarStart, tarEnd;
 int unpackedSize;
 DNA *unpacked;
 struct ffAli *ali = NULL;
 static int outside = 250;
 int aliScore;
 long hayStart, hayEnd, needleStart, needleEnd;
 
 target = crudeGene->target;
 
 tarStart = crudeGene->targetStart - outside;
 if (tarStart < 0) tarStart = 0;
 tarEnd = crudeGene->targetEnd + outside;
 if (tarEnd > target->baseCount) tarEnd = target->baseCount;
 
 unpackedSize = tarEnd - tarStart;
 unpacked = nt4Unpack(target, tarStart, unpackedSize);
 
 if (crudeGene->isRc)
     reverseComplement(probe->dna, probe->size);
 ali = ffFind(probe->dna, probe->dna + probe->size, 
     unpacked, unpacked + unpackedSize, ffTight);
 aliScore = ffScoreCdna(ali);
 if (crudeGene->isRc)
     reverseComplement(probe->dna, probe->size);
 if (ali != NULL)
     {
     findAliEnds(ali, probe->dna, unpacked, 
         &needleStart, &needleEnd, &hayStart, &hayEnd);    
     hayStart += tarStart;
     hayEnd += tarStart;
     }
 //#define SHOW_ALI
 #ifdef SHOW_ALI
 if (hayStart == 4801255 && hayEnd == 4801707)
     {
     char hayName[50];
     sprintf(hayName, "%s:%d-%d %c", target->name, tarStart, tarEnd);
     ffShowAli(ali, "probe", probe->dna, 0, 
         hayName, unpacked, (tarStart),
         crudeGene->isRc ? '-' : '+');
     }
 #endif /* SHOW_ALI */
 
 *retAli = ali;
 *retAliScore = aliScore;
 *retUnpacked = unpacked;
 *retUnpackedSize = unpackedSize;
 *retNeedleStart = needleStart;
 *retNeedleEnd = needleEnd;
 *retHayStart = hayStart;
 *retHayEnd = hayEnd;
 return (ali != NULL);
 }
 
 void myBlast(struct dnaSeq *probe, char *probeName, struct nt4Seq *chrome[], int chromeCount)
 {
 int oneGeneCount;
 int i;
 int decentAlignments = 0;
 int chromeIx;
 int geneCount = 0;
 int maxGeneCount = 2*maxHitsAtOnce;
 long startTime, endTime;
 int isRc;
 struct nt4Seq *target;
 struct crudeGene *crudeGenes = needMem(maxGeneCount*sizeof(*crudeGenes));
 struct fastProber *fps[2];
 
 /* Time how long it takes to allign things. */
 startTime = clock1000();
 
 fps[0] = makeFastProber(probe);
 reverseComplement(probe->dna, probe->size);
 fps[1] = makeFastProber(probe);
 reverseComplement(probe->dna, probe->size);
 for (chromeIx = 0; chromeIx < chromeCount; ++chromeIx)
     {
     target = chrome[chromeIx];
     for (isRc = 0; isRc <= 1; ++isRc)
         {
         /* Get crude idea of genes on one strand. */
         oneGeneCount = findCrudeGenes(fps[isRc], target, 
             crudeGenes+geneCount, maxGeneCount-geneCount, isRc);
             
         /* Increment gene count and if necessary compact list. */
         geneCount += oneGeneCount;
         if (maxGeneCount - geneCount < maxHitsAtOnce)
             {
             geneCount = filterPoorGenes(crudeGenes, geneCount);
             if (maxGeneCount - geneCount < maxHitsAtOnce)
                 {
                 warn("Too many genes. Moving on to the next.\n");
                 break;
                 }
             }
         }
     if (maxGeneCount - geneCount < maxHitsAtOnce)
         break;
     }
 
 /* Sort genes by initial promise and get rid of some trash. */
     {
     int bestScore, worstScore;
     qsort(crudeGenes, geneCount, sizeof(crudeGenes[0]), cmpGenesByScore);
     bestScore = crudeGenes[0].score;
     worstScore = crudeGenes[geneCount-1].score;
     if (bestScore > worstScore)
         {
         int cutOff = bestScore/10;
         geneCount = countGenesBetter(crudeGenes, geneCount, cutOff);
         }
     }
 
 /* Do fine alignments. */
 for (i=0; i<geneCount; ++i)
     {
     struct ffAli *ali = NULL;
     int aliScore;
     DNA *unpackedDna = NULL;
     int unpackedSize;
     struct crudeGene *cg = crudeGenes+i;
     long hayStart, hayEnd;
     long needleStart, needleEnd;
 
     if (findFineAlignment(probe, cg, 
         &ali, &aliScore, &unpackedDna, &unpackedSize,
         &needleStart, &needleEnd, &hayStart, &hayEnd))
         {
         reportf("%s %d-%d hits %s %d-%d cs %d score %d strand %c\n", 
             probeName, needleStart, needleEnd,
             cg->target->name, hayStart, hayEnd,
             cg->score, aliScore, cg->isRc ? '-' : '+');
         ++decentAlignments;
         ffFreeAli(&ali);
         freez(&unpackedDna);
         }
     }
 freeFastProber(&fps[0]);
 freeFastProber(&fps[1]);
 endTime = clock1000();
 reportf("%d alignments of %s in %f seconds\n\n", decentAlignments, probeName, 
     (endTime-startTime)*0.001);
 freeMem(crudeGenes);
 }
 
 
 char *lastCdnaReported(char *reportFileName)
 /* Open report file and scan for last cDNA blasted. */
 {
 FILE *f = mustOpen(reportFileName, "r");
 char linebuf[512];
 char *words[16];
 int wordCount;
 char *lastName = NULL;
 static char lastNameBuf[256];
 
 for (;;)
     {
     if (fgets(linebuf, sizeof(linebuf), f) == NULL)
         break;
     wordCount = chopString(linebuf, whiteSpaceChopper, words, ArraySize(words));
     if (wordCount > 4 && (strcmp("alignments", words[1]) == 0))
         {
         strncpy(lastNameBuf, words[3], sizeof(lastNameBuf));
         lastName = lastNameBuf;
         }
     }
 return lastName;
 }
 
 void reportBlasting(int ix, struct dnaSeq *cdna)
 {
 char *cdnaName = cdna->name;
 reportf("%d Blasting %s %d bases\n", ix, cdnaName, cdna->size);
 }
 
 void  doInFile(char *inFileName, struct nt4Seq *nt4s[], int nt4Count)
 {
 FILE *inFile = mustOpen(inFileName, "r");
 char lineBuf[256];
 char *words[10];
 char *cdnaName;
 int wordCount;
 struct dnaSeq *cdna;
 int ix = 0;
 
 while (fgets(lineBuf, sizeof(lineBuf), inFile) != NULL)
     {
     wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words));
     if (wordCount == 0) /* Skip blank lines. */
         continue;
     ++ix;
     cdnaName = words[0];
     if (!wormCdnaSeq(cdnaName, &cdna, NULL))
         errAbort("Couldn't find cdna %s\n", cdnaName);
     else
         {
         reportBlasting(ix, cdna);
         myBlast(cdna, cdnaName, nt4s, nt4Count);
         }
     freeDnaSeq(&cdna);
     }
 }
 
 FILE *estFile;
 
 int loadNt4s(char *dir, struct nt4Seq ***retNt4s)
 /* Load up genome stored 2 bits to a nucleotide. */
 {
 struct slName *nameList, *name;
 struct nt4Seq **nt4s;
 int count;
 int i;
 char fileName[512];
 char chromName[256];
 char fullChromName[256];
 long start, end;
 
 nameList = listDir(dir, "*.nt4");
 count = slCount(nameList);
 if (count < 1)
     errAbort("Couldn't find any .nt4 files in %s", dir);
 nt4s = needMem(count*sizeof(nt4s[0]) );
 start = clock1000();
 for (name = nameList, i=0; name != NULL; name = name->next, i+=1)
     {
     char *end;
 
     sprintf(fileName, "%s/%s", dir, name->name);
     
     /* Cut off .nt4 suffix. */
     strcpy(chromName, name->name);
     end = strrchr(chromName, '.');
     assert(end != NULL);
     *end = 0;
     
     sprintf(fullChromName, "Chromosome %s", chromName);
     nt4s[i] = loadNt4(fileName, fullChromName);
     }
 end = clock1000();
 printf("%4.2f seconds loading %d .nt4 files\n", (end-start)*0.001, count);
 *retNt4s = nt4s;
 return count;
 }
 
 void startEsts(char *estFileName)
 /* Start searching ESTs */
 {
 estFile = mustOpen(estFileName, "rb");
 }
 
 struct dnaSeq *nextEst()
 /* Get next EST */
 {
 return faReadOneDnaSeq(estFile, NULL, TRUE);
 }
 
 int main(int argc, char *argv[])
 /* Set things up to batch process all cdnas in data base.
  * Print results to console and to a report file.
  * Allow user to resume interrupted batch process. */
 {
 char *command;
 struct dnaSeq *cdna = NULL;
 struct nt4Seq **nt4s;
 int nt4Count;
 int i;
 char *reportFileName;
 
 if (argc < 3)
     usage();
 
 dnaUtilOpen();
 
 pushWarnHandler(reportWarning);
 
 
 
 command = argv[1];
 reportFileName = argv[2];
 
 /* Simple "named" command doesn't get appended to report file. */
 if (!differentWord(command, "named"))
     {
     int i;
     nt4Count = loadNt4s(wormChromDir(), &nt4s);
     for (i=3; i<argc; ++i)
         {
         char *cdnaName = argv[i];
         if (!wormCdnaSeq(cdnaName, &cdna, NULL))
             warn("Couldn't find cdna %s\n", cdnaName);
         else
             {
             reportBlasting(i-1, cdna);
             myBlast(cdna, cdnaName, nt4s, nt4Count);
             }
         freeDnaSeq(&cdna);
         }
     }
 else if (!differentWord(command, "in"))
     {
     nt4Count = loadNt4s(wormChromDir(), &nt4s);
     reportFile = mustOpen(reportFileName, "a");
     doInFile(argv[3], nt4s, nt4Count);
     }
 /* All others do.  Most use an iterator. */
 else
     {
     struct dnaSeq *cdna;
     char *cdnaName;
     char *seekUntilName = NULL;
     char *faFileName;
     int seekUntilIx = -1;
     int endIx = 0x7fffffff;
     int dbIx = 0;
     char *ntDir;
 
     if (argc < 5)
         usage();
     faFileName = argv[3];
     ntDir = argv[4];
 
     reportFile = mustOpen(reportFileName, "a");
 
     if (!differentWord(command, "starting"))
         {
         char *numStr;
         if (argc != 6 && argc != 7)
             usage();
         numStr = argv[5];
         if (!isdigit(numStr[0]))
             usage();
         seekUntilIx = atoi(numStr)-1;
         if (argc == 7)
             {
             numStr = argv[6];
             if (!isdigit(numStr[0]))
                 usage();
             endIx = seekUntilIx + atoi(numStr);
             }
         warn("Starting at %d\n\n", seekUntilIx+1);
         }
     else if (!differentWord(command, "resume"))
         {
         seekUntilName = lastCdnaReported(reportFileName);
         warn("Resuming after %s\n\n", seekUntilName);
         }
     else if (!differentWord(command, "all"))
         {
         }
     else
         {
         usage();
         }
     
     nt4Count = loadNt4s(ntDir, &nt4s);
     startEsts(faFileName);
 
     /* Possibly skip over some. */
     if (seekUntilName != NULL)
         {
         while ((cdna = nextEst()) != NULL)
             {
             cdnaName = cdna->name;
             ++dbIx;
             if (!differentWord(cdnaName, seekUntilName))
                 break;
             freeDnaSeq(&cdna);
             }
         if (cdna == NULL)
             errAbort("Couldn't find %s in cDNA database", seekUntilName);
         freeDnaSeq(&cdna);
         }
     if (seekUntilIx > 0)
         {
         for (dbIx = 0; dbIx < seekUntilIx; ++dbIx)
             {
             if ((cdna = nextEst()) == NULL)
                 errAbort("Can't seek to %d, there's only %d", seekUntilIx+1, dbIx+1);
             freeDnaSeq(&cdna);
             }
         }
 
     /* Main loop */
     while ((cdna = nextEst()) != NULL)
         {
         if (dbIx >= endIx)
             break;
         ++dbIx;
         cdnaName = cdna->name;
         reportBlasting(dbIx, cdna);
         myBlast(cdna, cdnaName, nt4s, nt4Count);
         freeDnaSeq(&cdna);
         }
     }
 for (i=0; i<nt4Count; ++i)
     freeNt4(&nt4s[i]);
 freez(&nt4s);
 carefulClose(&reportFile);
 return 0;
 }