src/idbQuery/idbQuery.c 6e5ee11ca95cd971984038cf65bae00d9c898707

6e5ee11ca95cd971984038cf65bae00d9c898707
galt
  Wed Jun 4 15:40:02 2014 -0700
Since we have git, it is easy to rename errabort.c to errAbort.c without losing any history.
diff --git src/idbQuery/idbQuery.c src/idbQuery/idbQuery.c
index 48f75cd..d72b825 100644
--- src/idbQuery/idbQuery.c
+++ src/idbQuery/idbQuery.c
@@ -1,1824 +1,1824 @@
 
 /* idbQuery.c - This cgi script queries the Intronerator Data Base for
  * things that match what the user has set up in idbQuery.html. */
 #include "common.h"
 #include "portable.h"
-#include "errabort.h"
+#include "errAbort.h"
 #include "localmem.h"
 #include "wormdna.h"
 #include "cheapcgi.h"
 #include "htmshell.h"
 #include "gdf.h"
 #include "cda.h"
 #include "xa.h"
 
 FILE *debugOut;
 
 char **chromNames;
 int chromCount;
 
 /* Local memory */
 struct lm *localMem;
 #define lAlloc(size) (lmAlloc(localMem, (size) ))
 
 struct liteFeature
 /* A feature without many features - ha! 
  * Lists of this type tend to be short lived and in local memory. */
     {
     struct liteFeature *next;
     int start, end;
     char strand;
     };
 
 struct idbFeature
 /* A feature with more feature.  
  * lists of this type are longer lived and in global memory. */
     {
     struct idbFeature *next;
     int start, end;
     char strand;
     char *chrom;
     char *name;
     char *orfName;
     };
 
 struct liteFeature *liteFreeList = NULL;  /* Free liteFeatures ready for reuse */
 
 struct liteFeature *newLiteFeature(int start, int end, char strand)
 /* Return a new lite feature. */
 {
 struct liteFeature *feat;
 if (liteFreeList != NULL)
     {
     feat = liteFreeList;
     liteFreeList = liteFreeList->next;
     zeroBytes(feat, sizeof(*feat));
     }
 else
     feat = lAlloc(sizeof(*feat));
 feat->start = start;
 feat->end = end;
 feat->strand = strand;
 return feat;
 }
 
 void freeLiteFeature(struct liteFeature *feat)
 /* Free a single lite feature. */
 {
 slAddHead(&liteFreeList, feat);
 }
 
 void freeLiteFeatureList(struct liteFeature **pList)
 /* Free lite features list (return to free list). */
 {
 struct liteFeature *feat, *next;
 for (feat = *pList; feat != NULL; feat = next)
     {
     next = feat->next;
     slAddHead(&liteFreeList, feat);
     }
 *pList = NULL;
 }
 
 int cmpLiteFeatures(const void *va, const void *vb)
 /* Compare two lite features. */
 {
 struct liteFeature **pA = (struct liteFeature **)va;
 struct liteFeature **pB = (struct liteFeature **)vb;
 struct liteFeature *a = *pA, *b = *pB;
 int diff;
 
 if ((diff = a->strand - b->strand) != 0)
     return diff;
 if ((diff = a->start - b->start) != 0)
     return diff;
 return a->end - b->end;
 }
 
 struct idbFeature *newIdbFeature(char *name, char *chrom, int start, int end, char strand)
 /* Return a new idbFeature. */
 {
 struct idbFeature *feat;
 AllocVar(feat);
 feat->name = cloneString(name);
 feat->chrom = wormOfficialChromName(chrom);
 feat->start = start;
 feat->end = end;
 feat->strand = strand;
 return feat;
 }
 
 int cmpIdbFeatures(const void *va, const void *vb)
 /* Compare two idb features. */
 {
 struct idbFeature **pA = (struct idbFeature **)va;
 struct idbFeature **pB = (struct idbFeature **)vb;
 struct idbFeature *a = *pA, *b = *pB;
 int diff;
 
 if ((diff = strcmp(a->chrom, b->chrom)) != 0)
     return diff;
 if ((diff = a->start - b->start) != 0)
     return diff;
 return a->end - b->end;
 }
 
 struct idbFeature *inIdbList(struct idbFeature *list, char *chrom, int start, int end, char strand)
 /* Return feature with matching start and end, or NULL if no match. */
 {
 struct idbFeature *el;
 for (el = list; el != NULL; el = el->next)
     if (el->start == start && el->end == end && el->strand == strand && sameString(chrom, el->chrom))
         return el;
 return NULL;
 }
 
 void freeIdbFeature(struct idbFeature *feat)
 /* Free a single feature. */
 {
 if (feat->orfName != feat->name)
     freeMem(feat->orfName);
 freeMem(feat->name);
 freeMem(feat);
 }
 
 void freeIdbFeatureList(struct idbFeature **pFeatList)
 /* Free a whole features list. */
 {
 struct idbFeature *feat, *next;
 for (feat = *pFeatList; feat != NULL; feat = next)
     {
     next = feat->next;
     freeIdbFeature(feat);
     }
 *pFeatList = NULL;
 }
 
 enum sourceTypes
     {
     stRecursive, stGenome, stChromosomes, stCosmids, stOrfs,
     };
 
 struct cgiChoice sourceChoices[] =
     {
         {"recursive", stRecursive,},
         {"genome", stGenome,},
         {"chromosomes", stChromosomes, },
         {"cosmids", stCosmids, },
         {"orfs", stOrfs,},
     };
 
 enum regionTypes
     {
     rtAll, rtExons, rtIntrons, rtIntergenic, rtSkippedExon, rtSkippedIntron, 
     rtAlt3Intron, rtAlt5Intron,
     };
 
 struct cgiChoice regionChoices[] =
     {
         {"", rtAll},
         {" ", rtAll},
         {"exons", rtExons},
         {"introns", rtIntrons},
         {"intergenic", rtIntergenic},
         {"skipped exon", rtSkippedExon},
         {"skipped intron", rtSkippedIntron},
         {"alt 5' intron", rtAlt5Intron},
         {"alt 3' intron", rtAlt3Intron},
         {"alt5", rtAlt5Intron},
         {"alt3", rtAlt3Intron},
     };
 
 
 enum relativeToTypes
     {
     rttRegion, rttStart, rttEnd,
     };
 
 struct cgiChoice relativeToChoices[] =
     {
         {"Region", rttRegion, },
         {"Start", rttStart, },
         {"End", rttEnd, },
     };
 
 enum cbHomologyTypes
     {
     cbtDontCare, cbtAny, cbtHigh,
     };
 
 enum logicTypes
     {
     logicPos, logicNeg,
     };
 
 struct cgiChoice logicChoices[] =
     {
         {"", logicPos,},
         {" ", logicPos,},
         {"Not", logicNeg},
         {"Invert", logicNeg},
     };
 
 struct cgiChoice cbHomologyChoices[] =
     {
         {" ", cbtDontCare, },
         {"", cbtDontCare, },
         {"Any", cbtAny, },
         {"High", cbtHigh, },
     };
 
 enum cWhereTypes
     {
     cwAnywhere, cwMostly, cwThroughout, cwPiecemeal,
     };
 
 struct cgiChoice cWhereChoices[] = 
     {
         {"Anywhere", cwAnywhere,},
         {"Mostly", cwMostly,},
         {"Throughout", cwThroughout,}, 
         {"Piecemeal", cwPiecemeal,},
     };
 
 enum cdnaHitsTypes
     {
     cdtDontCare, cdtFullLength, cdtEst, cdtEither,
     };
 
 struct cgiChoice cdnaHitsChoices[] = 
     {
         {" ", cdtDontCare, },
         {"", cdtDontCare, },
         {"Full Length", cdtFullLength, },
         {"FullLength", cdtFullLength, },
         {"EST", cdtEst, },
         {"Either", cdtEither, },
     };
 
 enum cdnaStageTypes 
     {
     cstEither, cstMixed, cstEmbryo, cstMixedOnly, cstEmbryoOnly,
     };
 
 struct cgiChoice cdnaStageChoices[] =
     {
         {"Either", cstEither, },
         {"Embryo", cstEmbryo, },
         {"Mixed", cstMixed, },
         {"Embryo Only", cstEmbryoOnly, },
         {"embryoOnly", cstEmbryoOnly, },
         {"Mixed Only", cstMixedOnly, },
         {"mixedOnly", cstMixedOnly, },
     };
 
 enum gpTypes
 /* Gene prediction types. */
     {
     gpDontCare, gpCds, gpCoding,
     };
 
 struct cgiChoice gpChoices[] = 
     {
         {"", gpDontCare, },
         {" ", gpDontCare, },
         {"CDS", gpCds, },
         {"Coding", gpCoding, },
     };
 
 enum wgpTypes
 /* Which Gene prediction types. */
     {
     wgpAceDb, wgpGenie,
     };
 
 struct cgiChoice wgpChoices[] =
     {
         {"AceDB", wgpAceDb, },
         {"Genie", wgpGenie, },
     };
 
 enum outputTypes
     {
     otFasta, otNameOnly, otRecursive, otHyperlinked
     };
 
 struct cgiChoice outputChoices[] =
     {
     {"Fasta", otFasta, }, 
     {"Name Only", otNameOnly, },
     {"nameOnly", otNameOnly, },
     {"Recursive", otRecursive, },
     {"Hyperlinked", otHyperlinked, },
     };
 
 enum capTypes 
     {
     ctCoding, ctAll, cdNone,
     };
 
 struct cgiChoice capChoices[] =
     {
     {"Coding", ctCoding,},
     {"All", ctAll, },
     {"None", cdNone,},
     };
 
 boolean wildMatchAny(char *name, char *wildWords[], int wildCount, char usedFlags[])
 /* Return true if name matches any of wildWords. */
 {
 int i;
 for (i=0; i<wildCount; ++i)
     {
     if (wildMatch(wildWords[i], name))
         {
         usedFlags[i] = TRUE;
         return TRUE;
         }
     }
 return FALSE;
 }
 
 struct idbFeature *getMatchingC2Names(char *c2FileName, char *wildWords[], int wildCount, char usedFlags[])
 /* Get all names from c2g or c2c file that match words, which may have
  * wildcards. */
 {
 char pathName[512];
 FILE *f;
 char line[256];
 int lineCount=0;
 char *words[3];
 int wordCount;
 struct idbFeature *featList = NULL, *feat;
 char *chrom;
 int start, end;
 char *name;
 
 sprintf(pathName, "%s%s", wormFeaturesDir(), c2FileName);
 f = mustOpen(pathName, "r");
 while (fgets(line, sizeof(line), f))
     {
     ++lineCount;
     wordCount = chopLine(line, words);
     if (wordCount == 0)
         continue;
     if (wordCount < 3)
         errAbort("Short line %d in %s\n", lineCount, pathName);
     name = words[2];
     if (wildMatchAny(name, wildWords, wildCount, usedFlags))
         {
         wormParseChromRange(words[0], &chrom, &start, &end);
         feat = newIdbFeature(name, chrom, start, end, words[1][0]);
         slAddHead(&featList, feat);
         }
     }
 fclose(f);
 return featList;
 }
 
 struct idbFeature *getCosmidNames(char *wildWords[], int wildCount, char usedFlags[])
 /* Get all cosmid names that match words */
 {
 struct idbFeature *list;
 
 list = getMatchingC2Names("c2c", wildWords, wildCount, usedFlags);
 slReverse(&list);
 return list;
 }
 
 struct idbFeature *getOrfNames(char *wildWords[], int wildCount, char usedFlags[])
 /* Get all ORF or gene names that match words */
 {
 struct idbFeature *featList, *feat;
 FILE *f;
 char synFileName[512];
 char line[256];
 int lineCount=0;
 char *words[5];
 int wordCount;
 char *geneName, *orfName;
 
 /* Get ORF list and fill in orfName pointers. */
 featList = getMatchingC2Names("c2g", wildWords, wildCount, usedFlags);
 for (feat = featList; feat != NULL; feat = feat->next)
     feat->orfName = feat->name;
 
 /* Add in genes from synonym list. */
 sprintf(synFileName, "%s%s", wormFeaturesDir(), "syn");
 f = mustOpen(synFileName, "r");
 while (fgets(line, sizeof(line), f))
     {
     ++lineCount;
     wordCount = chopLine(line, words);
     if (wordCount == 0)
         continue;
     if (wordCount < 2)
         errAbort("Short line %d of %s\n", lineCount, synFileName);
     geneName = words[0];
     if (wildMatchAny(geneName, wildWords, wildCount, usedFlags))
         {
         int start, end;
         char strand;
         char *chrom;
 
         orfName = words[1];
         if (!wormGeneRange(orfName, &chrom, &strand, &start, &end) )
             {
             errAbort("Can't find %s (which should be same as %s). Sorry, the database is messed up!",
                 orfName, geneName);
             }
         feat = newIdbFeature(geneName, chrom, start, end, strand);
         feat->orfName = cloneString(orfName);
         slAddHead(&featList, feat);
         }
     }
 fclose(f); 
 
 slReverse(&featList);
 return featList;
 }
 
 void explainRecursiveInput(int lineCount)
 /* Explain how recursive input should look and exit. */
 {
 errAbort("The program can't cope with line %d of the input.\n"
          "Each line should be of form:\n"
          "   >name chromosome chrom:start-end strand\n"
          "Where 'name' is the name of the sequence; 'chromosome' is\n"
          "just the word 'chromosome'; 'chrom' is a worm chromosome, either\n"
          "'i', 'ii', 'iii', 'iv', 'v', 'x', or 'm'; 'start' is the starting\n"
          "position in the chromosome; 'end' is the ending position in the\n"
          "chromosome; and 'strand' is either '+' or '-'\n"
          "An example of this is:\n"
          "    >F36A2 chromosome i:8332665-8355411 +\n"
          "Normally you generate input by selecting 'recursive' for the\n"
          "output type, and then reusing the output as input for the next\n"
          "round.", lineCount);
 }
 
 struct idbFeature *getRecursiveInput(char *rawText)
 /* Parse recursive data. */
 {
 struct idbFeature *idbList = NULL, *idb;
 char *line, *nextLine;
 int lineCount=0;
 char *words[6];
 int wordCount;
 
 /* Break up input to a line at a time. */
 for (line = rawText; line != NULL; line = nextLine)
     {
     nextLine = strchr(line, '\n');
     if (nextLine != NULL)
         *nextLine++ = 0;
     ++lineCount;
 
     /* Chop up lines, and process non-blank lines. */
     wordCount = chopLine(line, words);
     if (wordCount > 0)
         {
         int start, end;
         char *chrom;
         char strand;
         char *name;
 
         if (wordCount < 4 || !sameWord(words[1], "chromosome") || words[0][0] != '>')
             explainRecursiveInput(lineCount);
         name = words[0]+1;
         if (!wormParseChromRange(words[2], &chrom, &start, &end))
             explainRecursiveInput(lineCount);
         strand = words[3][0];
         if (strand != '+' && strand != '-')
             explainRecursiveInput(lineCount);
         idb = newIdbFeature(name, chrom, start, end, strand);
         slAddHead(&idbList, idb);
         }
     }
 slReverse(&idbList);
 return idbList;
 }
 
 struct idbFeature *getSequenceNames(int sourceType, char *rawNames)
 /* Return a list of names from rawNames. */
 {
 int i;
 struct idbFeature *list = NULL;
 char *words[256];
 char usedFlags[256];
 int wordCount;
 
 /* Full genome request is an easy special case.  Deal with it
  * here. */
 if (sourceType == stGenome)
     {
     for (i=0; i<chromCount; ++i)
         {
         char *name = chromNames[i];
         slAddTail(&list, newIdbFeature(name, name, 0, wormChromSize(name), '+'));
         }
     return list;
     }
 else if (sourceType == stRecursive)
     return getRecursiveInput(rawNames);
 
 /* Everyone else will want the rawNames list chopped into words. */
 wordCount = chopLine(rawNames, words);
 if (wordCount < 1)
     errAbort("Please go back and enter some sequence names or select Full Genome.");
 zeroBytes(usedFlags, wordCount);
 
 if (sourceType == stChromosomes)
     {
     /* Deal with chromosome request. */
     for (i=0; i<chromCount; ++i)
         {
         char *name = chromNames[i];
         if (wildMatchAny(name, words, wordCount, usedFlags))
             slAddTail(&list, newIdbFeature(name, name, 0, wormChromSize(name), '+'));
         }
     }
 else if (sourceType == stCosmids)
     {
     list = getCosmidNames(words, wordCount, usedFlags);
     }
 else if (sourceType == stOrfs)
     {
     list = getOrfNames(words, wordCount, usedFlags);
     }
 for (i=0; i<wordCount; ++i)
     {
     if (!usedFlags[i])
         warn("%s not found", words[i]);
     }
 return list;
 }
  
 void printSequence(FILE *f, char *seq, int seqSize, 
     int lineSize, int wordSize, boolean numberLines, int startNumber)
 /* Print sequence to file possibly with line breaks and word breaks and
  * numbers at the end of each line. Pass in zero for line size if you don't
  * want any line breaks; zero in wordSize for no word breaks. */
 {
 int wordRing, lineRing;
 int i;
 
 if (lineSize == 0)
     lineSize = seqSize+1;
 if (wordSize == 0)
     wordSize = seqSize+1;
 wordRing = wordSize;
 lineRing = lineSize;
 
 for (i=0; i<seqSize; ++i)
     {
     fputc(seq[i], f);
     if (--wordRing <= 0)
         {
         fputc(' ', f);
         wordRing = wordSize;
         }
     if (--lineRing <= 0)
         {
         if (numberLines)
             fprintf(f, " %d", startNumber+i+1);
         fputc('\n', f);
         lineRing = lineSize;
         }
     }
 if (lineRing != lineSize)
     {
     if (numberLines)
         fprintf(f, " %d", startNumber+i);
     fputc('\n', f);
     }
 }
 
 void findGdf(char **retGdfDir, struct wormGdfCache **retCache)
 /* Go figure out which gene-finder's predictions to use. */
 {
 int wgpType = cgiOneChoice("gpWhich", wgpChoices, ArraySize(wgpChoices));
 
 if (wgpType == wgpAceDb)
     {
     *retGdfDir = wormSangerDir();
     *retCache = &wormSangerGdfCache;
     }
 else
     {
     *retGdfDir = wormGenieDir();
     *retCache = &wormGenieGdfCache;
     }
 }
 
 void outputList(struct idbFeature *featList, int outputType,
     int lineSize, int wordSize,
     boolean numberLines, int capType)
 /* Write out features list. */
 {
 struct idbFeature *feat;
 DNA *dna;
 int featSize;
 
 if (featList == NULL)
     errAbort("Sorry, no matching features found.");
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     featSize = feat->end - feat->start;
     if (featSize > 0)
         {
         if (outputType == otNameOnly)
             printf("%s\n", feat->name);
         else if (outputType == otHyperlinked)
             {
             printf("<A HREF=\"../cgi-bin/tracks.exe?where=%s:%d-%d\">%s</A>\n",
                 feat->chrom, feat->start, feat->end, feat->name);
             }
         else
 	    {
             fprintf(stdout, ">%s chromosome %s:%d-%d %c\n", 
                 feat->name, feat->chrom, feat->start, feat->end, feat->strand);
             fprintf(debugOut, ">%s chromosome %s:%d-%d %c\n", 
                 feat->name, feat->chrom, feat->start, feat->end, feat->strand);
 	    }
         if (outputType == otFasta)
             {
             dna = wormChromPart(feat->chrom, feat->start, featSize);
             if (capType == ctAll)
                 toUpperN(dna, featSize);
             else if (capType == ctCoding)
                 {
                 char *gdfDir;
                 struct wormGdfCache *gdfCache;
                 struct wormFeature *geneList, *gene;
 
                 findGdf(&gdfDir, &gdfCache);
                 geneList = wormSomeGenesInRange(feat->chrom, feat->start, feat->end, gdfDir);
                 for (gene = geneList; gene != NULL; gene = gene->next)
                     {
                     char *name = gene->name;
                     if (!wormIsNamelessCluster(name))
                         {
                         struct gdfGene *gdf = wormGetSomeGdfGene(name, gdfCache);
                         gdfUpcExons(gdf, gene->start, dna, featSize, feat->start);
                         gdfFreeGene(gdf);
                         }
                     }
                 slFreeList(&geneList);
                 }
             if (feat->strand == '-')
                 reverseComplement(dna, featSize);
             printSequence(stdout, dna, featSize, lineSize, wordSize, numberLines, 0);
             printSequence(debugOut, dna, featSize, lineSize, wordSize, numberLines, 0);
             freez(&dna);
             }
         }
     }
 }
 
 
 struct idbFeature *breakIntoIntronsOrExons(struct idbFeature *oldList, boolean doIntrons)
 /* Create a new features list containing exons from old features list. */
 {
 struct idbFeature *newList = NULL, *oFeat, *nFeat;
 char nameBuf[128];
 char *gdfDir;
 struct wormGdfCache *gdfCache;
 struct wormFeature *geneList, *gene;
 struct gdfGene *gdf;
 int i;
 int startIx, endIx;
 char *ieName = (doIntrons ? "intron" : "exon");
 int ieCount;
 char *geneName;
 
 findGdf(&gdfDir, &gdfCache);
 for (oFeat = oldList; oFeat != NULL; oFeat = oFeat->next)
     {
     geneList = wormSomeGenesInRange(oFeat->chrom, oFeat->start, oFeat->end, gdfDir);
     for (gene = geneList; gene != NULL; gene = gene->next)
         {
         if (oFeat->orfName && differentString(oFeat->orfName, gene->name) )
             continue;
         if (wormIsNamelessCluster(gene->name))
             continue;
         gdf = wormGetSomeGdfGene(gene->name, gdfCache);
         if (oFeat->orfName)
             geneName = oFeat->name;
         else
             geneName = gdf->name;
         startIx = 0;
         endIx = gdf->dataCount-2;
         if (doIntrons)
             {
             startIx += 1;
             endIx -= 1;
             }
         if (gdf->strand == '+')
             {
             for (i=startIx, ieCount = 1; i<=endIx; i+=2, ieCount+=1)
                 {
                 sprintf(nameBuf, "%s_%s_%d", geneName, ieName, ieCount);
                 nFeat = newIdbFeature(nameBuf, oFeat->chrom, 
                     gdf->dataPoints[i].start, gdf->dataPoints[i+1].start, gdf->strand);
                 slAddHead(&newList, nFeat);
                 }            
             }
         else
             {
             for (i=endIx, ieCount = 1; i>=startIx; i-=2, ieCount+=1)
                 {
                 sprintf(nameBuf, "%s_%s_%d", geneName, ieName, ieCount);
                 nFeat = newIdbFeature(nameBuf, oFeat->chrom, 
                     gdf->dataPoints[i].start, gdf->dataPoints[i+1].start, gdf->strand);
                 slAddHead(&newList, nFeat);
                 }            
             }
         gdfFreeGene(gdf);
         }
     slFreeList(&geneList);
     }
 slReverse(&newList);
 return newList;
 }
 
 struct idbFeature *igFeature(char *chrom, int start, int end)
 /* Make up a new feature for an intergenic region. */
 {
 char nameBuf[128];
 assert(start < end);
 assert(start >= 0 && end >= 0);
 sprintf(nameBuf, "IG_%s_%d_%d", chrom, start, end);
 return newIdbFeature(nameBuf, chrom, start, end, '+');
 }
 
 
 struct idbFeature *intergenicRegions(struct idbFeature *oldList, int sourceType)
 /* Get intergenic regions of features. */
 {
 struct idbFeature *newList = NULL, *oFeat, *nFeat;
 struct wormFeature *geneList, *gene;
 char *gdfDir;
 struct wormGdfCache *gdfCache;
 int geneEnd;
 char *chrom;
 
 findGdf(&gdfDir, &gdfCache);
 if (sourceType == stOrfs)
     warn("Usually it's a mistake to select intergenic regions on ORF features.");
 
 for (oFeat = oldList; oFeat != NULL; oFeat = oFeat->next)
     {
     chrom = oFeat->chrom;
     geneList = wormSomeGenesInRange(oFeat->chrom, oFeat->start, oFeat->end, gdfDir);
     if ((gene = geneList) == NULL)
         {
         /* Entire region is intragenic */
         nFeat = igFeature(chrom, oFeat->start, oFeat->end);
         slAddHead(&newList, nFeat);
         }
     else
         {
         /* Do region before first gene. */
         geneEnd = oFeat->start;
         if (oFeat->start < gene->start)
             {
             nFeat = igFeature(chrom, oFeat->start, gene->start);
             slAddHead(&newList, nFeat);
             geneEnd = gene->end;
             gene = gene->next;
             }
         /* Do regions between genes. */
         for (; gene != NULL; gene = gene->next)
             {
             if (gene->start > geneEnd)
                 {
                 nFeat = igFeature(chrom, geneEnd, gene->start);
                 slAddHead(&newList, nFeat);
                 }
             geneEnd = max(geneEnd, gene->end);
             }
         /* Do region after last gene. */
         if (geneEnd < oFeat->end)
             {
             nFeat = igFeature(chrom, geneEnd, oFeat->end);
             slAddHead(&newList, nFeat);
             }        
         slFreeList(&geneList);
         }
     }
 slReverse(&newList);
 return newList;
 }
 
 enum intronConsts {minIntronSize = 24, minGoodEnd = 5, minExonSize = 12};
 
 boolean isIntronBetween(struct cdaBlock *block, struct cdaBlock *nextBlock)
 /* Return TRUE if gap between block and nextBlock in n is an intron. */
 {
 int nGap, hGap, thisSize, nextSize;
 
 nGap = nextBlock->nStart - block->nEnd;
 hGap = nextBlock->hStart - block->hEnd;
 thisSize = block->nEnd - block->nStart;
 nextSize = nextBlock->nEnd - nextBlock->nStart;
 return (nGap == 0 && hGap >= minIntronSize && thisSize >= minExonSize && nextSize >= minExonSize
     && block->endGood > minGoodEnd && nextBlock->startGood > minGoodEnd);
 }
 
 boolean hasIntron(struct cdaAli *cda)
 /* Returns TRUE if the cda has an intron. */
 {
 struct cdaBlock *b = cda->blocks;
 int blockCount = cda->blockCount;
 int i;
 
 for (i=1; i<blockCount; ++i)
     {
     if (isIntronBetween(b, b+1))
         return TRUE;
     ++b;
     }
 return FALSE;
 }
    
 struct liteFeature *liteSkipIrrelevant(struct liteFeature *listSeg, struct liteFeature *feat,
     int extraSpace)
 /* Skip parts of listSeg that couldn't matter to feat. Assumes
  * listSeg is sorted on start. Returns first possibly relevant
  * item of listSeg. */
 {
 struct liteFeature *seg = listSeg;
 char strand = feat->strand;
 int start = feat->start - extraSpace;
 int skipCount = 0;
 
 /*Skip past wrong strand. */
 while (seg != NULL && seg->strand  < strand)
     {  
     seg = seg->next;
     ++skipCount;
     }
 /* Skip past stuff that we've passed on this chromosome. */
 while (seg != NULL && seg->strand == strand && seg->end < start)
     {
     seg = seg->next;
     ++skipCount;
     }
 return seg;
 }
 
 struct liteFeature *liteNextOverlap(struct liteFeature *listSeg, struct liteFeature *liteFeature, int extra)
 /* Assuming that listSeg is sorted on start, search through list
  * until find something that overlaps with liteFeature, or have gone
  * past where liteFeature could be. Returns overlapping liteFeature or NULL. */
 {
 struct liteFeature *seg = listSeg;
 char strand = liteFeature->strand;
 int start = liteFeature->start;
 int end = liteFeature->end;
 
 while (seg != NULL && seg->strand == strand && seg->start <= end + extra)
     {
     if (!(seg->start - extra >= end || seg->end + extra <= start))
         return seg;
     seg = seg->next;
     }
 return NULL;
 }
 
 void dumpLiteList(char *name, struct liteFeature *list)
 /* Print out lite list for debugging. */
 {
 struct liteFeature *el;
 printf("%s\n", name);
 for (el = list; el != NULL; el = el->next)
     printf("%d-%d %c\n", el->start, el->end, el->strand);
 }
 
 struct idbFeature *filterLite(struct idbFeature *filteredList, struct liteFeature *aList, struct liteFeature *bList,
     char *featName, char *featChrom, char *typeName, 
     boolean (*filter)(struct liteFeature *a, struct liteFeature *b))
 /* Return subset of a that overlaps with b, and also passes through filter. */
 {
 struct liteFeature *relevantB = bList;
 struct liteFeature *a, *b;
 
 for (a = aList; a != NULL; a = a->next)
     {
     relevantB = liteSkipIrrelevant(relevantB, a, 0);
     for (b = relevantB; (b = liteNextOverlap(b, a, 0)) != NULL; b = b->next)
         {
         if (filter(a,b))
             {
             char nameBuf[128];
             struct idbFeature *idbFeat;
             int start = a->start, end = a->end;
 
             sprintf(nameBuf, "%s_%s_%s_%d_%d", featName, typeName, featChrom, start, end);
             idbFeat = newIdbFeature(nameBuf, featChrom, start, end, a->strand);
             slAddHead(&filteredList, idbFeat);
             }                
         }
     }
 return filteredList;
 }
 
 boolean encompassedFilter(struct liteFeature *a, struct liteFeature *b)
 /* Return TRUE if a is encompassed by b */
 {
 return (b->start <= a->start && a->end <= b->end);
 }
 
 struct idbFeature *skippedExons(struct idbFeature *featList, int sourceType)
 /* Get all the exons that are sometimes spliced in and sometimes not. */
 {
 struct idbFeature *feat;
 struct idbFeature *skippedExonList = NULL;
 
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     struct cdaAli *cdaList, *cda;
     struct liteFeature *exList = NULL, *ex;
     struct liteFeature *inList = NULL, *in;
 
     cdaList = wormCdaAlisInRange(feat->chrom, feat->start, feat->end);
     cdaCoalesceFast(cdaList);
     /* Build up list of exons with known bounds (introns on either side) 
      * and list of introns. */
     for (cda = cdaList; cda != NULL; cda = cda->next)
         {
         int i;
         char cloneStrand = cdaCloneStrand(cda);
         if (sourceType != stOrfs || cloneStrand == feat->strand)
             {
             struct cdaBlock *blocks;
             int endBlock;
 
             endBlock = cda->blockCount-1;
             blocks = cda->blocks;
             /* Add to exon list. */
             for (i=1; i<endBlock; ++i)
                 {
                 struct cdaBlock *b = blocks+i;
                 if (isIntronBetween(b-1, b) && isIntronBetween(b, b+1))
                     {
                     ex = newLiteFeature(b->hStart, b->hEnd, cloneStrand);
                     slAddHead(&exList, ex);
                     }
                 }
             /* Add to intron list. */
             for (i=0; i<endBlock; ++i)
                 {
                 struct cdaBlock *b = blocks+i;
                 struct cdaBlock *nb = b+1;
                 if (isIntronBetween(b, nb))
                     {
                     in = newLiteFeature(b->hEnd, nb->hStart, cloneStrand);
                     slAddHead(&inList, in);
                     }
                 }
             }
         } 
     slUniqify(&exList, cmpLiteFeatures, freeLiteFeature);
     slUniqify(&inList, cmpLiteFeatures, freeLiteFeature);
         
     /* Go through and check for introns that cover exons completely.
      * Any such exons are occassionally skipped */
     skippedExonList = filterLite(skippedExonList, exList, inList, feat->name, feat->chrom, 
         "skip_ex", encompassedFilter);
     
     freeLiteFeatureList(&exList);
     freeLiteFeatureList(&inList);
     cdaFreeAliList(&cdaList);
     }
 slUniqify(&skippedExonList, cmpIdbFeatures, freeIdbFeature);
 return skippedExonList;
 }
 
 struct idbFeature *skippedIntrons(struct idbFeature *featList, int sourceType)
 /* Get all the exons that are sometimes spliced in and sometimes not. */
 {
 struct idbFeature *feat;
 struct idbFeature *skippedIntronList = NULL;
 
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     struct cdaAli *cdaList, *cda;
     struct liteFeature *inList = NULL, *in;
     struct liteFeature *exList = NULL, *ex;
 
     cdaList = wormCdaAlisInRange(feat->chrom, feat->start, feat->end);
     cdaCoalesceFast(cdaList);
     /* Build up list of introns and exons.  */
     for (cda = cdaList; cda != NULL; cda = cda->next)
         {
         int i;
         char cloneStrand = cdaCloneStrand(cda);
         if (sourceType != stOrfs || cloneStrand == feat->strand)
             {
             struct cdaBlock *blocks;
             int endBlock;
             boolean nonGenomic = FALSE;
 
             /* Build up intron list. */
             endBlock = cda->blockCount-1;
             blocks = cda->blocks;
             for (i=0; i<endBlock; ++i)
                 {
                 struct cdaBlock *b = blocks+i;
                 struct cdaBlock *nb = b+1;
                 if (isIntronBetween(b, nb))
                     {
                     int start = b->hEnd, end = nb->hStart;
                     in = newLiteFeature(start, end, cloneStrand);
                     slAddHead(&inList, in);
                     nonGenomic = TRUE;
                     }
                 }
             /* Build up exon list too. Only include cDNAs that have
              * an intron to avoid genomic contamination. */
             if (nonGenomic)
                 {
                 for (i=0; i<=endBlock; ++i)
                     {
                     struct cdaBlock *b = blocks+i;
                     ex = newLiteFeature(b->hStart, b->hEnd, cloneStrand);
                     slAddHead(&exList, ex);
                     }
                 }
             }
         }    
     
     slUniqify(&inList, cmpLiteFeatures, freeLiteFeature);
     slUniqify(&exList, cmpLiteFeatures, freeLiteFeature);
 
     /* Go through and check for exons that cover introns completely.
      * Any such introns are occassionally skipped */
     skippedIntronList = filterLite(skippedIntronList, inList, exList, feat->name, feat->chrom, 
         "skip_in", encompassedFilter);
     
     freeLiteFeatureList(&inList);
     freeLiteFeatureList(&exList);
     cdaFreeAliList(&cdaList);
     }
 slUniqify(&skippedIntronList, cmpIdbFeatures, freeIdbFeature);
 return skippedIntronList;
 }
 
 boolean alt3Filter(struct liteFeature *a, struct liteFeature *b)
 /* Return TRUE if 5' end is same but 3' end is different. */
 {
 if (a->strand == '+')
     return (b->start == a->start && intAbs(a->end - b->end) >= 3);
 else
     return (b->end == a->end && intAbs(a->start - b->start) >= 3);
 }
 
 boolean alt5Filter(struct liteFeature *a, struct liteFeature *b)
 /* Return TRUE if 3' end is same but 5' end is different. */
 {
 if (a->strand == '+')
     return (b->end == a->end && intAbs(a->start - b->start) >= 3);
 else
     return (b->start == a->start && intAbs(a->end - b->end) >= 3);
 }
 
 struct idbFeature *altEnd(struct idbFeature *featList, int sourceType,
      boolean (*filter)(struct liteFeature *a, struct liteFeature *b), char *typeName)
 /* Return introns that are alt3 or alt5 depending on filter passed in. */
 {
 struct idbFeature *feat;
 struct idbFeature *altIntronList = NULL;
 
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     struct cdaAli *cdaList, *cda;
     struct liteFeature *inList = NULL, *in;
 
     cdaList = wormCdaAlisInRange(feat->chrom, feat->start, feat->end);
     cdaCoalesceFast(cdaList);
 
     /* Build up list of introns   */
     for (cda = cdaList; cda != NULL; cda = cda->next)
         {
         int i;
         char cloneStrand = cdaCloneStrand(cda);
         if (sourceType != stOrfs || cloneStrand == feat->strand)
             {
             struct cdaBlock *blocks;
             int endBlock;
 
             /* Build up intron list. */
             endBlock = cda->blockCount-1;
             blocks = cda->blocks;
             for (i=0; i<endBlock; ++i)
                 {
                 struct cdaBlock *b = blocks+i;
                 struct cdaBlock *nb = b+1;
                 if (isIntronBetween(b, nb))
                     {
                     int start = b->hEnd, end = nb->hStart;
                     in = newLiteFeature(start, end, cloneStrand);
                     slAddHead(&inList, in);
                     }
                 }
             }
         }    
     slUniqify(&inList, cmpLiteFeatures, freeLiteFeature);
 
     altIntronList = filterLite(altIntronList, inList, inList, feat->name, feat->chrom, 
         typeName, filter);
     
     freeLiteFeatureList(&inList);
     cdaFreeAliList(&cdaList);
     }
 slUniqify(&altIntronList, cmpIdbFeatures, freeIdbFeature);
 return altIntronList;
 }
 
 
 struct idbFeature *filterRegions(struct idbFeature *featList, int featType, int sourceType)
 /* Do region processing - which can be pretty wild. */
 {
 /* If they want it all just return current list. */
 if (featType == rtAll)
     return featList;
 switch (featType)
     {
     case rtAll:
         return featList;
     case rtExons:
         return breakIntoIntronsOrExons(featList, FALSE);
     case rtIntrons:
         return breakIntoIntronsOrExons(featList, TRUE);
     case rtIntergenic:
         return intergenicRegions(featList, sourceType);
     case rtSkippedExon:
         return skippedExons(featList, sourceType);
     case rtSkippedIntron:
         return skippedIntrons(featList, sourceType);
     case rtAlt3Intron:
         return altEnd(featList, sourceType, alt3Filter, "alt3");
     case rtAlt5Intron:
         return altEnd(featList, sourceType, alt5Filter, "alt5");
     default:
         errAbort("Sorry, that region type isn't implemented yet");
     }
     return NULL;
 }
 
 struct idbFeature *offsetFeatures(struct idbFeature *oldList, int type, int start, int end)
 /* Add offsets to features list. (Removes any empties it creates.) */
 {
 struct idbFeature *feat, *next;
 struct idbFeature *newList = NULL;
 if (start == 0 && end == 0)
     return oldList;
 for (feat = oldList; feat != NULL; feat = next)
     {
     next = feat->next;
     if (feat->strand == '+')
         {
         switch(type)
             {
             case rttRegion:
                 feat->start += start;
                 feat->end += end;
                 break;
             case rttStart:
                 feat->end = feat->start + end;
                 feat->start = feat->start + start;
                 break;
             case rttEnd:
                 feat->start = feat->end + start;
                 feat->end = feat->end + end;
                 break;
             }    
         }
     else
         {
         int fStart = feat->start;
         int fEnd = feat->end;
         int fSize;
         switch(type)
             {
             case rttRegion:
                 feat->start -= end;
                 feat->end -= start;                
                 break;
             case rttStart:
                 fSize = end - start;
                 feat->start = fEnd - end;
                 feat->end = feat->start + fSize;
                 break;
             case rttEnd:
                 fSize = end - start;
                 feat->start = fStart - end;
                 feat->end = feat->start + fSize;
                 break;
             }    
         }
     if (feat->start < feat->end)
         {
         slAddHead(&newList, feat);
         }
     else
         {
         freeIdbFeature(feat);
         }
     }
 slReverse(newList);
 return newList;
 }
 
 boolean cdaIsEst(struct cdaAli *cda)
 /* Returns TRUE if it's an EST type name ending in .3 or .5 */
 {
 char *s;
 if ((s = strrchr(cda->name,'.')) == NULL)
     return FALSE;
 return (s[1] == '3' || s[1] == '5');
 }
 
 void  setByCda(char *hits, int featStart, int featEnd, struct cdaAli *cda, boolean isIntron)
 /* Set parts of hits corresponding to exons or introns depending if startOffset is 0 or 1. */
 {
 struct cdaBlock *blocks = cda->blocks;
 int blockCount = cda->blockCount;
 int i;
 
 if (isIntron)
     blockCount -= 1;
 for (i = 0; i<blockCount; ++i)
     {
     int bStart, bEnd;
     int start, end, size;
     struct cdaBlock *b = blocks+i;
     if (isIntron)
         {
         struct cdaBlock *nb = b+1;
         bStart = b->hEnd;
         bEnd = nb->hStart;
         }
     else
         {
         bStart = b->hStart;
         bEnd = b->hEnd;
         }
     start = max(featStart, bStart);
     end = min(featEnd, bEnd);
     size = end - start;
     if (size > 0)
         memset(hits + start - featStart, 0xff, size);
     }
 }
 
 struct liteFeature *makeLiteRuns(char *hits, int featStart, int featEnd)
 /* Convert byte-by-byte hit discription to a liteList - where the runs that
  * are set will be on the list, and the runs of zeros between elements of 
  * the lite list. */
 {
 struct liteFeature *liteList = NULL, *lite;
 int size = featEnd - featStart + 1;     /* We added a zero tag to hits. */
 char lastHit = 0, hit;
 int lastStart = 0;
 int i;
 for (i=0; i<size; ++i)
     {
     hit = hits[i];
     if (hit && !lastHit)
         lastStart = i;
     else if (!hit && lastHit)
         {
         lite = newLiteFeature(lastStart+featStart, i+featStart, '+');
         slAddHead(&liteList, lite);
         }
     lastHit = hit;
     }
 slReverse(&liteList);
 return liteList;
 }
 
 double hitRatio(char *hits, int size)
 /* Return % of hits array that's set. */
 {
 int setCount = 0;
 int i;
 
 if (size <= 0)
     return 0.0;
 for (i=0; i<size; ++i)
     {
     if (hits[i]) ++setCount;
     }
 return (double)setCount/size;    
 }
 
 void addLiteIntersection(struct idbFeature **pIdbList, struct liteFeature *intersectList, 
     struct idbFeature *feature, char *nameBase)
 /* Add intersections of intersectList with feature to IdbList */
 {
 char nameBuf[512];
 int partCount = 0;
 int start, end, size;
 int fStart = feature->start;
 int fEnd = feature->end;
 struct liteFeature *intersect;
 
 if (feature->strand == '-')
     slReverse(&intersectList);
 for (intersect = intersectList; intersect != NULL; intersect = intersect->next)
     {
     start = max(fStart, intersect->start);
     end = min(fEnd, intersect->end);
     size = end - start;
     if (size > 0)
         {
         struct idbFeature *idb;
         sprintf(nameBuf, "%s_%s_%d", feature->name, nameBase, ++partCount);
         idb = newIdbFeature(nameBuf, feature->chrom, start, end, feature->strand);
         slAddHead(pIdbList, idb);
         }
     }
 if (feature->strand == '-')
     slReverse(&intersectList);
 }
 
 void invertHits(char *hits, int size)
 /* Invert logic of hit array. */
 {
 char *endHits = hits+size;
 for ( ; hits < endHits; ++hits)
     {
     if (*hits)
         *hits = 0;
     else
         *hits = (char)0xFF;
     }
 }
 
 void addAnywhere(int whereType, char *hits, int logicType, struct idbFeature *feat, char *nameBase, struct idbFeature **pNewList)
 /* Add in sections defined by set parts of hit list. */
 {
 int featSize = feat->end - feat->start;
 if (logicType == logicNeg)
     invertHits(hits, featSize);
 switch (whereType)
     {
     case cwAnywhere:
         if (hitRatio(hits, featSize) >= 0.00001)
             {
             slAddHead(pNewList, feat);
             }
         break;
     case cwMostly:
         if (hitRatio(hits, featSize) >= 0.50001)
             {
             slAddHead(pNewList, feat);
             }
         break;
     case cwThroughout:
         if (hitRatio(hits, featSize) >= 0.99999)
             {
             slAddHead(pNewList, feat);
             }
         break;
     case cwPiecemeal: /* break it up! */
         {
         struct liteFeature *liteList = makeLiteRuns(hits, feat->start, feat->end);
         addLiteIntersection(pNewList, liteList, feat, nameBase);
         freeLiteFeatureList(&liteList);
         }
     }
 }
 
 struct idbFeature *restrictOneCdnaType(struct idbFeature *oldList, 
     int cdnaHitType, int cdnaStageType, int cdnaWhereType, int logicType, int regionType)
 /* Restrict features list to areas that have cDNA coverage. */
 {
 struct idbFeature *newList = NULL, *feat, *next;
 
 for (feat = oldList; feat != NULL; feat = next)
     {
     struct cdaAli *cdaList;
     struct cdaAli *cda;
     int featStart = feat->start;
     int featEnd = feat->end;
     int featSize = featEnd - featStart;
     char *hits = needLargeMem(featSize+1); /* Extra zero at end will simplify things */
 
     memset(hits, 0, featSize+1);    
     next = feat->next;
     cdaList = wormCdaAlisInRange(feat->chrom, feat->start, feat->end);
     cdaCoalesceFast(cdaList);
     for (cda = cdaList; cda != NULL; cda = cda->next)
         {
         boolean isEmbryonic = cda->isEmbryonic;
         boolean isEst = cdaIsEst(cda);
         if ((cdnaStageType == cstEither || (cdnaStageType == cstMixed && !isEmbryonic) || (cdnaStageType == cstEmbryo && isEmbryonic))
          && (cdnaHitType == cdtEither || (cdnaHitType == cdtEst && isEst) || (cdnaHitType == cdtFullLength && !isEst) ) )
             {
             /* Here there's a relevant cDNA.  What we do with it depends on region type.
              * if looking at introns or exons go into details of cdna alignment.  Other-
              * wise just get whole thing. */
             switch (regionType)
                 {
                 case rtAll:
                 case rtIntergenic:
                     {
                     int start, end, size;
                     start = max(featStart, cda->chromStart);
                     end = min(featEnd, cda->chromEnd);
                     size = end - start;
                     if (size > 0)
                         memset(hits + start - featStart, 0xff, size);
                     break;
                     }
                 case rtExons:
                 case rtSkippedExon:
                     {
                     setByCda(hits, featStart, featEnd, cda, FALSE);
                     break;
                     }
                 case rtIntrons:
                 case rtSkippedIntron:
                 case rtAlt3Intron:
                 case rtAlt5Intron:
                     {
                     setByCda(hits, featStart, featEnd, cda, TRUE);
                     break;
                     }
                 default:
                     errAbort("Unknown region type %d\n", regionType);
                     break;
                 }
             }
         }
     addAnywhere(cdnaWhereType, hits, logicType, feat, "cdna", &newList);
     freeMem(hits);
     cdaFreeAliList(&cdaList);
     }
 slReverse(&newList);
 return newList;
 }
 
 struct idbFeature *restrictCdnaDiff(struct idbFeature *oldList, 
     int cdnaHitType, int cdnaStageType, int cdnaWhereType, int logicType, int regionType)
 /* Restrict features list to areas that have cDNA coverage in one stage but not another. */
 {
 struct idbFeature *newList = NULL, *feat, *next;
 
 if (logicType == logicNeg)
     errAbort("Sorry, the Embryo Only and Mixed Only cDNA options don't work under inverted logic.");
 if (cdnaWhereType != cwAnywhere)
     errAbort("Sorry, the Embryo Only and Mixed Only cDNA options only works 'Anywhere', "
              " not 'Mostly,' or 'Throughout' or 'Piecemeal'");
 for (feat = oldList; feat != NULL; feat = next)
     {
     struct cdaAli *cdaList;
     struct cdaAli *cda;
     boolean gotAny = FALSE;
     boolean gotMixed = FALSE, gotEmbryo = FALSE, gotEst = FALSE, gotFullLength = FALSE;
     
     next = feat->next;
     cdaList = wormCdaAlisInRange(feat->chrom, feat->start, feat->end);
 
     for (cda = cdaList; cda != NULL; cda = cda->next)
         {
         gotAny = TRUE;
         if (cda->isEmbryonic)
             gotEmbryo = TRUE;
         else
             gotMixed = TRUE;
         if (cdaIsEst(cda))
             gotEst = TRUE;
         else
             gotFullLength = TRUE;
         }
     /* See if stage is appropriate. */
     if (((cdnaStageType == cstEither && gotAny) || 
          (cdnaStageType == cstMixedOnly && gotMixed && !gotEmbryo) ||
          (cdnaStageType == cstEmbryoOnly && gotEmbryo && !gotMixed) )
      && ((cdnaHitType == cdtEither && gotAny) ||
          (cdnaHitType == cdtEst && gotEst) ||
          (cdnaHitType == cdtFullLength && gotFullLength)))
         {
         slAddHead(&newList, feat);
         }
     else
         {
         freeIdbFeature(feat);
         }  
     cdaFreeAliList(&cdaList);
     }
 slReverse(&newList);
 return newList;
 }
 
 struct idbFeature *restrictToCdna(struct idbFeature *oldList, 
     int cdnaHitType, int cdnaStageType, int cdnaWhereType, int logicType, int regionType)
 /* Restrict features list to areas that have cDNA coverage or complex/partial sort. */
 {
 if (cdnaHitType == cdtDontCare)
     return oldList;
 if (cdnaStageType == cstEmbryoOnly || cdnaStageType == cstMixedOnly)
     return restrictCdnaDiff(oldList, cdnaHitType, cdnaStageType, cdnaWhereType, logicType, regionType);
 else 
     return restrictOneCdnaType(oldList, cdnaHitType, cdnaStageType, cdnaWhereType, logicType, regionType);
 }
 
 struct liteFeature *liteXaList(struct xaAli *xaList, int featStart, int featEnd, int cbHomologyType)
 /* Return a lite features list corresponding the the parts of xaList of
  * the right homology. */
 {
 struct liteFeature *liteList = NULL, *lite;
 struct xaAli *xa;
 char *hSym;
 char *tSym;
 int symCount = 0;
 int startNtIx = 0;
 int curNtIx = 0;
 int i = 0;
 char nt;
 char ok, lastOk;
 char okTable[256];
 
 /* Here's a little state machine that puts something on the lite
  * list every time the hidden symbol goes from an ok one to a
  * not-ok one.  It's slightly complicated by having to keep track
  * of where in the target (C. elegans) sequence we are in spite
  * of insert characters. */
 zeroBytes(okTable, sizeof(okTable));
 okTable['1'] = okTable['2'] = okTable['3'] = okTable['H'] = TRUE;
 if (cbHomologyType == cbtAny)
     okTable['L'] = TRUE;
 for (xa = xaList; xa != NULL; xa = xa->next)
     {
     hSym = xa->hSym;
     tSym = xa->tSym;
     curNtIx = xa->tStart;
     symCount = xa->symCount;
     lastOk = FALSE;
     for (i=0; i<=symCount; ++i)  /* We depend on the zero tag! */
         {
         nt = tSym[i];
         if (nt != '-')
             {
             ok = okTable[(int)hSym[i]];
             if (ok && !lastOk)
                 {
                 startNtIx = curNtIx;
                 }
             else if (!ok && lastOk)
                 {
                 int start = max(startNtIx, featStart);
                 int end = min(curNtIx, featEnd);
                 if (start < end)
                     {
                     lite = newLiteFeature(startNtIx, curNtIx, '+');
                     slAddHead(&liteList, lite);
                     }
                 }
             ++curNtIx;
             lastOk = ok;
             }
         }
     }
 slUniqify(&liteList, cmpLiteFeatures, freeLiteFeature);
 return liteList;
 }
 
 struct idbFeature *filterCbHomology(struct idbFeature *oldList, int cbHomologyType, int logicType, int cbWhereType)
 /* Filter out all but selected level of homology. */
 {
 struct idbFeature *newList = NULL, *feat, *next;
 struct xaAli *xaList;
 struct liteFeature *xfList, *xf;
 char *xDir;
 char fileName[512];
 FILE *data;
 FILE **indexes;
 FILE *index;
 char *cb = "cbriggsae";
 int chromIx;
 int i;
 
 /* If user doesn't care just return old list. */
 if (cbHomologyType == cbtDontCare)
     return oldList;
 
 indexes = needMem(chromCount * sizeof(indexes));
 xDir = wormXenoDir();
 sprintf(fileName, "%s%s/all%s", xDir, cb, xaAlignSuffix());
 data = xaOpenVerify(fileName);
 
 for (feat = oldList; feat != NULL; feat = next)
     {
     int featStart = feat->start;
     int featEnd = feat->end;
     int featSize = featEnd - featStart;
     char *hits = needLargeMem(featSize+1);
     memset(hits, 0, featSize+1);
 
     next = feat->next;
     chromIx = wormChromIx(feat->chrom);
     if ((index = indexes[chromIx]) == NULL)
         {
         sprintf(fileName, "%s%s/%s%s", xDir, cb, feat->chrom, xaChromIxSuffix());
         indexes[chromIx] = index = xaIxOpenVerify(fileName);
         }
     xaList = xaRdRange(index, data, featStart, featEnd, FALSE);
     xfList = liteXaList(xaList, featStart, featEnd, cbHomologyType);    
     for (xf = xfList; xf != NULL; xf = xf->next)
         {
         int start = max(xf->start, featStart);
         int end = min(xf->end, featEnd);
         int size = end-start;
         if (size > 0)
             memset(hits + start-featStart, 0xff, size);
         }
     addAnywhere(cbWhereType, hits, logicType, feat, "hom", &newList);
     freeLiteFeatureList(&xfList);
     xaAliFreeList(&xaList);
     freeMem(hits);
     }
 slReverse(&newList);
 
 /* Clean up time. */
 fclose(data);
 for (i=0; i<chromCount; ++i)
     carefulClose(&indexes[i]);
 freeMem(indexes);
 
 return newList;
 }
 
 struct idbFeature *filterGenePredictions(struct idbFeature *oldList, int gpType, int logicType, int gpWhere)
 /* Filter based on gene predictions if any. */
 {
 char *gdfDir;
 struct wormGdfCache *gdfCache;
 struct idbFeature *newList = NULL, *feat, *nextFeat;
 
 if (gpType == gpDontCare)
     return oldList;
 findGdf(&gdfDir, &gdfCache);
 for (feat = oldList; feat != NULL; feat = nextFeat)
     {
     struct wormFeature *geneList, *gene;
     int featStart = feat->start;
     int featEnd = feat->end;
     int featSize = featEnd - featStart;
     char *hits = needLargeMem(featSize+1); /* Extra zero at end will simplify things */
 
     memset(hits, 0, featSize+1);    
     nextFeat = feat->next;
     geneList = wormSomeGenesInRange(feat->chrom, feat->start, feat->end, gdfDir);
     for (gene = geneList; gene != NULL; gene = gene->next)
         {
         int start, end, size;
         switch (gpType)
             {
             case gpCds:
                 {
                 start = max(gene->start, featStart);
                 end = min(gene->end, featEnd);
                 size = end-start;
                 if (size > 0)
                     memset(hits+start-featStart, 0xff, size);
                 break;
                 }
             case gpCoding:
                 {
                 struct gdfGene *gdf = wormGetSomeGdfGene(gene->name, gdfCache);
                 struct gdfDataPoint *dp = gdf->dataPoints;
                 int dpCount = gdf->dataCount;
                 int i;
 
                 for (i=0; i<dpCount; i += 2)
                     {
                     start = max(dp[i].start, featStart);
                     end = min(dp[i+1].start, featEnd);
                     size = end-start;
                     if (size > 0)
                         memset(hits+start-featStart, 0xff, size);
                     }
                 gdfFreeGene(gdf);
                 break;
                 }
             }
         }
     addAnywhere(gpWhere, hits, logicType, feat, "gp", &newList);
     slFreeList(&geneList);
     freeMem(hits);
     }
 slReverse(&newList);
 return newList;
 }
 
 
 void dumpFeatList(char *name, struct idbFeature *featList)
 {
 struct idbFeature *feat;
 
 printf("Feature List %s\n", name);
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     printf("  %s %s:%d-%d %c\n", feat->name, feat->chrom, feat->start, feat->end, feat->strand);
     }
 }
 
 void crashAbort()
 {
 char *pt = NULL;
 *pt = 0;
 }
 
 void doMiddle()
 /* Write out the main part of the .html */
 {
 int sourceType, offsetType, regionType;
 int cdnaHitType, cdnaWhereType,  cdnaStageType;
 int cbHomologyType, cbWhereType;
 int logicType;
 int gpType, gpWhere;
 struct idbFeature *featList;
 char *rawNames;
 int startOffset, endOffset;
 long startTime, endTime;
 boolean reportTime = FALSE;
 
 //pushAbortHandler(crashAbort);
 printf("<TT><PRE>");
 wormChromNames(&chromNames, &chromCount);
 
 /* Get the source sequence names and original ranges. */
 sourceType = cgiOneChoice("source", sourceChoices, ArraySize(sourceChoices));
 rawNames = cgiString("seqNames");
 startTime = clock1000();
 featList = getSequenceNames(sourceType, rawNames);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to getSequenceNames\n", 0.001*(endTime-startTime) );
 
 //dumpFeatList("After source", featList);
 
 /* Break up and filter features based on region. */
 startTime = clock1000();
 regionType = cgiOneChoice("region", regionChoices, ArraySize(regionChoices));
 featList = filterRegions(featList, regionType, sourceType);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to filterRegions\n", 0.001*(endTime-startTime) );
 
 /* Offset features. */
 startTime = clock1000();
 offsetType = cgiOneChoice("relativeTo", relativeToChoices, ArraySize(relativeToChoices));
 startOffset = cgiOptionalInt("startOffset", 0);
 endOffset = cgiOptionalInt("endOffset", 0);
 featList = offsetFeatures(featList, offsetType, startOffset, endOffset);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to offsetFeatures\n", 0.001*(endTime-startTime) );
 
 /* Restrict to C. briggsae homologous regions. */
 startTime = clock1000();
 cbWhereType = cgiOneChoice("cbWhere", cWhereChoices, ArraySize(cWhereChoices));
 cbHomologyType = cgiOneChoice("cbHomology", cbHomologyChoices, ArraySize(cbHomologyChoices));
 logicType = cgiOneChoice("notCb", logicChoices, ArraySize(logicChoices));
 featList = filterCbHomology(featList, cbHomologyType, logicType, cbWhereType);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to filterCbHomology\n", 0.001*(endTime-startTime) );
 
 /* Restrict to cDNA hits. */
 startTime = clock1000();
 cdnaWhereType = cgiOneChoice("cdnaWhere", cWhereChoices, ArraySize(cWhereChoices));
 cdnaHitType = cgiOneChoice("cdnaHits", cdnaHitsChoices, ArraySize(cdnaHitsChoices));
 cdnaStageType = cgiOneChoice("cdnaStage", cdnaStageChoices, ArraySize(cdnaStageChoices));
 logicType = cgiOneChoice("notCdna", logicChoices, ArraySize(logicChoices));
 featList = restrictToCdna(featList, cdnaHitType, cdnaStageType, cdnaWhereType, logicType, regionType);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to restrictToCdna\n", 0.001*(endTime-startTime) );
 
 /* Restrict to gene prediction hits. */
 startTime = clock1000();
 gpWhere = cgiOneChoice("gpWhere", cWhereChoices, ArraySize(cWhereChoices));
 gpType = cgiOneChoice("gpType", gpChoices, ArraySize(gpChoices));
 logicType = cgiOneChoice("notGp", logicChoices, ArraySize(logicChoices));
 featList = filterGenePredictions(featList, gpType, logicType, gpWhere);
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to filterGenePredictions\n", 0.001*(endTime-startTime) );
 
 startTime = clock1000();
 outputList(featList, cgiOneChoice("output", outputChoices, ArraySize(outputChoices)),
     cgiOptionalInt("lineSize",0), cgiOptionalInt("wordSize",0), 
     cgiBoolean("lineNumbers"), cgiOneChoice("capType", capChoices, ArraySize(capChoices)));
 endTime = clock1000();
 if (reportTime) printf("%4.3f seconds to outputList\n", 0.001*(endTime-startTime) );
 }
 
 int main(int argv, char *argc[])
 {
 debugOut = mustOpen("/usr/local/apache/trash/uglyOut.fa", "w");
 dnaUtilOpen();
 localMem = lmInit(0);
 htmShell("Relative Sequence Data", doMiddle, NULL);
 return 0;
 }