1cc8c6d3d7f2357898ad366c94b1f33a403a2aee
angie
  Thu Dec 8 14:14:33 2016 -0800
Libifying isAllDigits from a handful of places into common.c.

diff --git src/lib/wormdna.c src/lib/wormdna.c
index 99518f0..4f88cde 100644
--- src/lib/wormdna.c
+++ src/lib/wormdna.c
@@ -1,1211 +1,1200 @@
 /* wormDna - Stuff for finding worm DNA and annotation features.
  * This is pretty much the heart of the cobbled-together 'database'
  * behind the intronerator. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "fa.h"
 #include "gdf.h"
 #include "nt4.h"
 #include "snof.h"
 #include "wormdna.h"
 #include "cda.h"
 #include "sig.h"
 #include "dystring.h"
 
 
 static char *jkwebDir = NULL;
 
 static char *cdnaDir = NULL;
 static char *featDir = NULL;
 static char *nt4Dir = NULL;
 static char *sangerDir = NULL;
 static char *genieDir = NULL;
 static char *xenoDir = NULL;
 
 static void getDirs()
 /* Fill in the static data-directory variables the first time this is
  * called; later calls return immediately because jkwebDir is then set. */
 {
 char path[512];
 if (jkwebDir != NULL)
     return;
 /* The JKWEB environment variable, when present, is prefixed to the names
  * of the *.dir pointer files; otherwise no prefix is used. */
 jkwebDir = getenv("JKWEB");
 if (jkwebDir == NULL)
     jkwebDir = "";
 /* Each *.dir file is read through the same buffer that named it: the
  * first word of the file replaces the file name in path. */
 sprintf(path, "%scdna.dir", jkwebDir);
 firstWordInFile(path, path, sizeof(path));
 cdnaDir = cloneString(path);
 sprintf(path, "%sfeat.dir", jkwebDir);
 firstWordInFile(path, path, sizeof(path));
 featDir = cloneString(path);
 sprintf(path, "%snt4.dir", jkwebDir);
 firstWordInFile(path, path, sizeof(path));
 nt4Dir = cloneString(path);
 /* The two gene-prediction directories live under featDir, so they must
  * be built after featDir is known. */
 sprintf(path, "%ssanger/", featDir);
 sangerDir = cloneString(path);
 sprintf(path, "%sgenie/", featDir);
 genieDir = cloneString(path);
 sprintf(path, "%sxeno.dir", jkwebDir);
 firstWordInFile(path, path, sizeof(path));
 xenoDir = cloneString(path);
 }
 
 char *wormFeaturesDir()
 /* Return the features directory. (Includes trailing slash.)
  * The string is the module's own static copy (featDir), initialized on
  * first use by getDirs(); callers should not free it. */
 {
 getDirs();
 return featDir;
 }
 
 char *wormChromDir()
 /* Return the directory with the chromosomes.  This is nt4Dir, the same
  * directory makeChromFileName() uses for the <chrom>.nt4 files.  The
  * string is the module's static copy; callers should not free it. */
 {
 getDirs();
 return nt4Dir;
 }
 
 char *wormCdnaDir()
 /* Return directory with cDNA data.  The string is the module's static
  * copy (cdnaDir), initialized on first use by getDirs(). */
 {
 getDirs();
 return cdnaDir;
 }
 
 char *wormSangerDir()
 /* Return directory with Sanger specific gene predictions.  This is the
  * "sanger/" subdirectory of the features directory, as built by getDirs(). */
 {
 getDirs();
 return sangerDir;
 }
 
 char *wormGenieDir()
 /* Return directory with Genie specific gene predictions.  This is the
  * "genie/" subdirectory of the features directory, as built by getDirs(). */
 {
 getDirs();
 return genieDir;
 }
 
 char *wormXenoDir()
 /* Return directory with cross-species alignments.  The string is the
  * module's static copy (xenoDir), initialized on first use by getDirs(). */
 {
 getDirs();
 return xenoDir;
 }
 
 static char *chromIds[] = {"i", "ii", "iii", "iv", "v", "x", "m", };
 
 void wormChromNames(char ***retNames, int *retNameCount)
 /* Get list of worm chromosome names.
  * *retNames is set to the module's static chromIds array (not a copy, so
  * it must not be freed or modified) and *retNameCount to its length. */
 {
 *retNames = chromIds;
 *retNameCount = ArraySize(chromIds);
 }
 
 int wormChromIx(char *name)
 /* Return index of worm chromosome in chromIds, as computed by stringIx().
  * Callers in this file treat a negative result as "not a chromosome". */
 {
 return stringIx(name, chromIds);
 }
 
 char *wormChromForIx(int ix)
 /* Given ix, return worm chromosome official name.
  * ix must satisfy 0 <= ix < ArraySize(chromIds).  The returned pointer is
  * an element of the static chromIds table and must not be freed. */
 {
 /* Strictly less than: ix == ArraySize(chromIds) would index one element
  * past the end of the table. */
 assert(ix >= 0 && ix < ArraySize(chromIds));
 return chromIds[ix];
 }
 
 char *wormOfficialChromName(char *name)
 /* This returns a pointer to our official string for the chromosome name.
  * (This allows some routines to do direct pointer comparisons rather
  * than string comparisons.) */
 {
 int ix = wormChromIx(name);
 if (ix < 0) return NULL;
 return chromIds[ix];
 }
 
 
 static struct snof *cdnaSnof = NULL;
 static FILE *cdnaFa = NULL;
 
 static void wormCdnaCache()
 /* Open the cDNA name index (cdnaSnof) and the cDNA fasta file (cdnaFa)
  * the first time through; do nothing if the index is already open. */
 {
 char path[512];
 getDirs();
 if (cdnaSnof != NULL)
     return;
 sprintf(path, "%s%s", cdnaDir, "allcdna");
 cdnaSnof = snofMustOpen(path);
 sprintf(path, "%s%s", cdnaDir, "allcdna.fa");
 cdnaFa = mustOpen(path, "rb");
 }
 
 void wormCdnaUncache()
 /* Tear down structure for reading cDNAs: close the cDNA index and the
  * cDNA fasta file.  They are reopened on demand by wormCdnaCache(). */
 {
 snofClose(&cdnaSnof);
 carefulClose(&cdnaFa);
 /* cdnaDir is deliberately NOT freed here.  getDirs() only fills in the
  * directory variables while jkwebDir is NULL, so once freed cdnaDir would
  * stay NULL and every later user (wormCdnaCache, wormCdnasInRange,
  * wormOpenGoodAli, wormSearchAllCdna) would format a NULL path. */
 }
 
 void wormFreeCdnaInfo(struct wormCdnaInfo *ci)
 /* Free the mother string in the cdnaInfo.  (The info structure itself normally isn't
  * dynamically allocated.)  The whole structure is zeroed afterwards, so
  * every field of *ci is cleared, not just motherString. */
 {
 freeMem(ci->motherString);
 zeroBytes(ci, sizeof(*ci));
 }
 
 static char *realInfoString(char *s)
 /* Translate the one-character placeholder "?" into NULL; every other
  * string (including the empty string) is handed back unchanged. */
 {
 return (strcmp(s, "?") == 0) ? NULL : s;
 }
 
 static void parseRestOfCdnaInfo(char *textInfo, struct wormCdnaInfo *retInfo)
 /* Parse text info string into a binary structure retInfo.
  * textInfo is split on '|' and must yield at least 8 fields (errAbort
  * otherwise).  A field that is just "?" counts as missing.  Fields, in order:
  *   0 orientation (first character kept)   1 gene         2 product
  *   3 CDS range                            4 embryo/adult 5 herm/male
  *   6 reserved                             7 description
  * Only fields that are present are written, so the caller is expected to
  * have cleared *retInfo first.  The string fields stored in retInfo are the
  * pointers produced by chopString (presumably pointing into textInfo --
  * confirm against chopString). */
 {
 int wordCount;
 char *words[32];
 char *s;
 
 wordCount = chopString(textInfo, "|", words, ArraySize(words));
 if (wordCount < 8)
     errAbort("Expecting at least 8 fields in cDNA database, got %d", wordCount);
 if ((s = realInfoString(words[0])) != NULL)
     retInfo->orientation = s[0];
 retInfo->gene = realInfoString(words[1]);
 retInfo->product = realInfoString(words[2]);
 if ((s = realInfoString(words[3])) != NULL)
     {
     /* CDS range: split on '.' and accept it only when exactly two parts
      * come back.  A leading '<' on the first part or '>' on the second
      * marks that end as not known and is skipped before the number is read. */
     char *parts[2];
     int partCount;
     partCount = chopString(s, ".", parts, ArraySize(parts));
     if (partCount == 2)
         {
         retInfo->knowStart = retInfo->knowEnd = TRUE;
         if (parts[0][0] == '<')
             {
             retInfo->knowStart = FALSE;
             parts[0] += 1;
             }
         if (parts[1][0] == '>')
             {
             retInfo->knowEnd = FALSE;
             parts[1] += 1;
             }
         retInfo->cdsStart = atoi(parts[0]);
         retInfo->cdsEnd = atoi(parts[1]);
         }
     }
 /* Developmental stage: any value other than "embryo" or "adult" is ignored. */
 if ((s = realInfoString(words[4])) != NULL)
     {
     if (sameString("embryo", s))
         retInfo->isEmbryonic = TRUE;
     else if (sameString("adult", s))
         retInfo->isAdult = TRUE;
     }
 /* Sex: any value other than "herm" or "male" is ignored. */
 if ((s = realInfoString(words[5])) != NULL)
     {
     if (sameString("herm", s))
         retInfo->isHermaphrodite = TRUE;
     else if (sameString("male", s))
         retInfo->isMale = TRUE;
     }
 
 if ((s = realInfoString(words[6])) != NULL)
     {
     /* Reserved. Unused currently */
     }
 retInfo->description = realInfoString(words[7]);
 }
 
 void wormFaCommentIntoInfo(char *faComment, struct wormCdnaInfo *retInfo)
 /* Process line from .fa file containing information about cDNA into binary
  * structure.
  * Does nothing at all when retInfo is NULL (faComment is then not even
  * looked at).  Otherwise faComment is modified in place and retInfo keeps
  * pointers into it: motherString is faComment itself, so the buffer must
  * stay alive until wormFreeCdnaInfo() releases it.  errAborts if the line
  * contains no space. */
 {
 if (retInfo)
     {
     char *s;
     zeroBytes(retInfo, sizeof(*retInfo));
     /* Separate out first word and use it as name. */
     s = strchr(faComment, ' ');
     if (s == NULL)
         errAbort("Expecting lots of info, just got %s", faComment);
     *s++ = 0;
     /* +1 skips the first character of the line (presumably the '>' that
      * starts a fasta header -- wormCdnaInfo checks for it before calling). */
     retInfo->name = faComment+1;
     retInfo->motherString = faComment;
 
     parseRestOfCdnaInfo(s, retInfo);
     }
 }
 
 boolean wormCdnaInfo(char *name, struct wormCdnaInfo *retInfo)
 /* Look up the cDNA called name in the cDNA index and parse its fasta
  * header line into retInfo.  Returns FALSE when the name is not in the
  * index.  The header line is cloned and becomes retInfo's mother string;
  * release it with wormFreeCdnaInfo(). */
 {
 char headerLine[512];
 long faOffset;
 wormCdnaCache();
 if (!snofFindOffset(cdnaSnof, name, &faOffset))
     return FALSE;
 fseek(cdnaFa, faOffset, SEEK_SET);
 mustGetLine(cdnaFa, headerLine, sizeof(headerLine));
 if (headerLine[0] != '>')
     errAbort("Expecting line starting with > in cDNA fa file.\nGot %s", headerLine);
 wormFaCommentIntoInfo(cloneString(headerLine), retInfo);
 return TRUE;
 }
 
 boolean wormCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo)
 /* Get a single worm cDNA sequence. Optionally (if retInfo is non-null) get additional
  * info about the sequence.
  * Returns FALSE if name is not in the cDNA index or the record cannot be
  * read.  When retInfo is filled in, the fasta comment becomes its mother
  * string; release it with wormFreeCdnaInfo(). */
 {
 long offset;
 /* Initialized because it is passed on below even when retInfo is NULL, in
  * which case faReadNext() is given no place to store a comment. */
 char *faComment = NULL;
 char **pFaComment = (retInfo == NULL ? NULL : &faComment);
 wormCdnaCache();
 if (!snofFindOffset(cdnaSnof, name, &offset))
     return FALSE;
 fseek(cdnaFa, offset, SEEK_SET);
 if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna))
     return FALSE;
 /* No-op when retInfo is NULL. */
 wormFaCommentIntoInfo(faComment, retInfo);
 return TRUE;
 }
 
 struct wormFeature *newWormFeature(char *name, char *chrom, int start, int end, char typeByte)
 /* Build a wormFeature in a single allocation sized for the structure plus
  * the length of name, with name copied into the structure's name field.
  * chrom is stored by pointer, not copied. */
 {
 struct wormFeature *feature = needMem(sizeof(struct wormFeature) + strlen(name));
 strcpy(feature->name, name);
 feature->typeByte = typeByte;
 feature->chrom = chrom;
 feature->start = start;
 feature->end = end;
 return feature;
 }
 
 
 static struct wormFeature *scanChromOffsetFile(char *dir, char *suffix, 
     bits32 signature, int nameOffset, char *chromId, int start, int end,
     int addEnd)
 /* Scan a chrom.pgo or chrom.cdo file for names of things that are within
  * range.
  * The file read is <dir><chromId><suffix>.  It starts with three bits32
  * values: a signature (must equal the signature argument or we errAbort),
  * the entry count and the name size.  Each entry is nameSize+nameOffset
  * bytes: two ints used as start and end, then the name at byte nameOffset,
  * with the type byte immediately before the name.
  * Returns the matching features in file order; addEnd is added to each
  * stored end.  chromId is stored by pointer in every feature. */
 {
 FILE *f;
 char fileName[512];
 bits32 sig, nameSize, entryCount;
 int entrySize;
 int *entry;
 char *name;
 bits32 i;
 struct wormFeature *list = NULL, *el;
 char *typePt;
 char typeByte;
 
 sprintf(fileName, "%s%s%s", dir, chromId, suffix);
 f = mustOpen(fileName, "rb");
 mustReadOne(f, sig);
 if (sig != signature)
     errAbort("Bad signature on %s", fileName);
 mustReadOne(f, entryCount);
 mustReadOne(f, nameSize);
 entrySize = nameSize + nameOffset;
 /* One byte more than an entry; presumably so the name stays
  * zero-terminated if needMem() zero-fills -- TODO confirm. */
 entry = needMem(entrySize + 1);
 name = (char *)entry;
 name += nameOffset;
 typePt = name-1;
 for (i=0; i<entryCount; ++i)
     {
     mustRead(f, entry, entrySize);
     /* Stop at the first entry that starts past the range (this assumes
      * entries are sorted by start -- verify against the file writer). */
     if (entry[0] > end)
         break;
     if (entry[1] < start)
         continue;
     typeByte = *typePt;
     el = newWormFeature(name, chromId, entry[0], entry[1]+addEnd, typeByte);
     slAddHead(&list, el);
     }
 slReverse(&list);
 fclose(f);
 freeMem(entry);
 return list;
 }
 
 struct wormFeature *wormCdnasInRange(char *chromId, int start, int end)
 /* Get all cDNAs that overlap the range. freeDnaSeqList the returned
  * list when you are through with it.
  * NOTE(review): the list returned is a wormFeature list; elsewhere in this
  * file such lists are released with slFreeList() -- confirm which is meant. */
 {
 /* This routine looks through the .CDO files made by cdnaOff
  */
 getDirs();
 return scanChromOffsetFile(cdnaDir, ".cdo", cdoSig, 2*sizeof(int)+1, 
     chromId, start, end, 0);
 }
 
 struct wormFeature *wormSomeGenesInRange(char *chromId, int start, int end, char *gdfDir)
 /* Get info on genes that overlap range in a particular set of gene predictions.
  * gdfDir is the directory holding that set's <chromId>.pgo file; it is
  * used as a prefix, so it should end with a path separator. */
 {
 return scanChromOffsetFile(gdfDir, ".pgo", pgoSig, 2*sizeof(int)+1,
     chromId, start, end, 0);
 }
 
 struct wormFeature *wormGenesInRange(char *chromId, int start, int end)
 /* Get names of all genes that overlap the range, using the gene set in
  * the Sanger directory. */
 {
 /* This routine looks through the .PGO files made by makePgo
  */
 getDirs();
 return wormSomeGenesInRange(chromId, start, end, sangerDir);
 }
 
 struct wormFeature *wormCosmidsInRange(char *chromId, int start, int end)
 /* Get names of all cosmids that overlap the range.  Unlike the gene and
  * cDNA scans, one is added to each stored end (addEnd argument of 1). */
 {
 /* This routine looks through the <chromId>.coo file in the features
  * directory; the file is checked against the same pgoSig signature as
  * the .pgo files.
  */
 getDirs();
 return scanChromOffsetFile(featDir, ".coo", pgoSig, 2*sizeof(int)+1,
     chromId, start, end, 1);
 }
 
 FILE *wormOpenGoodAli()
 /* Opens good alignment file and reads signature. 
  * (You can then cdaLoadOne() it.)
  * The file is "good.ali" in the cDNA directory; the caller closes the
  * returned FILE. */
 {
 char fileName[512];
 getDirs();
 sprintf(fileName, "%sgood.ali", cdnaDir);
 return cdaOpenVerify(fileName);
 }
 
 struct cdaAli *wormCdaAlisInRange(char *chromId, int start, int end)
 /* Return list of cdna alignments that overlap range.
  * The index file <cdnaDir><chromId>.alx is read as a signature followed by
  * (start, end, file position) records; each record overlapping the range
  * is loaded from the good.ali file at the recorded position.  The list is
  * returned in index order.  errAborts on a bad signature or a truncated
  * alignment file. */
 {
 struct cdaAli *list = NULL, *el;
 char fileName[512];
 FILE *ixFile, *aliFile;
 bits32 sig;
 int s, e;
 long fpos;
 /* wormOpenGoodAli() calls getDirs(), so cdnaDir is set before it is used
  * to build the index file name. */
 aliFile = wormOpenGoodAli();
 sprintf(fileName, "%s%s.alx", cdnaDir, chromId);
 ixFile = mustOpen(fileName, "rb");
 mustReadOne(ixFile, sig);
 if (sig != alxSig)
     errAbort("Bad signature on %s", fileName);
 for (;;)
     {
     if (!readOne(ixFile, s))
         break;
     mustReadOne(ixFile, e);
     mustReadOne(ixFile, fpos);
     if (e <= start)
         continue;
     if (s >= end)
         break;
     /* cdaLoadOne() hands back the element.  Allocating one beforehand, as
      * this loop used to, only leaked it when el was overwritten. */
     fseek(aliFile, fpos, SEEK_SET);
     el = cdaLoadOne(aliFile);
     if (el == NULL)
         errAbort("Truncated cdnaAli file");
     slAddHead(&list, el);
     }
 slReverse(&list);
 fclose(aliFile);
 fclose(ixFile);
 return list;
 }
 
 boolean nextWormCdnaAndInfo(struct wormCdnaIterator *it, struct dnaSeq **retSeq, 
     struct wormCdnaInfo *retInfo)
 /* Return next sequence and associated info from database.
  * Returns FALSE when faReadNext() reports no further record.  The fasta
  * comment is parsed into retInfo and becomes its mother string, so release
  * it with wormFreeCdnaInfo(). */
 {
 char *faComment;
 
 if (!faReadNext(it->faFile, "unknown", TRUE, &faComment, retSeq))
     return FALSE;
 wormFaCommentIntoInfo(faComment, retInfo);
 return TRUE;
 }
 
 struct dnaSeq *nextWormCdna(struct wormCdnaIterator *it)
 /* Return next sequence in database, read from the iterator's fasta file
  * with faReadOneDnaSeq(); "unknown" is passed as the default name. */
 {
 return faReadOneDnaSeq(it->faFile, "unknown", TRUE);
 }
 
 boolean wormSearchAllCdna(struct wormCdnaIterator **retSi)
 /* Create an iterator positioned at the start of the complete cDNA fasta
  * file (allcdna.fa in the cDNA directory).  Always returns TRUE; failure
  * to open the file is handled inside mustOpen().  Release the iterator
  * with freeWormCdnaIterator(). */
 {
 char faPath[512];
 struct wormCdnaIterator *iter = needMem(sizeof(*iter));
 getDirs();
 sprintf(faPath, "%s%s", cdnaDir, "allcdna.fa");
 iter->faFile = mustOpen(faPath, "rb");
 *retSi = iter;
 return TRUE;
 }
 
 void freeWormCdnaIterator(struct wormCdnaIterator **pIt)
 /* Close the iterator's fasta file, free the iterator and set *pIt to
  * NULL.  A NULL *pIt is quietly accepted. */
 {
 struct wormCdnaIterator *iter = *pIt;
 if (iter == NULL)
     return;
 carefulClose(&iter->faFile);
 freez(pIt);
 }
 
 static boolean isAllAlpha(char *s)
 /* Returns TRUE if every character in string is a letter.  An empty
  * string therefore yields TRUE. */
 {
 for (; *s != 0; ++s)
     {
     /* isalpha() is only defined for EOF and unsigned char values; a plain
      * char can be negative, so convert before the call. */
     if (!isalpha((unsigned char)*s))
         return FALSE;
     }
 return TRUE;
 }
 
-static boolean isAllDigit(char *s)
-/* Returns TRUE if every character in string is a digit. */
-{
-char c;
-while ((c = *s++) != 0)
-    {
-    if (!isdigit(c)) return FALSE;
-    }
-return TRUE;
-}
-
 boolean wormIsOrfName(char *in)
 /* Check to see if the input is formatted correctly to be
  * an ORF.  The only test made is that the string contains a '.'. */
 {
 return strchr(in, '.') != NULL;
 }
 
 boolean wormIsGeneName(char *name)
 /* See if it looks like a worm gene name - that is
  *   abc-12
  * letters followed by a dash followed by a number.
  * name itself is not modified; the check works on a local copy. */
 {
 char buf[128];
 int partCount;
 /* NOTE(review): strncpy does not zero-terminate buf when name is 128
  * characters or longer -- confirm callers never pass names that long. */
 strncpy(buf, name, sizeof(buf));
 /* First call only counts the dash-separated parts (no output array). */
 partCount = chopString(buf, "-", NULL, 0);
 if (partCount == 2)
     {
     char *parts[2];
     chopString(buf, "-", parts, 2);
-    return isAllAlpha(parts[0]) && isAllDigit(parts[1]);
+    return isAllAlpha(parts[0]) && isAllDigits(parts[1]);
     }
 else
     {
     return FALSE;
     }
 }
 
 struct slName *wormGeneToOrfNames(char *name)
 /* Returns list of cosmid.N type ORF names that are known by abc-12 type name.
  * Side effect: name is lower-cased in place.  The "syn" file in the
  * features directory is scanned line by line; the first line whose first
  * word equals name supplies the result (its remaining words, in order).
  * Returns NULL when no line matches.  errAborts if the file can't be
  * opened.  Free the result with slFreeList(). */
 {
 struct slName *synList = NULL;
 char synFileName[512];
 FILE *synFile;
 char lineBuf[128];
 int nameLen = strlen(name);
 
 /* genes are supposed to be lower case. */
 tolowers(name);
 
 /* Open synonym file and loop through each line of it */
 sprintf(synFileName, "%ssyn", wormFeaturesDir());
 if ((synFile = fopen(synFileName, "r")) == NULL)
     errAbort("Can't find synonym file '%s'. (errno: %d)\n", synFileName, errno);
 while (fgets(lineBuf, ArraySize(lineBuf), synFile))
     {
     /* If first part of line matches chop up line. */
     if (strncmp(name, lineBuf, nameLen) == 0)
 	{
 	char *syns[32];
 	int count;
 	count = chopString(lineBuf, whiteSpaceChopper, syns, ArraySize(syns));
 
 	/* Looks like we got a synonym.  Add all the aliases. */
 	/* The prefix test above also matches longer names that merely start
 	 * with name; this exact compare of the first word rejects them. */
 	if (strcmp(name, syns[0]) == 0)
 	    {
 	    int i;
 	    for (i=1; i<count; ++i)
                 slAddTail(&synList, newSlName(syns[i]));
 	    break;
 	    }
 	}
     }
 fclose(synFile);
 return synList;
 }
 
 char *wormGeneFirstOrfName(char *geneName)
 /* Look up the ORF synonyms of geneName and hand back a cloned copy of the
  * first one, or NULL if there are none.  The caller frees the result. */
 {
 struct slName *orfList = wormGeneToOrfNames(geneName);
 char *firstOrf = NULL;
 if (orfList != NULL)
     {
     firstOrf = cloneString(orfList->name);
     slFreeList(&orfList);
     }
 return firstOrf;
 }
 
 boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize)
 /* Look for gene type (unc-12 or something) synonym for cosmid.N name.
  * Scans the "orf2gene" file in the features directory for the first line
  * that starts with orfName followed by a space, and copies the second word
  * of that line into geneNameBuf.  Returns TRUE if such a line was found,
  * FALSE otherwise (geneNameBuf is then left untouched). */
 {
 FILE *f;
 char fileName[512];
 char lineBuf[512];
 int nameLen = strlen(orfName);
 boolean ok = FALSE;
 
 sprintf(fileName, "%sorf2gene", wormFeaturesDir());
 f = mustOpen(fileName, "r");
 while (fgets(lineBuf, sizeof(lineBuf), f))
     {
     if (strncmp(lineBuf, orfName, nameLen) == 0 && lineBuf[nameLen] == ' ')
         {
         char *words[2];
         chopLine(lineBuf, words);  // ignore return wordCount
         /* The assert is what guarantees the strncpy below leaves room for
          * the terminating zero; it vanishes in NDEBUG builds. */
         assert((int)strlen(words[1]) < bufSize);
         strncpy(geneNameBuf, words[1], bufSize);
         ok = TRUE;
         break;
         }
     }
 fclose(f);
 return ok;
 }
 
 boolean wormInfoForGene(char *orfName, struct wormCdnaInfo *retInfo)
 /* Look up orfName in the "orfInfo" file of the features directory and
  * parse the matching line into retInfo.  Returns TRUE if a line was
  * found, FALSE otherwise.  orfName may also be an abc-12 style gene name,
  * in which case its first ORF synonym (if any) is looked up instead.
  * On success the matching line is cloned and becomes retInfo's mother
  * string; release it with wormFreeCdnaInfo(). */
 {
 FILE *f;
 char fileName[512];
 char lineBuf[512];
 int nameLen;
 char *info = NULL;
 char *synName = NULL;
 /* Make this one work for orfs as well as gene names */
 if (wormIsGeneName(orfName))
     {
     synName = wormGeneFirstOrfName(orfName);
     if (synName != NULL)
         orfName = synName;
     }
 sprintf(fileName, "%sorfInfo", wormFeaturesDir());
 nameLen = strlen(orfName);
 f = mustOpen(fileName, "r");
 while (fgets(lineBuf, sizeof(lineBuf), f))
     {
     /* The name must be followed by a space so that it matches as a whole
      * word rather than as a prefix of a longer name. */
     if (strncmp(lineBuf, orfName, nameLen) == 0 && lineBuf[nameLen] == ' ')
         {
         info = cloneString(lineBuf);
         break;
         }
     }
 freeMem(synName);
 fclose(f);
 if (info == NULL)
     return FALSE;
 if (retInfo == NULL)
     freeMem(info);      /* Nobody takes ownership of the line, so drop it. */
 else
     wormFaCommentIntoInfo(info, retInfo);
 return TRUE;
 }
 
 boolean getWormGeneDna(char *name, DNA **retDna, boolean upcExons)
 /* Get the DNA associated with a gene.  Optionally upper case exons.
  * name may be an abc-12 style gene name (translated to its first ORF
  * synonym) or a gene-prediction name; names starting with "g-" are looked
  * up in the Genie set, all others in the Sanger set.  Returns FALSE if the
  * gene is not found.  On success *retDna holds the gene's extent on its
  * own strand; the caller owns it. */
 {
 struct gdfGene *g;
 struct slName *syn = NULL;
 long lstart, lend;
 int start, end;
 int dnaSize;
 DNA *dna;
 struct wormGdfCache *gdfCache;
 /* Translate biologist type name to cosmid.N name */
 if (wormIsGeneName(name))
     {
     syn = wormGeneToOrfNames(name);
     if (syn != NULL)
         name = syn->name;
     }
 if (strncmp(name, "g-", 2) == 0)
     gdfCache = &wormGenieGdfCache;
 else
     gdfCache = &wormSangerGdfCache;
 g = wormGetSomeGdfGene(name, gdfCache);
 /* name (which may point into syn) is not used past the lookup, so the
  * synonym list can be released here on both the hit and miss paths. */
 slFreeList(&syn);
 if (g == NULL)
     return FALSE;
 gdfGeneExtents(g, &lstart, &lend);
 start = lstart;
 end = lend;
 /* wormClipRangeToChrom(chromIds[g->chromIx], &start, &end); */
 dnaSize = end-start;
 *retDna = dna = wormChromPart(chromIds[g->chromIx], start, dnaSize);
 /* Make the gene's coordinates relative to the start of dna. */
 gdfOffsetGene(g, -start);
 if (g->strand == '-')
     {
     reverseComplement(dna, dnaSize);
     gdfRcGene(g, dnaSize);
     }
 if (upcExons)
     {
     /* Data points are taken in pairs: [i] starts an exon, [i+1] ends it. */
     int i;
     struct gdfDataPoint *pt = g->dataPoints;
     for (i=0; i<g->dataCount; i += 2)
         {
         toUpperN(dna + pt[i].start, pt[i+1].start - pt[i].start);
         }
     }
 gdfFreeGene(g);
 return TRUE;
 }
 
 boolean getWormGeneExonDna(char *name, DNA **retDna)
 /* Get the DNA associated with a gene, without introns.
  * Name handling is the same as in getWormGeneDna(): abc-12 style names
  * are translated to their first ORF synonym, and names starting with "g-"
  * use the Genie set.  Returns FALSE if the gene is not found.  On success
  * *retDna is a freshly cloned string of the concatenated exons on the
  * gene's own strand; the caller owns it. */
 {
 struct gdfGene *g;
 struct slName *syn = NULL;
 long lstart, lend;
 int start, end;
 int dnaSize;
 DNA *dna;
 int i;
 struct gdfDataPoint *pt = NULL;
 struct wormGdfCache *gdfCache;
 struct dyString *dy = NULL;
 /* Translate biologist type name to cosmid.N name */
 if (wormIsGeneName(name))
     {
     syn = wormGeneToOrfNames(name);
     if (syn != NULL)
         name = syn->name;
     }
 if (strncmp(name, "g-", 2) == 0)
     gdfCache = &wormGenieGdfCache;
 else
     gdfCache = &wormSangerGdfCache;
 g = wormGetSomeGdfGene(name, gdfCache);
 /* name (which may point into syn) is not used past the lookup. */
 slFreeList(&syn);
 if (g == NULL)
     return FALSE;
 gdfGeneExtents(g, &lstart, &lend);
 start = lstart;
 end = lend;
 /*wormClipRangeToChrom(chromIds[g->chromIx], &start, &end);*/
 dnaSize = end-start;
 dna = wormChromPart(chromIds[g->chromIx], start, dnaSize);
 /* Make the gene's coordinates relative to the start of dna. */
 gdfOffsetGene(g, -start);
 if (g->strand == '-')
     {
     reverseComplement(dna, dnaSize);
     gdfRcGene(g, dnaSize);
     }
 /* The string buffer is created only once the gene is known to exist, so
  * the not-found path has nothing to clean up. */
 dy = newDyString(1000);
 pt = g->dataPoints;
 /* Data points are taken in pairs: [i] starts an exon, [i+1] ends it. */
 for (i=0; i<g->dataCount; i += 2)
     {
     dyStringAppendN(dy, (dna+pt[i].start), (pt[i+1].start - pt[i].start));
     }
 *retDna = cloneString(dy->string);
 dyStringFree(&dy);
 /* Only the clone is returned; the full-extent buffer is no longer needed. */
 freeMem(dna);
 gdfFreeGene(g);
 return TRUE;
 }
 
 static void makeChromFileName(char *chromId, char *buf)
 /* Write the name of the packed sequence file for chromId, that is
  * <nt4Dir><chromId>.nt4, into buf.  No length is passed, so buf must be
  * big enough; callers in this file use 512-byte buffers. */
 {
 getDirs();
 sprintf(buf, "%s%s.nt4", nt4Dir, chromId);
 }
 
 void wormLoadNt4Genome(struct nt4Seq ***retNt4Seq, int *retNt4Count)
 /* Load the packed (nt4) sequence of every chromosome listed in chromIds.
  * The newly allocated array of sequences and its length come back through
  * the two return parameters; release with wormFreeNt4Genome(). */
 {
 int chromCount = ArraySize(chromIds);
 struct nt4Seq **seqs = needMem(chromCount*sizeof(*seqs));
 char path[512];
 int chromIx;
 for (chromIx = 0; chromIx < chromCount; ++chromIx)
     {
     char *chrom = chromIds[chromIx];
     makeChromFileName(chrom, path);
     seqs[chromIx] = loadNt4(path, chrom);
     }
 *retNt4Seq = seqs;
 *retNt4Count = chromCount;
 }
 
 void wormFreeNt4Genome(struct nt4Seq ***pNt4Seq)
 /* Free up packed worm genome. */
 {
 struct nt4Seq **seqs;
 int i;
 if ((seqs = *pNt4Seq) == NULL)
     return;
 for (i=0; i<ArraySize(chromIds); ++i)
     freeNt4(&seqs[i]);
 freez(pNt4Seq);
 }
 
 int wormChromSize(char *chrom)
 /* Report the number of bases in a chromosome.  Sizes are remembered in a
  * static table after the first lookup, so the .nt4 file is consulted only
  * while the remembered size is still zero.  errAborts on an unknown
  * chromosome name. */
 {
 static int sizes[ArraySize(chromIds)];
 int ix = wormChromIx(chrom);
 if (ix < 0)
     errAbort("%s isn't a chromosome", chrom);
 if (sizes[ix] == 0)
     {
     char fileName[512];
     makeChromFileName(chromIds[ix], fileName);
     sizes[ix] = nt4BaseCount(fileName);
     }
 return sizes[ix];
 }
 
 
 DNA *wormChromPart(char *chromId, int start, int size)
 /* Return part of a worm chromosome: size bases beginning at start, loaded
  * from the chromosome's .nt4 file with nt4LoadPart().  No clipping to the
  * chromosome is done here; see wormClipRangeToChrom(). */
 {
 char fileName[512];
 makeChromFileName(chromId, fileName);
 return nt4LoadPart(fileName, start, size);
 }
 
 DNA *wormChromPartExonsUpper(char *chromId, int start, int size)
 /* Load size bases of chromId beginning at start and upper-case the exons
  * of every gene from the default gene set that overlaps the region.
  * Features named like nameless clusters are skipped. */
 {
 DNA *dna = wormChromPart(chromId, start, size);
 struct wormFeature *featList = wormGenesInRange(chromId, start, start+size);
 struct wormFeature *feat;
 for (feat = featList; feat != NULL; feat = feat->next)
     {
     struct gdfGene *gene;
     if (wormIsNamelessCluster(feat->name))
         continue;
     gene = wormGetGdfGene(feat->name);
     gdfUpcExons(gene, feat->start, dna, size, start);
     gdfFreeGene(gene);
     }
 slFreeList(&featList);
 return dna;
 }
 
 void wormClipRangeToChrom(char *chrom, int *pStart, int *pEnd)
 /* Adjust *pStart and *pEnd so the range lies inside chrom.  A reversed
  * range is put in order first.  A range hanging over either end is slid
  * back inside, keeping its length where possible; a range longer than the
  * chromosome ends up covering all of it. */
 {
 int chromEnd = wormChromSize(chrom);
 int rangeStart = *pStart;
 int rangeEnd = *pEnd;
 /* Put the ends in order. */
 if (rangeStart > rangeEnd)
     {
     int swap = rangeEnd;
     rangeEnd = rangeStart;
     rangeStart = swap;
     }
 /* Slide right if we hang off the left edge. */
 if (rangeStart < 0)
     {
     rangeEnd -= rangeStart;
     rangeStart = 0;
     }
 /* Slide left if we hang off the right edge. */
 if (rangeEnd > chromEnd)
     {
     rangeStart -= rangeEnd - chromEnd;
     rangeEnd = chromEnd;
     }
 /* Sliding left can push the start negative when the range is longer than
  * the chromosome. */
 if (rangeStart < 0)
     rangeStart = 0;
 *pStart = rangeStart;
 *pEnd = rangeEnd;
 }
 
 boolean wormParseChromRange(char *in, char **retChromId, int *retStart, int *retEnd)
 /* Chop up a string representation of a range within a chromosome and put the
  * pieces into the return variables. Return FALSE if it isn't formatted right.
  * The input must split into exactly three words on '-', ':' and white
  * space: a known chromosome name and two words that start with a digit.
  * The range is clipped to the chromosome before it is returned, and
  * *retChromId is the official (table) name.  in itself is not modified.
  * Input too long for the internal 128-byte buffer is rejected. */
 {
 char *words[5];
 int wordCount;
 char *chromId;
 char buf[128];
 /* strncpy would leave buf without a terminating zero for input of 128
  * characters or more, so refuse such input up front instead. */
 if (strlen(in) >= sizeof(buf))
     return FALSE;
 strcpy(buf, in);
 wordCount = chopString(buf, "- \t\r\n:", words, ArraySize(words));
 if (wordCount != 3)
     return FALSE;
 chromId = wormOfficialChromName(words[0]);
 if (chromId == NULL)
     return FALSE;
 /* isdigit() is only defined for unsigned char values and EOF. */
 if (!isdigit((unsigned char)words[1][0]) || !isdigit((unsigned char)words[2][0]))
     return FALSE;
 *retChromId = chromId;
 *retStart = atoi(words[1]);
 *retEnd = atoi(words[2]);
 wormClipRangeToChrom(chromId, retStart, retEnd);
 return TRUE;
 }
 
 boolean wormIsChromRange(char *in)
 /* TRUE when in parses as a chromosome range.  The parsed pieces are
  * thrown away; only the verdict of wormParseChromRange() is returned. */
 {
 char *ignoredChrom;
 int ignoredStart, ignoredEnd;
 return wormParseChromRange(in, &ignoredChrom, &ignoredStart, &ignoredEnd);
 }
 
 boolean wormFixupOrfName(char *name)
 /* Normalize name, in place, to cosmid.# form: everything before the last
  * '.' is upper-cased, and the part after it is lower-cased when it starts
  * with a digit.  A suffix that does not start with a digit (a nameless
  * cluster) is left alone.  Returns FALSE if name has no '.'. */
 {
 char *lastDot = strrchr(name, '.');
 if (lastDot == NULL)
     return FALSE;
 toUpperN(name, lastDot-name);
 if (isdigit(lastDot[1]))
     tolowers(lastDot+1);
 return TRUE;
 }
 
 boolean wormIsAltSplicedName(char *name)
 /* Does name look like an isoform name: a last '.' followed by a digit,
  * with a letter as the very last character?  The result is whatever
  * isalpha() returns for that last character, so test it for non-zero. */
 {
 char *lastDot = strrchr(name, '.');
 if (lastDot == NULL || !isdigit(lastDot[1]))
     return FALSE;
 return isalpha(lastDot[strlen(lastDot)-1]);
 }
 
 static void makeIsoformBaseName(char *name)
 /* If name has the form of an isoform name (see wormIsAltSplicedName),
  * chop off its last character in place; otherwise leave it alone. */
 {
 if (wormIsAltSplicedName(name))
     name[strlen(name)-1] = 0;
 }
 
 static boolean findAltSpliceRange(char *name, struct snof *snof, FILE *f, 
     char **retChrom, int *retStart, int *retEnd, char *retStrand)
 /* Return range of chromosome covered by a gene and all of it's isoforms.
  * snof indexes the lines of f by gene name.  Starting at the first index
  * entry whose name begins with the isoform-less base of name, every entry
  * with the same base name is read from f as "range strand name" and the
  * smallest start / largest end are kept.  Returns FALSE if no index entry
  * starts with the base name, TRUE otherwise.  *retChrom and *retStrand
  * come from the last matching line read. */
 {
 char baseName[64];
 char bName[64];
 int snIx, maxIx;
 int start = 0x7fffffff;
 int end = -start;
 char lineBuf[128];
 char *words[3];
 int wordCount;
 int baseNameSize;
 
 /* NOTE(review): unbounded copies into 64-byte buffers here and below;
  * confirm that gene names are always shorter than that. */
 strcpy(baseName, name);
 makeIsoformBaseName(baseName);
 baseNameSize = strlen(baseName);
 if (!snofFindFirstStartingWith(snof, baseName, baseNameSize, &snIx))
     return FALSE;
 maxIx = snofElementCount(snof);
 for (;snIx < maxIx; ++snIx)
     {
     long offset;
     char *geneName;
 
     snofNameOffsetAtIx(snof, snIx, &geneName, &offset);
     /* Stop at the first entry that no longer shares the prefix. */
     if (strncmp(geneName, baseName, baseNameSize) != 0)
         break;
     /* A shared prefix is not enough: the entry's own base name must be
      * identical to ours. */
     strcpy(bName, geneName);
     makeIsoformBaseName(bName);
     if (sameString(baseName, bName))
         {
         int s, e;
         fseek(f, offset, SEEK_SET);
         mustGetLine(f, lineBuf, sizeof(lineBuf));
         wordCount = chopLine(lineBuf, words);
         assert(wordCount == 3);
         /* The parse result is not checked; s and e are used regardless. */
         wormParseChromRange(words[0], retChrom, &s, &e);
         *retStrand = words[1][0];
         if (start > s)
             start = s;
         if (end < e)
             end = e;
         }
     }
 *retStart = start;
 *retEnd = end;
 return TRUE;
 }
 
 
 boolean wormGeneRange(char *name, char **retChrom, char *retStrand, int *retStart, int *retEnd)
 /* Return chromosome position of a chrom range, gene, ORF, cosmid, or nameless cluster.
  * name is tried, in order, as:
  *   - a chromosome range ("chrom:start-end"); strand is then '.'
  *   - an abc-12 style gene name, replaced by its first ORF synonym
  *   - an ORF name (anything containing a '.'), looked up in the c2g index
  *     together with all of its isoforms
  *   - a cosmid name, looked up in the c2c index
  * Returns FALSE if the name can't be found.  Note that name may be
  * case-adjusted in place.  The index files are opened on first use and
  * kept open in static variables. */
 {
 static struct snof *c2gSnof = NULL, *c2cSnof = NULL;
 static FILE *c2gFile = NULL, *c2cFile = NULL;
 long offset;
 char fileName[512];
 struct slName *syn = NULL;
 boolean ok;
 if (wormIsChromRange(name))
     {
     *retStrand = '.';
     return wormParseChromRange(name, retChrom, retStart, retEnd);
     }
 getDirs();
 /* Translate biologist type name to cosmid.N name */
 if (wormIsGeneName(name))
     {
     syn = wormGeneToOrfNames(name);
     if (syn != NULL)
         name = syn->name;
     }
 if (wormFixupOrfName(name)) /* See if ORF, and if so make nice. */
     {
     if (c2gSnof == NULL)
         {
         sprintf(fileName, "%sc2g", featDir);
         c2gSnof = snofMustOpen(fileName);
         sprintf(fileName, "%sc2g", featDir);
         c2gFile = mustOpen(fileName, "rb");
         }
     ok = findAltSpliceRange(name, c2gSnof, c2gFile, retChrom, retStart, retEnd, retStrand);
     }
 else    /* Lets say it's a cosmid. */
     {
     char lineBuf[128];
     char *words[3];
     int wordCount;
     touppers(name);
     if (c2cSnof == NULL)
         {
         sprintf(fileName, "%sc2c", featDir);
         c2cSnof = snofMustOpen(fileName);
         sprintf(fileName, "%sc2c", featDir);
         c2cFile = mustOpen(fileName, "rb");
         }
     if (!snofFindOffset(c2cSnof, name, &offset) )
         {
         /* name may be a synonym from the list, so the list has to be
          * released on this exit too. */
         slFreeList(&syn);
         return FALSE;
         }
     fseek(c2cFile, offset, SEEK_SET);
     mustGetLine(c2cFile, lineBuf, sizeof(lineBuf));
     wordCount = chopLine(lineBuf, words);
     assert(wordCount == 3);
     assert(strcmp(words[2], name) == 0);
     assert(wormIsChromRange(words[0]));
     *retStrand = words[1][0];
     ok = wormParseChromRange(words[0], retChrom, retStart, retEnd);
     }
 slFreeList(&syn);
 return ok;
 }
 
 boolean wormIsNamelessCluster(char *name)
 /* A nameless cluster name has, after its last '.', the letter 'N'
  * followed by a digit.  Returns TRUE for names of that shape. */
 {
 char *lastDot = strrchr(name, '.');
 if (lastDot == NULL || lastDot[1] != 'N')
     return FALSE;
 return isdigit(lastDot[2]) ? TRUE : FALSE;
 }
 
 DNA *wormGetNamelessClusterDna(char *name)
 /* Get DNA associated with nameless cluster */
 {
 char *chrom;
 int start, end;
 char strand;
 if (!wormGeneRange(name, &chrom, &strand, &start, &end))
     errAbort("Can't find %s in database", name);
 return wormChromPart(chrom, start, end-start);
 }
 
 struct wormGdfCache wormSangerGdfCache = {&sangerDir,NULL,NULL};
 struct wormGdfCache wormGenieGdfCache = {&genieDir,NULL,NULL};
 struct wormGdfCache *defaultGdfCache = &wormSangerGdfCache;
 
 
 static void wormCacheSomeGdf(struct wormGdfCache *cache)
 /* Open the name index ("genes") and the gene file ("genes.gdf") of one
  * gene prediction set on first use, and verify the gene file's signature.
  * Does nothing if the cache's index is already open. */
 {
 char fileName[512];
 char *setDir;
 bits32 sig;
 if (cache->snof != NULL)
     return;
 /* The cache stores a pointer to the directory variable, which is only
  * filled in once getDirs() has run. */
 getDirs();
 setDir = *(cache->pDir);
 sprintf(fileName, "%sgenes", setDir);
 cache->snof = snofMustOpen(fileName);
 sprintf(fileName, "%sgenes.gdf", setDir);
 cache->file = mustOpen(fileName, "rb");
 mustReadOne(cache->file, sig);
 if (sig != glSig)
     errAbort("%s is not a good file", fileName);
 }
 
 #if 0 /* unused */
 static void wormCacheGdf()
 /* Set up for fast access to GDF file entries in the default gene set.
  * Currently compiled out by the surrounding #if 0. */
 {
 wormCacheSomeGdf(defaultGdfCache);
 }
 #endif
 
 void wormUncacheSomeGdf(struct wormGdfCache *cache)
 /* Uncache some gene prediction set: close its name index and its gene
  * file.  wormCacheSomeGdf() reopens them on the next access. */
 {
 snofClose(&cache->snof);
 carefulClose(&cache->file);
 }
 
 void wormUncacheGdf()
 /* Free up resources associated with fast GDF access for the default
  * gene set (defaultGdfCache). */
 {
 wormUncacheSomeGdf(defaultGdfCache);
 }
 
 struct gdfGene *wormGetSomeGdfGene(char *name, struct wormGdfCache *cache)
 /* Fetch the gene called name from the gene set behind cache, opening the
  * set's files if need be.  Returns NULL when the name is not in the set's
  * index. */
 {
 long fileOffset;
 wormCacheSomeGdf(cache);
 if (snofFindOffset(cache->snof, name, &fileOffset) )
     {
     fseek(cache->file, fileOffset, SEEK_SET);
     return gdfReadOneGene(cache->file);
     }
 return NULL;
 }
 
 struct gdfGene *wormGetGdfGene(char *name)
 /* Get a single gdfGene of given name from the default gene set
  * (defaultGdfCache).  Returns NULL if the name is not found. */
 {
 return wormGetSomeGdfGene(name, defaultGdfCache);
 }
 
 struct gdfGene *wormGetSomeGdfGeneList(char *baseName, int baseNameSize, struct wormGdfCache *cache)
 /* Get all gdfGenes that start with a given name.
  * Only the first baseNameSize characters of baseName are compared.
  * Returns NULL if no name in the gene set's index starts with it. */
 {
 int snIx;
 int maxIx;
 struct snof *snof;
 FILE *f;
 struct gdfGene *list = NULL, *el;
 
 wormCacheSomeGdf(cache);
 snof = cache->snof;
 f = cache->file;
 if (!snofFindFirstStartingWith(snof, baseName, baseNameSize, &snIx))
     return NULL;
 
 maxIx = snofElementCount(snof);
 for (;snIx < maxIx; ++snIx)
     {
     long offset;
     char *geneName;
 
     snofNameOffsetAtIx(snof, snIx, &geneName, &offset);
     /* Stop at the first index entry that no longer shares the prefix. */
     if (strncmp(geneName, baseName, baseNameSize) != 0)
         break;
     fseek(f, offset, SEEK_SET);
     el = gdfReadOneGene(f);
     slAddTail(&list, el);
     }
 /* NOTE(review): elements are appended at the tail and the list is then
  * reversed, so genes come back in the opposite of index order.  Other
  * loops in this file pair slAddHead with slReverse -- confirm whether the
  * reversed order is intended before changing it. */
 slReverse(&list);
 return list;
 }
 
 struct gdfGene *wormGetGdfGeneList(char *baseName, int baseNameSize)
 /* Get all gdfGenes in the default gene set (defaultGdfCache) whose names
  * start with the first baseNameSize characters of baseName. */
 {
 return wormGetSomeGdfGeneList(baseName, baseNameSize, defaultGdfCache);
 }
 
 struct gdfGene *wormGdfGenesInRange(char *chrom, int start, int end, 
     struct wormGdfCache *geneFinder)
 /* Get list of genes in range according to given gene finder.
  * geneFinder must be &wormSangerGdfCache or &wormGenieGdfCache; anything
  * else is an errAbort.  Features named like nameless clusters are left
  * out, as are names that can't be found in the gene finder's index.  The
  * list is in the order the names appear in the offset file. */
 {
 char *dir = NULL;
 struct gdfGene *gdfList = NULL, *gdf;
 struct wormFeature *nameList, *name;
 if (geneFinder == &wormSangerGdfCache)
     dir = wormSangerDir();
 else if (geneFinder == &wormGenieGdfCache)
     dir = wormGenieDir();
 else
     errAbort("Unknown geneFinder line %d of %s", __LINE__, __FILE__);
 nameList = wormSomeGenesInRange(chrom, start, end, dir);
 for (name = nameList; name != NULL; name = name->next)
     {
     char *n = name->name;
     if (!wormIsNamelessCluster(n))
         {
         gdf = wormGetSomeGdfGene(n, geneFinder);
         /* The lookup returns NULL for a name missing from the index; a
          * NULL element must not be linked into the list. */
         if (gdf != NULL)
             slAddHead(&gdfList, gdf);
         }
     }
 slFreeList(&nameList);
 slReverse(&gdfList);
 return gdfList;
 }