a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/lib/nib.c src/lib/nib.c
index 92917c0..d885513 100644
--- src/lib/nib.c
+++ src/lib/nib.c
@@ -1,517 +1,516 @@
 /* Nib - nibble (4 bit) representation of nucleotide sequences. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "hash.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "nib.h"
 #include "sig.h"
 
-static char const rcsid[] = "$Id: nib.c,v 1.24 2007/03/13 20:43:05 kent Exp $";
 
 static char *findNibSubrange(char *fileName)
 /* Locate the ':' that introduces a sequence name or subrange within the
  * last path component of fileName.  Colons that occur in directory
  * components are ignored.  Returns a pointer into fileName, or NULL if the
  * last component contains no colon. */
 {
 char *lastSlash = strrchr(fileName, '/');
 char *component = fileName;
 if (lastSlash != NULL)
     component = lastSlash + 1;
 return strchr(component, ':');
 }
 
 static void parseSubrange(char *subrange, char *name, 
 	unsigned *start, unsigned *end)
 /* Parse a subrange specification that begins at its leading ':'.
  * Two forms are accepted, ":seqId:start-end" and ":start-end".  The seqId
  * (or an empty string when absent) is copied to name, and the two numbers
  * are stored in *start and *end.  Aborts via errAbort if the numbers can't
  * be read or if start is greater than end. */
 {
 char *spec = subrange + 1;	/* text just past the leading ':' */
 char *sep = strchr(spec, ':');
 char *range;
 if (sep == NULL)
     {
     /* :start-end form, there is no sequence id */
     name[0] = '\0';
     range = spec;
     }
 else
     {
     /* :seqId:start-end form.  Terminate the id just long enough to copy
      * it, then put the separator back. */
     *sep = '\0';
     strcpy(name, spec);
     *sep = ':';
     range = sep + 1;
     }
 if ((sscanf(range, "%u-%u", start, end) != 2) || (*start > *end))
     errAbort("can't parse nib file subsequence specification: %s",
              subrange);
 }
 
 void nibParseName(unsigned options, char *fileSpec, char *filePath,
                          char *name, unsigned *start, unsigned *end)
 /* Parse a nib file spec of the form path, path:start-end or
  * path:seqId:start-end.
  *   options   - if NIB_BASE_NAME is set, a generated sequence name is built
  *               from the name part that splitPath() extracts from the path
  *               rather than from the whole path.
  *   fileSpec  - the spec to parse.
  *   filePath  - output: fileSpec without the subrange part.  The caller
  *               supplies the buffer; no size is checked here.
  *   name      - output: the sequence name.  This is seqId when the spec
  *               has one, otherwise a generated name with ":start-end"
  *               appended when a range was given.  No size is checked here.
  *   start,end - output: the range from the spec.  Zero is returned for both
  *               when the spec has no range.
  * Aborts via errAbort (from parseSubrange) if the range can't be parsed. */
 {
 char *subrange = findNibSubrange(fileSpec);
 if (subrange != NULL)
     {
     /* Copy the path by length rather than temporarily writing a NUL over
      * the ':' in fileSpec.  parseSubrange reports a bad spec by printing
      * the string at subrange, which would come out empty if the ':' were
      * zeroed, and an abort at that point would leave the caller's string
      * truncated. */
     size_t pathLen = subrange - fileSpec;
     memcpy(filePath, fileSpec, pathLen);
     filePath[pathLen] = '\0';
     parseSubrange(subrange, name, start, end);
     if (strlen(name) == 0)
         {
         /* no name in spec, generate one */
         if (options & NIB_BASE_NAME)
             splitPath(filePath, NULL, name, NULL);
         else
             strcpy(name, filePath);
         sprintf(name+strlen(name), ":%u-%u", *start, *end);
         }
     }
 else
     {
     *start = 0;
     *end = 0;
     strcpy(filePath, fileSpec);
     if (options & NIB_BASE_NAME)
         splitPath(fileSpec, NULL, name, NULL);
     else
         strcpy(name, fileSpec);
     }
 }
 
 void nibOpenVerify(char *fileName, FILE **retFile, int *retSize)
 /* Open file and verify it's in good nibble format.
  *   fileName - path of the nib file.  If it can't be opened directly, a
  *              series of subdirectories named after the trailing digits of
  *              the file name (padded with 0's) is probed, ala
  *              faSplit -outDirDepth.
  *   retFile  - output: the open file, positioned just past the header.
  *   retSize  - output: sequence size read from the header.
  * The header is a 32 bit signature followed by a 32 bit size; both are
  * byte swapped if the signature only matches after swapping.
  * Aborts via errAbort if the file can't be opened or has a bad signature. */
 {
 bits32 size;
 bits32 sig;
 FILE *f = fopen(fileName, "rb");
 char buffer[512];
 char buffer2[512];
 char buffer3[512];
 if (f == NULL)
     {
     /* see if nib is down a few directories ala faSplit -outDirDepth */
     int openErrno = errno;	/* reason the direct open failed; the probing
                                  * below overwrites errno */
     char *slash;
     char *dir, *file;
     int digitIx;
     struct stat statBuf;
     /* divide fileName into file and directory components */
     safef(buffer, sizeof(buffer), "%s", fileName);
     if ((slash = strrchr(buffer, '/')) != NULL)
         {
         *slash++ = 0;
         dir = buffer;
         file = slash;
         }
     else
         {
         dir = "";
         file = buffer;
         }
     buffer3[0] = 0;
     /* start at the end of the fileName (minus .nib).  An index is used
      * rather than a pointer so that running off the front of the name
      * (short names, or more directory levels than digits) is detected
      * instead of reading memory before the buffer. */
     digitIx = (int)strlen(file) - 5;
     for (;;)
         {
         strcpy(buffer2, buffer3);
         if (digitIx >= 0 && isdigit((unsigned char)file[digitIx]))
             {
             /* if we have a digit in the fileName, see if there is a directory with this name */
             safef(buffer3, sizeof(buffer3), "%c/%s", file[digitIx], buffer2);
             digitIx--;
             }
         else
             /* we've run out of digits in the fileName, just add 0's */
             safef(buffer3, sizeof(buffer3), "0/%s", buffer2);
         /* check to see if this directory exists */
         safef(buffer2, sizeof(buffer2), "%s/%s", dir, buffer3);
         if (stat(buffer2, &statBuf) < 0)
             break;
         /* directory exists, see if our file is down there */
         safef(buffer2, sizeof(buffer2), "%s/%s/%s", dir, buffer3, file);
         if ((f = fopen(buffer2, "rb")) != NULL)
             break;
         }
     if (f == NULL)
         errAbort("Can't open %s to read: %s", fileName, strerror(openErrno));
     }
 dnaUtilOpen();
 mustReadOne(f, sig);
 mustReadOne(f, size);
 if (sig != nibSig)
     {
     sig = byteSwap32(sig);
     size = byteSwap32(size);
     if (sig != nibSig)
         {
         /* don't leave the file open if the abort is caught by the caller */
         fclose(f);
         errAbort("%s is not a good .nib file.",  fileName);
         }
     }
 *retSize = size;
 *retFile = f;
 }
 
 static struct dnaSeq *nibInput(int options, char *fileName, char *seqName,
                                FILE *f, int seqSize, int start, int size)
 /* Load part of an open .nib file. */
 {
 int end;
 DNA *d;
 int bVal;
 DNA *valToNtTbl = ((options &  NIB_MASK_MIXED) ? valToNtMasked : valToNt);
 struct dnaSeq *seq;
 Bits* mask = NULL;
 int bytePos, byteSize;
 int maskIdx = 0;
 
 assert(start >= 0);
 assert(size >= 0);
 
 end = start+size;
 if (end > seqSize)
     errAbort("nib read past end of file (%d %d) in file: %s", 
 	     end, seqSize, (fileName != NULL ? fileName : "(NULL)"));
 
 AllocVar(seq);
 seq->size = size;
 seq->name = cloneString(seqName);
 seq->dna = d = needLargeMem(size+1);
 if (options & NIB_MASK_MIXED)
     seq->mask = mask = bitAlloc(size);
 
 bytePos = (start>>1);
 fseek(f, bytePos + 2*sizeof(bits32), SEEK_SET);
 if (start & 1)
     {
     bVal = getc_unlocked(f);
     if (bVal < 0)
 	{
 	errAbort("Read error 1 in %s", fileName);
 	}
     *d++ = valToNtTbl[(bVal&0xf)];
     size -= 1;
     if (mask != NULL)
         {
         if ((bVal&0xf&MASKED_BASE_BIT) == 0)
             bitSetOne(mask, maskIdx);
         maskIdx++;
         }
     }
 byteSize = (size>>1);
 while (--byteSize >= 0)
     {
     bVal = getc_unlocked(f);
     if (bVal < 0)
 	errAbort("Read error 2 in %s", fileName);
     d[0] = valToNtTbl[(bVal>>4)];
     d[1] = valToNtTbl[(bVal&0xf)];
     d += 2;
     if (mask != NULL)
         {
         if (((bVal>>4)&0xf) == 0)
             bitSetOne(mask, maskIdx);
         if ((bVal&0xf) == 0)
             bitSetOne(mask, maskIdx+1);
         maskIdx += 2;
         }
     }
 if (size&1)
     {
     bVal = getc_unlocked(f);
     if (bVal < 0)
 	errAbort("Read error 3 in %s", fileName);
     *d++ = valToNtTbl[(bVal>>4)];
     if (mask != NULL)
         {
         if ((bVal>>4) == 0)
             bitSetOne(mask, maskIdx);
         maskIdx++;
         }
     }
 *d = 0;
 return seq;
 }
 
 static void nibOutput(int options, struct dnaSeq *seq, char *fileName)
 /* Write out file in format of four bits per nucleotide, with control over
  * handling of masked positions.
  *   options  - NIB_MASK_MIXED encodes bases through ntValMasked instead of
  *              ntVal5.  NIB_MASK_MAP additionally sets MASKED_BASE_BIT for
  *              every base whose bit in seq->mask is clear (ignored if
  *              seq->mask is NULL).
  *   seq      - sequence to write.
  *   fileName - file to create.
  * The file is a 32 bit signature, a 32 bit base count, then two bases per
  * byte with the first base in the high nibble.  A progress line is printed
  * to stdout.  Aborts via errAbort if a byte can't be written. */
 {
 UBYTE byte;
 DNA *dna = seq->dna;
 int dVal1, dVal2;
 bits32 size = seq->size;
 int byteCount = (size>>1);
 bits32 sig = nibSig;
 int *ntValTbl = ((options & NIB_MASK_MIXED) ? ntValMasked : ntVal5);
 Bits* mask = ((options & NIB_MASK_MAP) ? seq->mask : NULL);
 int maskIdx = 0;
 /* Binary mode, like every other open of a nib file in this module. */
 FILE *f = mustOpen(fileName, "wb");
 assert(sizeof(bits32) == 4);
 writeOne(f, sig);
 writeOne(f, seq->size);
 printf("Writing %d bases in %d bytes\n", seq->size, ((seq->size+1)/2) + 8);
 while (--byteCount >= 0)
     {
     dVal1 = ntValTbl[(int)dna[0]];
     dVal2 = ntValTbl[(int)dna[1]];
     /* Set from mask, remember bit in character is opposite sense of bit
      * in mask. */
     if (mask != NULL)
         {
         if (!bitReadOne(mask, maskIdx))
             dVal1 |= MASKED_BASE_BIT;
         if (!bitReadOne(mask, maskIdx+1))
             dVal2 |= MASKED_BASE_BIT;
         maskIdx += 2;
         }
     byte = (dVal1<<4) | dVal2;
     if (putc(byte, f) < 0)
         {
         perror("");
         errAbort("Couldn't write all of %s", fileName);
         }
     dna += 2;
     }
 if (size & 1)
     {
     /* Odd base count: the last base goes in the high nibble by itself. */
     dVal1 = ntValTbl[(int)dna[0]];
     if ((mask != NULL) && !bitReadOne(mask, maskIdx))
         dVal1 |= MASKED_BASE_BIT;
     byte = (dVal1<<4);
     /* Check this write the same way as the ones in the loop. */
     if (putc(byte, f) < 0)
         {
         perror("");
         errAbort("Couldn't write all of %s", fileName);
         }
     }
 carefulClose(&f);
 }
 
 struct dnaSeq *nibLdPartMasked(int options, char *fileName, FILE *f, int seqSize, int start, int size)
 /* Read size bases beginning at start from an already open .nib file, with
  * options controlling how masked positions are handled.  The returned
  * sequence is named "fileName:start-end". */
 {
 char rangeName[512];
 int end = start + size;
 safef(rangeName, sizeof(rangeName), "%s:%d-%d", fileName, start, end);
 return nibInput(options, fileName, rangeName, f, seqSize, start, size);
 }
 
 struct dnaSeq *nibLdPart(char *fileName, FILE *f, int seqSize, int start, int size)
 /* Read size bases beginning at start from an already open .nib file,
  * passing no masking options. */
 {
 struct dnaSeq *part = nibLdPartMasked(0, fileName, f, seqSize, start, size);
 return part;
 }
 
 struct dnaSeq *nibLoadPartMasked(int options, char *fileName, int start, int size)
 /* Open the named .nib file, read size bases beginning at start with the
  * given masking options, and close the file again. */
 {
 FILE *nibFile;
 int totalSize;
 struct dnaSeq *part;
 nibOpenVerify(fileName, &nibFile, &totalSize);
 part = nibLdPartMasked(options, fileName, nibFile, totalSize, start, size);
 fclose(nibFile);
 return part;
 }
 
 struct dnaSeq *nibLoadPart(char *fileName, int start, int size)
 /* Open the named .nib file and read size bases beginning at start, passing
  * no masking options. */
 {
 struct dnaSeq *part = nibLoadPartMasked(0, fileName, start, size);
 return part;
 }
 
 struct dnaSeq *nibLoadAllMasked(int options, char *fileName)
 /* Load a .nib file, or the part of it named in fileName, with control over
  * handling of masked positions.  A subrange may be specified in the file
  * name using the syntax:
  *    /path/file.nib:seqid:start-end
  * or
  *    /path/file.nib:start-end
  * With the first form, seqid becomes the id of the subrange; with the
  * second form, a sequence id of file:start-end will be used.  An end of
  * zero, which is also what a name without a subrange yields, is replaced
  * by the size of the file.
  */
 {
 char nibPath[PATH_LEN];
 char seqName[PATH_LEN];
 unsigned rangeStart, rangeEnd;
 int totalSize;
 FILE *nibFile;
 struct dnaSeq *loaded;
 nibParseName(options, fileName, nibPath, seqName, &rangeStart, &rangeEnd);
 nibOpenVerify(nibPath, &nibFile, &totalSize);
 if (rangeEnd == 0)
     rangeEnd = totalSize;
 loaded = nibInput(options, fileName, seqName, nibFile, totalSize,
                   rangeStart, rangeEnd-rangeStart);
 fclose(nibFile);
 return loaded;
 }
 
 struct dnaSeq *nibLoadAll(char *fileName)
 /* Load a .nib file, or the subrange given in fileName, passing no masking
  * options.  See nibLoadAllMasked for the subrange syntax. */
 {
 struct dnaSeq *loaded = nibLoadAllMasked(0, fileName);
 return loaded;
 }
 
 void nibWriteMasked(int options, struct dnaSeq *seq, char *fileName)
 /* Public entry point for writing seq to fileName as a nib file (four bits
  * per nucleotide); options selects how masked positions are encoded. */
 {
 nibOutput(options, seq, fileName);
 }
 
 void nibWrite(struct dnaSeq *seq, char *fileName)
 /* Write seq to fileName as a nib file (four bits per nucleotide), passing
  * no masking options. */
 {
 nibWriteMasked(0, seq, fileName);
 }
 
 struct nibStream
 /* Struct to help write a nib file one base at a time. 
  * The routines that do this aren't very fast, but they
  * aren't used much currently.  Created by nibStreamOpen, fed by
  * nibStreamOne/nibStreamMany and finished by nibStreamClose. */
     {
     struct nibStream *next;	/* Not used in this file; presumably a list link - TODO confirm. */
     char *fileName;	/* Name of file - allocated here (cloned on open, freed on close). */
     FILE *f;		/* File handle. */
     bits32 size;	/* Current size: number of bases passed to nibStreamOne so far. */
     UBYTE byte;		/* Two nibble's worth of data; while size is odd it holds
 			 * the pending base in its high nibble. */
     };
 
 struct nibStream *nibStreamOpen(char *fileName)
 /* Start writing a nib file one base at a time.  Creates fileName and
  * returns a new stream that must be finished with nibStreamClose. */
 {
 struct nibStream *stream;
 dnaUtilOpen();
 AllocVar(stream);
 stream->f = mustOpen(fileName, "wb");
 stream->fileName = cloneString(fileName);
 /* Reserve room for the header by writing the (initially zero) size field
  * twice.  nibStreamClose seeks back and writes the real signature and
  * size over it. */
 writeOne(stream->f, stream->size);
 writeOne(stream->f, stream->size);
 return stream;
 }
 
 void nibStreamClose(struct nibStream **pNs)
 /* Close a nib stream.  Flush last nibble if need be.  Fix up header.
  * Does nothing if *pNs is NULL; otherwise the stream is freed and *pNs is
  * cleared.  Aborts via errAbort if the file can't be rewound to write the
  * header. */
 {
 struct nibStream *ns = *pNs;
 FILE *f;
 bits32 sig = nibSig;
 if (ns == NULL)
     return;
 f = ns->f;
 /* An odd base count leaves one base pending in the high nibble. */
 if (ns->size&1)
     writeOne(f, ns->byte);
 /* If the seek fails the header would be appended to the data instead of
  * replacing the placeholder, so don't carry on. */
 if (fseek(f,  0L, SEEK_SET) != 0)
     errAbort("Can't rewind %s to write header: %s", ns->fileName, strerror(errno));
 writeOne(f, sig);
 writeOne(f, ns->size);
 /* Close the same way nibOutput does rather than ignoring fclose's result. */
 carefulClose(&ns->f);
 freeMem(ns->fileName);
 freez(pNs);
 }
 
 void nibStreamOne(struct nibStream *ns, DNA base)
 /* Append one base to a nib stream.  Bases are packed two to a byte: the
  * first of each pair is held in ns->byte as the high nibble, and the byte
  * is written once the second base arrives. */
 {
 UBYTE nibble = ntVal5[(int)base];
 ns->size += 1;
 if (ns->size & 1)
     {
     /* First base of a pair, keep it until its partner shows up. */
     ns->byte = (nibble<<4);
     return;
     }
 /* Second base of a pair, combine with the pending nibble and write. */
 nibble += ns->byte;
 writeOne(ns->f, nibble);
 }
 
 void nibStreamMany(struct nibStream *ns, DNA *dna, int size)
 /* Append the first size bases of dna to a nib stream, one at a time. */
 {
 int baseIx = 0;
 while (baseIx < size)
     {
     nibStreamOne(ns, dna[baseIx]);
     ++baseIx;
     }
 }
 
 boolean nibIsFile(char *fileName)
 /* Return TRUE if fileName, with any subrange specification left off, ends
  * in .nib or .NIB.  Only the name is examined.  fileName is modified
  * temporarily while checking and restored before returning. */
 {
 char *colon = findNibSubrange(fileName);
 boolean hasNibSuffix;
 if (colon == NULL)
     {
     hasNibSuffix = endsWith(fileName, ".nib") || endsWith(fileName, ".NIB");
     return hasNibSuffix;
     }
 /* Hide the subrange so the suffix test sees only the file name. */
 *colon = '\0';
 hasNibSuffix = endsWith(fileName, ".nib") || endsWith(fileName, ".NIB");
 *colon = ':';
 return hasNibSuffix;
 }
 
 boolean nibIsRange(char *fileName)
 /* Return TRUE if fileName carries a subrange specification and the part
  * before it ends in .nib or .NIB. */
 {
 if (findNibSubrange(fileName) == NULL)
     return FALSE;
 /* With a subrange present, nibIsFile performs the same suffix test on the
  * name with the subrange hidden. */
 return nibIsFile(fileName);
 }
 
 struct nibInfo *nibInfoNew(char *path)
 /* Allocate a nibInfo for path.  The file is opened and verified, and the
  * open handle and sequence size are stored in the result.  Free with
  * nibInfoFree. */
 {
 struct nibInfo *info;
 AllocVar(info);
 info->fileName = cloneString(path);
 nibOpenVerify(path, &info->f, &info->size);
 return info;
 }
 
 void nibInfoFree(struct nibInfo **pNib)
 /* Close the nib file if open, free the nibInfo and clear *pNib.  Does
  * nothing if *pNib is already NULL. */
 {
 struct nibInfo *info = *pNib;
 if (info == NULL)
     return;
 carefulClose(&info->f);
 freeMem(info->fileName);
 freez(pNib);
 }
 
 struct nibInfo *nibInfoFromCache(struct hash *hash, char *nibDir, char *nibName)
 /* Return the nibInfo for nibDir/nibName.nib.  The hash is keyed by that
  * path; on a miss the file is opened and the new nibInfo is added to the
  * hash before being returned. */
 {
 char path[PATH_LEN];
 struct nibInfo *cached;
 safef(path, sizeof(path), "%s/%s.nib", nibDir, nibName);
 cached = hashFindVal(hash, path);
 if (cached != NULL)
     return cached;
 cached = nibInfoNew(path);
 hashAdd(hash, path, cached);
 return cached;
 }
 
 int nibGetSize(char* nibFile)
 /* Return the sequence length recorded in the header of a nib file.  The
  * file is opened, verified and closed again. */
 {
 int seqSize;
 FILE *opened;
 nibOpenVerify(nibFile, &opened, &seqSize);
 carefulClose(&opened);
 return seqSize;
 }