a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/hg/lib/qaSeq.c src/hg/lib/qaSeq.c
index a661013..b30810b 100644
--- src/hg/lib/qaSeq.c
+++ src/hg/lib/qaSeq.c
@@ -1,551 +1,550 @@
 /* qaSeq.c - read and write quality scores. */
 
 
 #include "common.h"
 #include "portable.h"
 #include "linefile.h"
 #include "hash.h"
 #include "fa.h"
 #include "rle.h"
 #include "qaSeq.h"
 #include "sig.h"
 
-static char const rcsid[] = "$Id: qaSeq.c,v 1.5 2006/03/09 18:26:58 angie Exp $";
 
 void qaSeqFree(struct qaSeq **pQa)
 /* Free up qaSeq. */
 {
 struct qaSeq *qa;
 if ((qa = *pQa) == NULL)
     return;
 freeMem(qa->name);
 freeMem(qa->dna);
 freeMem(qa->qa);
 freez(pQa);
 }
 
 void qaSeqFreeList(struct qaSeq **pList)
 /* Free a list of QA seq's. */
 {
 struct qaSeq *el, *next;
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     qaSeqFree(&el);
     }
 *pList = NULL;
 }
 
 void qaFake(struct qaSeq *qa, int badTailSize, int poorTailSize)
 /* Fill in fake quality info. */
 {
 int badTailVal = 3;
 int poorTailVal = 15;
 int goodVal = 60;
 int size = qa->size;
 UBYTE *q = qa->qa;
 UBYTE *s, *e;
 
 if (size <= 2*badTailSize)
     memset(q, badTailVal, size);
 else
      {
      memset(q, badTailVal, badTailSize);
      memset(q+size-badTailSize, badTailVal, badTailSize);
      s = q + badTailSize;
      e = q + size - badTailSize;
      if (size <= 2*(badTailSize + poorTailSize))
          {
 	 memset(s, poorTailVal, e - s);
 	 }
      else
          {
 	 memset(s, poorTailVal, poorTailSize);
 	 memset(e-poorTailSize, poorTailVal, poorTailSize);
 	 s += poorTailSize;
 	 e -= poorTailSize;
 	 memset(s, goodVal, e - s);
 	 }
      }
 }    
 
 struct qaSeq *qaMakeConstant(char *name, int val, int size)
 /* Allocate and fill in constant quality info. */
 {
 struct qaSeq *qa;
 AllocVar(qa);
 qa->name = cloneString(name);
 qa->qa = needLargeMem(size+1);
 qa->size = size;
 memset(qa->qa, val, size);
 return qa;
 }
 
 void qaMakeFake(struct qaSeq *qa)
 /* Allocate and fill in fake quality info. */
 {
 qa->qa = needLargeMem(qa->size+1);
 qaFake(qa, 100, 100);
 }
 
 static boolean allSame(BYTE *b, int size, BYTE val)
 /* Return TRUE if next size bytes are all val. */
 {
 int i;
 for (i=0; i<size; ++i)
     if (b[i] != val)
         return FALSE;
 return TRUE;
 }
 
 boolean qaIsGsFake(struct qaSeq *qa)
 /* Return TRUE if quality info appears to be faked by 
  * Greg Schuler. (So we can refake it our way...) */
 {
 int size = qa->size;
 BYTE *q = (BYTE*)(qa->qa);
 
 /* Short sequences Greg fakes well enough. */
 if (size < 400)
     return FALSE;
 if (!allSame(q+200, size-400, 40))
     return FALSE;
 if (!allSame(q, 6, 10))
     return FALSE;
 if (!allSame(q+size-6, 6, 10))
      return FALSE;
 if (!allSame(q+6, 7, 11))
     return FALSE;
 return TRUE;  
 }
 
 static int snOnes[256], snTens[256];
 
 static int readShortNum(char *s, struct lineFile *lf) 
 /* Read in a number between 0 and 99 quickly. */
 {
 if (s[1] == 0)
     return snOnes[(int)s[0]];
 if (s[2] == 0)
     return snTens[(int)s[0]] + snOnes[(int)s[1]];
 else
     {
     warn("Expecting small number got %s line %d of %s", 
          s, lf->lineIx, lf->fileName);
     return 0;
     }	    
 }
 
 static void initShortNum()
 /* Initialize tables for fast conversion of ascii to integer. */
 {
 static boolean initted = FALSE;
 if (!initted)
     {
     int i;
     int tens = 0;
     for (i=0; i<10; ++i)
         {
 	snOnes['0'+i] = i;
         snTens['0'+i] = tens;
 	tens += 10;
 	}
     }
 }
 
 boolean qaFastReadNext(struct lineFile *lf, UBYTE **retQ, int *retSize, char **retName)
 /* Read in next QA entry as fast as we can. Return FALSE at EOF. 
  * The returned QA info and name will be overwritten by the next call
  * to this function. */
 {
 char *line, *words[256], *s = NULL;
 static char name[256];
 static int bufSize = 0;
 static UBYTE *buf;
 int bufIx = 0;
 int lineSize, wordCount;
 int i;
 
 initShortNum();
 /* Read to first non-space line.  Complain if it doesn't start with '>' */
 while (lineFileNext(lf, &line, &lineSize))
     {
     wordCount = chopLine(line, words);
     if (wordCount != 0)
         {
 	s = words[0];
 	if (s[0] != '>')
 	    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
 	if (s[1] != 0)
 	    s += 1;
 	else
 	    {
 	    if (wordCount == 1)
 	        errAbort("Expecting '>name' line %d of %s", lf->lineIx, lf->fileName);
 	    else
 	        s = words[1];
 	    }
         break;
 	}
     }
 if (s == NULL)
     return FALSE;
 strncpy(name, s, sizeof(name));    	
 
 /* Read numbers until next '>' line or end of file. */
 /* Read to first non-space line.  Complain if it doesn't start with '>' */
 while (lineFileNext(lf, &line, &lineSize))
     {
     if (line[0] == '>')
         {
         lineFileReuse(lf);
 	break;
 	}
     wordCount = chopLine(line, words);
     for (i=0; i<wordCount; ++i)
         {
 	if (bufIx >= bufSize)
 	    {
 	    int newSize = bufSize + bufSize;
 	    if (newSize == 0)
 	         newSize = 16*1024;
             buf = needMoreMem(buf, bufSize, newSize);
 	    bufSize = newSize;
 	    }
 	buf[bufIx++] = readShortNum(words[i], lf);
 	}
     }
 *retQ = buf;
 *retSize = bufIx;
 *retName = name;
 return TRUE;
 }
 
 static void attatchQaInfo(struct hash *hash, char *name, UBYTE *q, int size)
 /* Attatch quality info of given size to qa in hash. */
 {
 struct qaSeq *qa;
 if ((qa = hashFindVal(hash, name)) == NULL)
     errAbort("%s is in qa but not fa", name);
 if (size != qa->size)
     errAbort("%s is %d bases in fa but is %d bases in qa",
 	    name, size, qa->size);
 if (qa->qa != NULL)
     warn("Duplicate quality info in %s, ignoring all but first", name);
 else
     qa->qa = q;
 }
 
 static void checkAllPresent(struct qaSeq *qaList)
 /* Make sure all qa's in list have quality data. */
 {
 struct qaSeq *qa;
 
 for (qa = qaList; qa != NULL; qa = qa->next)
      {
      if (qa->qa == NULL)
 	 {
 	 warn("No quality info for %s", qa->name);
 	 qaMakeFake(qa);
 	 }
      }
 }
 
 static void fillInQa(char *qaName, struct hash *hash, struct qaSeq *qaList)
 /* Hash contains qaSeq's with DNA sequence but no
  * quality info.  Fill in quality info from .qa file. */
 {    
 struct lineFile *lf = lineFileOpen(qaName, TRUE);
 struct qaSeq seq;
 
 while (qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name))
     {
     seq.qa = cloneMem(seq.qa, seq.size+1);
     attatchQaInfo(hash, seq.name, seq.qa, seq.size);
     }
 lineFileClose(&lf);
 checkAllPresent(qaList);
 }
 
 static void fillInQac(char *qacName, struct hash *hash, struct qaSeq *qaList)
 /* Hash contains qaSeq's with DNA sequence but no
  * quality info.  Fill in quality info from .qac file. */
 { 
 boolean isSwapped;   
 FILE *f = qacOpenVerify(qacName, &isSwapped);
 struct qaSeq *qa;
 
 while ((qa = qacReadNext(f, isSwapped)) != NULL)
     {
     /* Transfer qa->qa to hash. */
     attatchQaInfo(hash, qa->name, qa->qa, qa->size);
     qa->qa = NULL;
     qaSeqFree(&qa);
     }
 fclose(f);
 checkAllPresent(qaList);
 }
 
 static struct qaSeq *qaFaRead(char *qaName, char *faName, boolean mustReadQa)
 /* Read both QA(C) and FA files. */
 {
 FILE *f = NULL;
 struct qaSeq *qaList = NULL, *qa;
 struct hash *hash = newHash(0);
 struct qaSeq seq;
 
 /* Read in all the .fa files. */
 f = mustOpen(faName, "r");
 while (faFastReadNext(f, &seq.dna, &seq.size, &seq.name))
     {
     if (hashLookup(hash, seq.name) != NULL)
         {
 	warn("Duplicate %s, ignoring all but first.", seq.name);
 	continue;
 	}
     AllocVar(qa);
     hashAdd(hash, seq.name, qa);
     qa->name = cloneString(seq.name);
     qa->dna = cloneMem(seq.dna, seq.size+1);
     qa->size = seq.size;
     slAddHead(&qaList, qa);
     }
 fclose(f);
 
 /* Read in corresponding .qa files and make sure they correspond.
  * If no file exists then fake it. */
 if (qaName)
     {
     if (!mustReadQa && !fileExists(qaName))
 	{
 	warn("No quality file %s", qaName);
 	for (qa = qaList; qa != NULL; qa = qa->next)
 	     qaMakeFake(qa);
 	}
     else
 	{
 	if (isQacFile(qaName))
 	    fillInQac(qaName, hash, qaList);
 	else
 	    fillInQa(qaName, hash, qaList);
 	}
     }
 freeHash(&hash);
 slReverse(&qaList);
 return qaList;
 }
 
 struct qaSeq *qaMustReadBoth(char *qaName, char *faName)
 /* Read both QA(C) and FA files into qaSeq structure or die trying. */
 {
 return qaFaRead(qaName, faName, TRUE);
 }
 
 struct qaSeq *qaReadBoth(char *qaName, char *faName)
 /* Attempt to read both QA and FA files into qaSeq structure.
  * FA file must exist, but if qaFile doesn't exist it will just
  * make it up. */
 {
 return qaFaRead(qaName, faName, FALSE);
 }
 
 
 struct qaSeq *qaReadNext(struct lineFile *lf)
 /* Read in next record in .qa file. */
 {
 struct qaSeq *qa, seq;
 
 if (!qaFastReadNext(lf, &seq.qa, &seq.size, &seq.name))
     return NULL;
 AllocVar(qa);
 qa->name = cloneString(seq.name);
 qa->size = seq.size;
 qa->qa = cloneMem(seq.qa, seq.size+1);
 return qa;
 }
 
 
 struct qaSeq *qaRead(char *qaName)
 /* Read in a .qa file (all records) or die trying. */
 {
 struct qaSeq *qa, *qaList = NULL;
 if (isQacFile(qaName))
     {
     boolean isSwapped;
     FILE *f = qacOpenVerify(qaName, &isSwapped);
     while ((qa = qacReadNext(f, isSwapped)) != NULL)
 	{
 	slAddHead(&qaList, qa);
 	}
     fclose(f);
     }
 else
     {
     struct lineFile *lf = lineFileOpen(qaName, TRUE);
     while ((qa = qaReadNext(lf)) != NULL)
 	{
 	slAddHead(&qaList, qa);
 	}
     lineFileClose(&lf);
     }
 slReverse(&qaList);
 return qaList;
 }
 
 void qaWriteNext(FILE *f, struct qaSeq *qa)
 /* Write next record to a .qa file. */
 {
 int i;
 int lineSize = 20;
 int ix = 0;
 
 fprintf(f, ">%s\n", qa->name);
 for (i=0; i<qa->size; ++i)
     {
     fprintf(f, " %2d", qa->qa[i]);
     if (++ix == lineSize || i == qa->size-1)
         {
 	ix = 0;
 	fputc('\n', f);
 	}
     }
 }
 
 void qacWriteHead(FILE *f)
 /* Write qac header. */
 {
 bits32 sig = qacSig;
 writeOne(f, sig);
 }
 
 void qacWriteNext(FILE *f, struct qaSeq *qa)
 /* Write next record to qac file. */
 {
 int cBufSize = qa->size + (qa->size>>1);
 signed char *cBuf = needLargeMem(cBufSize);
 bits32 cSize, origSize;
 
 origSize = qa->size;
 cSize = rleCompress(qa->qa, qa->size, cBuf);
 writeString(f, qa->name);
 writeOne(f, origSize);
 writeOne(f, cSize);
 mustWrite(f, cBuf, cSize);
 freeMem(cBuf);
 }
 
 struct qaSeq *qacReadNext(FILE *f, boolean isSwapped)
 /* Read in next record in .qac file. */
 {
 bits32 cSize, origSize;
 struct qaSeq *qa;
 signed char *buf;
 char *s;
 
 s = readString(f);
 if (s == NULL)
    return NULL;
 AllocVar(qa);
 qa->name = s;
 mustReadOne(f, origSize);
 if (isSwapped)
     origSize = byteSwap32(origSize);
 mustReadOne(f, cSize);
 if (isSwapped)
     cSize = byteSwap32(cSize);
 qa->size = origSize;
 qa->qa = needLargeMem(origSize);
 buf = needLargeMem(cSize);
 mustRead(f, buf, cSize);
 rleUncompress(buf, cSize, qa->qa, origSize);
 freeMem(buf);
 return qa;
 }
 
 
 boolean isQacFile(char *fileName)
 /* Return TRUE if fileName is a qac file. */
 {
 return endsWith(fileName, ".qac");
 }
 
 FILE *qacOpenVerify(char *fileName, boolean *retIsSwapped)
 /* Open qac file, and verify that it is indeed a qac file. */
 {
 FILE *f = mustOpen(fileName, "rb");
 bits32 sig;
 mustReadOne(f, sig);
 if (sig == qacSig)
    *retIsSwapped = FALSE;
 else if (sig == caqSig)
    *retIsSwapped = TRUE;
 else
    errAbort("%s is not a good .qac file", fileName);
 return f;
 }
 
 void upOneDirFromFile(char *path, char dir[256], char name[128], 
      char ext[64])
 /* Return parent directory given pathName.  Given
  * /usr/include/sys/io.h return /usr/include/   Given
  * io.h return ../../  Given sys/io.h return ../  
  * The parent directory will be returned in dir.  The file name
  * and extension will be returned in the name and ext parameters. */
 {
 char *s;
 int len;
 
 splitPath(path, dir, name, ext);
 
 /* Handle case where no directory in pathName. */
 if (dir[0] == 0)
     {
     strcpy(dir, "../../");
     return;
     }
 
 /* Remove trailing / from current dir and see if there's
  * another / before it. */
 len = strlen(dir);
 if (dir[len-1] == '/')
     dir[len-1] = 0;
 s = strrchr(dir, '/');
 
 /* Case where convert dir/name to ..*/
 if (s == NULL)
     {
     strcpy(dir, "../");
     return;
     }
 /* Case where convert parent/child/name to parent */
 s[1] = 0;
 }
 
 #ifdef UCSC
 #endif /* UCSC */
 char *qacPathFromFaPath(char *faName)
 /* Given an fa path name return path name of corresponding qac
  * file.  Copy this result somewhere if you wish to keep it 
  * longer than the next call to qacPathFromFaPath. */
 {
 static char qacPath[512];
 char dir[256], name[128], ext[64];
 
 upOneDirFromFile(faName, dir, name, ext);
 safef(qacPath, sizeof(qacPath), "%sqac/%s.qac", dir, name);
 return qacPath;
 }
 
 #ifdef SANGER
 char *qacPathFromFaPath(char *faName)
 /* Given an fa path name return path name of corresponding qac
  * file.  Copy this result somewhere if you wish to keep it 
  * longer than the next call to qacPathFromFaPath. */
 {
 static char qacPath[512];
 strcpy(qacPath, faName);
 chopSuffix(qacPath);
 strcat(qacPath, ".qac");
 return qacPath;
 }
 #endif /* SANGER */