src/lib/obscure.c 1.51

1.51 2009/11/25 07:16:38 kent
Adding 'hashThisEqThatLine.'
Index: src/lib/obscure.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/obscure.c,v
retrieving revision 1.50
retrieving revision 1.51
diff -b -B -U 1000000 -r1.50 -r1.51
--- src/lib/obscure.c	24 Nov 2009 03:58:25 -0000	1.50
+++ src/lib/obscure.c	25 Nov 2009 07:16:38 -0000	1.51
@@ -1,722 +1,731 @@
 /* Obscure stuff that is handy every now and again. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include <unistd.h>
 #include "portable.h"
 #include "localmem.h"
 #include "hash.h"
 #include "obscure.h"
 #include "linefile.h"
 
 /* Version-control id keyword string for this file (presumably expanded by
  * RCS/CVS on checkout - TODO confirm). */
 static char const rcsid[] = "$Id$";
 static int _dotForUserMod = 100; /* How often does dotForUser() output a dot. */
 
 long incCounterFile(char *fileName)
 /* Increment a counter kept on disk in fileName and return the new value.
  * The counter is stored in binary as a native long (sizeof(long) bytes).
  * If the file can't be opened for update it is created and the count
  * starts at 1.  If it can't be created either, 1 is still returned but
  * nothing is written.  Aborts if the write or the close fails. */
 {
 long val = 0;
 FILE *f = fopen(fileName, "r+b");
 if (f != NULL)
     {
     mustReadOne(f, val);
     rewind(f);      /* Back to start so new value overwrites the old one. */
     }
 else
     {
     f = fopen(fileName, "wb");
     }
 ++val;
 if (f != NULL)
     {
     /* A failed write would otherwise silently lose the increment. */
     if (fwrite(&val, sizeof(val), 1, f) != 1)
         errnoAbort("fwrite failed");
     if (fclose(f) != 0)
         errnoAbort("fclose failed");
     }
 return val;
 }
 
 int digitsBaseTwo(unsigned long x)
 /* Return how many binary digits are needed to write x, that is the
  * position of its highest set bit.  Zero needs 0 digits. */
 {
 int bitCount;
 for (bitCount = 0; x != 0; x >>= 1)
     ++bitCount;
 return bitCount;
 }
 
 int digitsBaseTen(int x)
 /* Return number of characters needed to print x in base 10.  A leading
  * minus sign on negative numbers counts as one character. */
 {
 int digCount = 1;
 /* Work on the magnitude as unsigned: negating INT_MIN as an int overflows. */
 unsigned int magnitude = (unsigned int)x;
 if (x < 0)
     {
     digCount = 2;
     magnitude = 0u - magnitude;
     }
 while (magnitude >= 10)
     {
     digCount += 1;
     magnitude /= 10;
     }
 return digCount;
 }
 
 void writeGulp(char *file, char *buf, int size)
 /* Open file for writing, store size bytes from buf in it, and close it. */
 {
 FILE *out = mustOpen(file, "w");
 mustWrite(out, buf, size);
 carefulClose(&out);
 }
 
 void readInGulp(char *fileName, char **retBuf, size_t *retSize)
 /* Load the entire contents of fileName into a newly allocated buffer
  * returned in *retBuf.  One extra byte is allocated and set to zero so
  * the buffer can be treated as a string.  If retSize is not NULL it
  * gets the file size (not counting the added zero). */
 {
 size_t byteCount = (size_t)fileSize(fileName);
 FILE *in = mustOpen(fileName, "rb");
 char *contents = needLargeMem(byteCount+1);
 *retBuf = contents;
 mustRead(in, contents, byteCount);
 contents[byteCount] = 0;      /* Zero terminate in case caller wants a string. */
 fclose(in);
 if (retSize != NULL)
     *retSize = byteCount;
 }
 
 void readAllWords(char *fileName, char ***retWords, int *retWordCount, char **retBuf)
 /* Read in whole file and break it into white space separated words.
  * *retWords gets the word array (NULL if the file has no words),
  * *retWordCount the number of words, and *retBuf the file contents
  * buffer.  You need to freeMem both *retWords and *retBuf when done. */
 {
 char *fileText = NULL;
 char **wordArray = NULL;
 size_t textSize;
 int count;
 readInGulp(fileName, &fileText, &textSize);
 /* First pass with a NULL array just counts, second pass fills the array. */
 count = chopByWhite(fileText, NULL, 0);
 if (count != 0)
     {
     wordArray = needMem(count * sizeof(wordArray[0]));
     chopByWhite(fileText, wordArray, count);
     }
 *retBuf = fileText;
 *retWordCount = count;
 *retWords = wordArray;
 }
 
 int countWordsInFile(char *fileName)
 /* Return total number of white space separated words in file. */
 {
 int total = 0;
 char *text;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 for (;;)
     {
     if (!lineFileNext(lf, &text, NULL))
         break;
     total += chopByWhite(text, NULL, 0);
     }
 lineFileClose(&lf);
 return total;
 }
 
 struct hash *hashWordsInFile(char *fileName, int hashSize)
 /* Return a hash, created with newHash(hashSize), that has every word
  * of the file as a key.  Values are all NULL. */
 {
 struct hash *wordHash = newHash(hashSize);
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *rest;
 while (lineFileNext(lf, &rest, NULL))
     {
     char *w;
     for (w = nextWord(&rest); w != NULL; w = nextWord(&rest))
         hashAdd(wordHash, w, NULL);
     }
 lineFileClose(&lf);
 return wordHash;
 }
 
 struct hash *hashNameIntFile(char *fileName)
 /* Read a two column file where the first column is a name and the
  * second an integer.  Return a hash keyed by name holding the
  * integer values. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct hash *nameToInt = hashNew(16);
 char *fields[2];
 while (lineFileRow(lf, fields))
     {
     hashAddInt(nameToInt, fields[0], lineFileNeedNum(lf, fields, 1));
     }
 lineFileClose(&lf);
 return nameToInt;
 }
 
 struct hash *hashTwoColumnFile(char *fileName)
 /* Read a two column file (key, value) and return a hash keyed by the
  * first column.  Each value is a copy of the second column made with
  * lmCloneString on the hash's own lm field. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct hash *keyToVal = hashNew(16);
 char *fields[2];
 while (lineFileRow(lf, fields))
     {
     char *valCopy = lmCloneString(keyToVal->lm, fields[1]);
     hashAdd(keyToVal, fields[0], valCopy);
     }
 lineFileClose(&lf);
 return keyToVal;
 }
 
 struct slName *readAllLines(char *fileName)
 /* Read all lines of file into a list, one slName per line, in file
  * order.  (Removes trailing carriage return.) */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct slName *list = NULL, *el;
 char *line;
 
 while (lineFileNext(lf, &line, NULL))
      {
      el = newSlName(line);
      slAddHead(&list, el);
      }
 /* Close the lineFile like the other readers in this file do, so it isn't leaked. */
 lineFileClose(&lf);
 slReverse(&list);      /* Built by adding to head, so flip to file order. */
 return list;
 }
 
 void copyFile(char *source, char *dest)
 /* Copy file from source to dest.  dest is made with creat() using mode
  * 0777.  Aborts with a message if either file can't be opened, or on a
  * read, write or close error. */
 {
 int bufSize = 64*1024;
 char *buf = needMem(bufSize);
 int bytesRead;
 int s, d;
 
 s = open(source, O_RDONLY);
 if (s < 0)
     errAbort("Couldn't open %s. %s\n", source, strerror(errno));
 d = creat(dest, 0777);
 if (d < 0)
     {
     int err = errno;    /* Save it: close() below may overwrite errno. */
     close(s);
     errAbort("Couldn't open %s. %s\n", dest, strerror(err));
     }
 while ((bytesRead = read(s, buf, bufSize)) > 0)
     {
     /* write() may take fewer bytes than asked for, so keep going until
      * the whole chunk is out. */
     char *pt = buf;
     int remaining = bytesRead;
     while (remaining > 0)
         {
         int written = write(d, pt, remaining);
         if (written <= 0)
             errAbort("Write error on %s. %s\n", dest, strerror(errno));
         pt += written;
         remaining -= written;
         }
     }
 /* A negative read is an error, not end of file. */
 if (bytesRead < 0)
     errAbort("Read error on %s. %s\n", source, strerror(errno));
 close(s);
 if (close(d) != 0)
     errnoAbort("close failed");
 freeMem(buf);
 }
 
 void copyOpenFile(FILE *inFh, FILE *outFh)
 /* Copy everything remaining in the open stdio file inFh to outFh, one
  * character at a time.  After the copy, aborts if either stream has
  * its error indicator set. */
 {
 for (;;)
     {
     int ch = fgetc(inFh);
     if (ch == EOF)
         break;
     fputc(ch, outFh);
     }
 if (ferror(inFh))
     errnoAbort("file read failed");
 if (ferror(outFh))
     errnoAbort("file write failed");
 }
 
 void cpFile(int s, int d)
 /* Copy from file descriptor s to file descriptor d in 64k chunks until
  * read() returns zero or less.  Note a read error ends the copy the
  * same way end of file does. */
 {
 int chunkSize = 64*1024;
 char *chunk = needMem(chunkSize);
 int got;
 
 while ((got = read(s, chunk, chunkSize)) > 0)
     mustWriteFd(d, chunk, got);
 freeMem(chunk);
 }
 
 void *intToPt(int i)
 /* Convert integer to pointer. Use when really want to store an
  * int in a pointer field.  Get the int back with ptToInt(). */
 {
 /* Go through size_t rather than doing arithmetic on a NULL pointer,
  * which the C standard leaves undefined. */
 return (void *)(size_t)i;
 }
 
 int ptToInt(void *pt)
 /* Convert pointer to integer.  Use to get back an int that was
  * stored in a pointer field with intToPt(). */
 {
 /* Cast instead of subtracting a NULL pointer, which is undefined in C. */
 return (int)(size_t)pt;
 }
 
 void *sizetToPt(size_t i)
 /* Convert size_t to pointer. Use when really want to store a
  * size_t in a pointer.  Get the value back with ptToSizet(). */
 {
 /* Plain cast: arithmetic on a NULL pointer is undefined in C. */
 return (void *)i;
 }
 
 size_t ptToSizet(void *pt)
 /* Convert pointer to size_t.  Use to get back a size_t that was
  * stored in a pointer with sizetToPt(). */
 {
 /* Plain cast: subtracting a NULL pointer is undefined in C. */
 return (size_t)pt;
 }
 
 boolean parseQuotedStringNoEscapes( char *in, char *out, char **retNext)
 /* Read quoted string from in.  The first character of in is taken to be
  * the quote character, and the string runs to the next occurrence of it.
  * The text between the quotes is written, zero terminated, to out, which
  * may be the same as in.  If retNext is not NULL it is set to point just
  * past the closing quote.
  * Returns FALSE (after a warning) if there is no closing quote; in that
  * case out is not terminated and *retNext is not set.
  * Unlike parseQuotedString() backslash is not an escape character here,
  * it is just passed through. */
 {
 char *scan = in;
 int quoteChar = *scan++;
 char c;
 
 for (c = *scan++; c != 0; c = *scan++)
     {
     if (c == quoteChar)
         {
         *out = 0;
         if (retNext != NULL)
             *retNext = scan;
         return TRUE;
         }
     *out++ = c;
     }
 /* Hit end of input before the closing quote. */
 warn("Unmatched %c", quoteChar);
 return FALSE;
 }
 
 boolean parseQuotedString( char *in, char *out, char **retNext)
 /* Read quoted string from in.  The first character of in is taken to be
  * the quote character.  The unquoted text is written, zero terminated,
  * to out, which may be the same as in.  If retNext is not NULL it is set
  * to point just past the closing quote.
  * Backslash is an escape: backslash-backslash gives one backslash and
  * backslash-quote gives a literal quote.  A backslash before any other
  * character is kept, along with that character.
  * Returns FALSE (after a warning) if input ends before the closing
  * quote; in that case out is not terminated and *retNext is not set. */
 {
 char *scan = in;
 int quoteChar = *scan++;
 boolean afterBackslash = FALSE;
 char c;
 
 for (;;)
     {
     c = *scan++;
     if (c == 0)
         {
         warn("Unmatched %c", quoteChar);
         return FALSE;
         }
     if (afterBackslash)
         {
         /* Only backslash and the quote char are really escaped; for
          * anything else the backslash stays in the output. */
         if (c != '\\' && c != quoteChar)
             *out++ = '\\';
         *out++ = c;
         afterBackslash = FALSE;
         continue;
         }
     if (c == '\\')
         {
         afterBackslash = TRUE;
         continue;
         }
     if (c == quoteChar)
         break;
     *out++ = c;
     }
 *out = 0;
 if (retNext != NULL)
     *retNext = scan;
 return TRUE;
 }
 
 char *nextQuotedWord(char **pLine)
 /* Generalization of nextWord.  Skips leading spaces, then if the next
  * thing starts with a single or double quote returns the quoted string
  * (unquoted in place, without the quotes), otherwise returns
  * nextWord(pLine).  Updates *pLine to point past what is returned.
  * Returns NULL at end of line or if a quoted string is not closed. */
 {
 char *start = skipLeadingSpaces(*pLine);
 if (start == NULL || start[0] == 0)
     return NULL;
 if (*start != '"' && *start != '\'')
     return nextWord(pLine);
 /* Quoted: parse in place so result can be returned without allocating. */
 if (parseQuotedString(start, start, pLine))
     return start;
 return NULL;
 }
 
 void escCopy(char *in, char *out, char toEscape, char escape)
 /* Copy zero terminated string in to out, putting the escape character
  * in front of every toEscape character.  The terminating zero is copied
  * too.  Out better be big enough.  (Worst case is strlen(in)*2 + 1.) */
 {
 char c;
 do
     {
     c = *in++;
     if (c == toEscape)
         *out++ = escape;
     *out++ = c;
     }
 while (c != 0);
 }
 
 char *makeEscapedString(char *in, char toEscape)
 /* Return a newly allocated copy of in where every toEscape character
  * has a backslash put in front of it.  When done freeMem result. */
 {
 int plainLen = strlen(in);
 int escapeCount = countChars(in, toEscape);
 /* One extra byte per escaped char, plus one for the terminating zero. */
 char *escaped = needMem(plainLen + escapeCount + 1);
 escCopy(in, escaped, toEscape, '\\');
 return escaped;
 }
 
 char *makeQuotedString(char *in, char quoteChar)
 /* Return a newly allocated string that is in surrounded by quoteChar,
  * with any quoteChar inside it preceded by a backslash.  freeMem result
  * when done. */
 {
 int bodyLen = strlen(in) + countChars(in, quoteChar);
 int closeIx = bodyLen + 1;      /* Index of the closing quote. */
 char *quoted = needMem(bodyLen + 3);   /* Two quotes plus terminating zero. */
 quoted[0] = quoteChar;
 escCopy(in, quoted+1, quoteChar, '\\');
 /* The closing quote goes on top of the zero escCopy wrote.
  * NOTE(review): the final terminator at closeIx+1 is not written here;
  * presumably needMem returns zeroed memory - confirm. */
 quoted[closeIx] = quoteChar;
 return quoted;
 }
 
-struct hash *hashVarLine(char *line, int lineIx)
+struct hash *hashThisEqThatLine(char *line, int lineIx, boolean firstStartsWithLetter)
 /* Return a symbol table from a line of form:
- *   var1=val1 var2='quoted val2' var3="another val" */
+ *   1-this1=val1 2-this='quoted val2' var3="another val" 
+ * If firstStartsWithLetter is true, then the left side of the equals must start with
+ * a letter. */
 {
 char *dupe = cloneString(line);
 char *s = dupe, c;
 char *var, *val;
 struct hash *hash = newHash(8);
 
 for (;;)
     {
     if ((var = skipLeadingSpaces(s)) == NULL)
         break;
 
     if ((c = *var) == 0)
         break;
-    if (!isalpha(c))
+    if (firstStartsWithLetter && !isalpha(c))
 	errAbort("line %d of custom input: variable needs to start with letter '%s'", lineIx, var);
     val = strchr(var, '=');
     if (val == NULL)
         {
         errAbort("line %d of var %s in custom input: %s \n missing = in var/val pair", lineIx, var, line);
         }
     *val++ = 0;
     c = *val;
     if (c == '\'' || c == '"')
         {
 	if (!parseQuotedString(val, val, &s))
 	    errAbort("line %d of input: missing closing %c", lineIx, c);
 	}
     else
 	{
 	s = skipToSpaces(val);
 	if (s != NULL) *s++ = 0;
 	}
     hashAdd(hash, var, cloneString(val));
     }
 freez(&dupe);
 return hash;
 }
 
+struct hash *hashVarLine(char *line, int lineIx)
+/* Return a symbol table from a line of form:
+ *   var1=val1 var2='quoted val2' var3="another val" */
+{
+return hashThisEqThatLine(line, lineIx, TRUE);
+}
+
 struct slName *stringToSlNames(char *string)
 /* Convert string to a list of slNames.  Items are separated by white
  * space, except that an item starting with a single or double quote
  * runs to the matching closing quote and so may hold several words.
  * Quotes if any are stripped.  Aborts if a quote is not closed.  The
  * input string itself is not changed. */
 {
 struct slName *names = NULL, *item;
 char *copy = cloneString(string);      /* Work on a copy we can chop up. */
 char *pos = copy;
 
 while ((pos = skipLeadingSpaces(pos)) != NULL && *pos != 0)
     {
     char first = *pos;
     char *next;
     if (first == '\'' || first == '"')
         {
         if (!parseQuotedString(pos, pos, &next))
             errAbort("missing closing %c in %s", first, string);
         }
     else
         {
         next = skipToSpaces(pos);
         if (next != NULL)
             *next++ = 0;
         }
     item = slNameNew(pos);
     slAddHead(&names, item);
     pos = next;
     }
 freeMem(copy);
 slReverse(&names);
 return names;
 }
 
 struct slName *charSepToSlNames(char *string, char c)
 /* Convert a list of items separated by character c to a slName list,
  * in the same order as in string.  The separator after the last item
  * is optional.  (That is, for a comma-separated list a,b,c and a,b,c,
  * are equivalent.)  NULL or empty string gives a NULL list. */
 {
 struct slName *list = NULL, *item;
 char *start = string;
 
 while (start != NULL && start[0] != 0)
     {
     char *sep = strchr(start, c);
     if (sep != NULL)
         {
         item = slNameNewN(start, sep - start);
         start = sep+1;
         }
     else
         {
         /* No more separators, the rest is the last item. */
         item = slNameNew(start);
         start = NULL;
         }
     slAddHead(&list, item);
     }
 slReverse(&list);
 return list;
 }
 
 struct slName *commaSepToSlNames(char *commaSep)
 /* Convert comma-separated list of items to slName list.  Same as
  * charSepToSlNames() with ',' as the separator. */
 {
 struct slName *list = charSepToSlNames(commaSep, ',');
 return list;
 }
 
 
 void sprintLongWithCommas(char *s, long long l)
 /* Print out a long number with commas at thousands, millions, etc. into
  * s.  Works for any size of number, including trillions and up, and for
  * negative numbers, where the minus sign comes before the first group.
  * s needs room for up to 27 characters (19 digits, 6 commas, sign and
  * terminating zero, for a 64 bit long long). */
 {
 char digits[24];        /* Plain digits; 20 plus zero is the 64 bit max. */
 int digitCount, i;
 /* Take magnitude as unsigned so the most negative value doesn't overflow. */
 unsigned long long magnitude = (unsigned long long)l;
 if (l < 0)
     {
     *s++ = '-';
     magnitude = 0ULL - magnitude;
     }
 digitCount = sprintf(digits, "%llu", magnitude);
 for (i = 0; i < digitCount; ++i)
     {
     /* Comma goes wherever a multiple of three digits remains. */
     if (i > 0 && (digitCount - i) % 3 == 0)
         *s++ = ',';
     *s++ = digits[i];
     }
 *s = 0;
 }
 
 void printLongWithCommas(FILE *f, long long l)
 /* Write a long number to file f with commas at thousands, millions,
  * etc.  Formatting is done by sprintLongWithCommas(). */
 {
 char withCommas[32];
 sprintLongWithCommas(withCommas, l);
 fputs(withCommas, f);
 }
 
 void shuffleArrayOfPointers(void *pointerArray, int arraySize, int shuffleCount)
 /* Randomly permute, in place, an array of arraySize pointers using
  * rand().  Note shuffleCount is not used: exactly one pass over the
  * array is made per call. */
 {
 void **slots = pointerArray;
 int ix;
 
 /* Swap each position with a random one at or after it (the method
  * from Cormen, et al). */
 for (ix = 0; ix < arraySize; ix++)
     {
     int remaining = arraySize - ix;
     int pick = ix + (rand() % remaining);
     void *held = slots[ix];
     slots[ix] = slots[pick];
     slots[pick] = held;
     }
 }
 
 void shuffleList(void *pList, int shuffleCount)
 /* Randomize order of slList.  Usage:
  *     shuffleList(&list, shuffleCount)
  * where list is a pointer to a structure that begins with a next
  * field.  Lists of less than two items are left alone.  The elements
  * are shuffled with four calls to shuffleArrayOfPointers(), which gets
  * shuffleCount passed along. */
 {
 struct slList **pL = (struct slList **)pList;
 struct slList *head = *pL;
 struct slList *node, **nodes;
 int count = slCount(head);
 int ix, pass;
 
 if (count <= 1)
     return;
 /* Put the elements in an array so they can be shuffled. */
 nodes = needLargeMem(count * sizeof(*nodes));
 ix = 0;
 for (node = head; node != NULL; node = node->next)
     nodes[ix++] = node;
 for (pass = 0; pass < 4; ++pass)
     shuffleArrayOfPointers(nodes, count, shuffleCount);
 /* Relink by adding to head, then flip so list follows array order. */
 head = NULL;
 for (ix = 0; ix < count; ++ix)
     {
     nodes[ix]->next = head;
     head = nodes[ix];
     }
 freeMem(nodes);
 slReverse(&head);
 *pL = head;
 }
 
 char *stripCommas(char *position)
 /* Make a new string that is a copy of position with the commas
  * stripped out.  Returns NULL if position is NULL.  The input is not
  * changed.  The result comes from cloneString (presumably to be
  * freed by the caller - confirm). */
 {
 char *newPos, *nPtr;
 
 /* Check for NULL before handing position to anything else. */
 if (position == NULL)
     return NULL;
 newPos = cloneString(position);
 nPtr = newPos;
 /* Copy each char over; only step forward in the output when the char
  * is not a comma, so commas get overwritten by what follows. */
 while ((*nPtr = *position++) != 0)
     if (*nPtr != ',')
         nPtr++;
 
 return newPos;
 }
 
 void dotForUserInit(int dotMod)
 /* Set how often dotForUser() outputs a dot: one dot every dotMod calls.
  * dotMod must be positive (checked by assert).  dotForUser() reads this
  * setting on its first call and each time it prints a dot. */
 {
 assert(dotMod > 0);
 _dotForUserMod = dotMod;
 }
 
 void dotForUser()
 /* Write a dot to stderr, and flush it, every _dotForUserMod times this
  * is called.  The countdown lives in a static variable, so it is
  * shared by all callers. */
 {
 static int countdown = -10;     /* -10 means not yet initialized. */
 if (countdown == -10)
     countdown = _dotForUserMod;
 
 countdown -= 1;
 if (countdown > 0)
     return;
 putc('.', stderr);
 fflush(stderr);
 countdown = _dotForUserMod;
 }
 
 void spaceToUnderbar(char *s)
 /* Convert white space to underbar.  Changes the zero terminated string
  * s in place; every character that isspace() says is white space
  * becomes '_'. */
 {
 char c;
 while ((c = *s) != 0)
     {
     /* Cast is needed: isspace() is undefined for negative values, which
      * plain char can hold for bytes above 127. */
     if (isspace((unsigned char)c))
         *s = '_';
     ++s;
     }
 }
 
 void rangeRoundUp(double start, double end, double *retStart, double *retEnd)
 /* Round start and end so that they cover a slightly bigger range, but with more round
  * numbers.  For instance 0.23:9.89 becomes 0:10.  Results go in *retStart and *retEnd.
  * A flat range (start equal to end) is stretched to include zero, or becomes 0:1 if
  * it is at zero.  Aborts if start is after end. */
 {
 double size = end - start;
 if (size < 0)
     errAbort("start (%g) after end (%g) in rangeRoundUp", start, end);
 
 /* Flat ranges get moved to include zero for scale. */
 if (size == 0.0)
     {
     if (start < 0.0)
         {
         *retStart = start;
         *retEnd = 0;
         }
     else if (start > 0.0)
         {
         *retStart = 0;
         *retEnd = end;
         }
     else
         {
         *retStart = 0;
         *retEnd = 1;
         }
     return;
     }
 
 /* Figure out "increment", which will be 1, 2, 5, or 10, or a multiple of 10 of these 
  * Want to have at least two increments in range. */
 double exponent = 1;
 double scaledSize = size;
 double increment = 0;
 /* First scale up to at least 100, then back down below 100, keeping
  * track of the power of ten in exponent. */
 while (scaledSize < 100)
     {
     scaledSize *= 10;
     exponent /= 10;
     }
 while (scaledSize >= 100)
     {
     scaledSize /= 10;
     exponent *= 10;
     }
 /* At this point have a number between 10 and 100 */
 if (scaledSize < 12)
     increment = 1;
 else if (scaledSize < 20)
     increment = 2;
 else if (scaledSize < 75)
     increment = 5;
 else
     increment = 10;
 increment *= exponent;
 
 /* Keep the increment counts as doubles: for a narrow range far from zero
  * (say 1e12 to 1e12+5) the count doesn't fit in an int. */
 double startInIncrements = floor(start/increment);
 double endInIncrements = ceil(end/increment);
 *retStart = startInIncrements * increment;
 *retEnd = endInIncrements * increment;
 }