24cf4a1294b7e146f488b877eb87b9f2e83eaa9c hiram Wed Feb 28 17:39:28 2024 -0800 moving static loadSizes() function from hg/utils/pslCheck.c to lib/common.c no redmine diff --git src/lib/common.c src/lib/common.c index ac30338..41160e5 100644 --- src/lib/common.c +++ src/lib/common.c @@ -1,3896 +1,3909 @@ /* Commonly used routines in a wide range of applications. * Strings, singly-linked lists, and a little file i/o. * * This file is copyright 2002 Jim Kent, but license is hereby * granted for all use - public, private or commercial. */ #include "common.h" #include "errAbort.h" #include "portable.h" #include "linefile.h" #include "hash.h" - +#include "sqlNum.h" void *cloneMem(void *pt, size_t size) /* Allocate a new buffer of given size, and copy pt to it. */ { void *newPt = needLargeMem(size); memcpy(newPt, pt, size); return newPt; } static char *cloneStringZExt(const char *s, int size, int copySize) /* Make a zero terminated copy of string in memory */ { char *d = needMem(copySize+1); copySize = min(size,copySize); memcpy(d, s, copySize); d[copySize] = 0; return d; } char *cloneStringZ(const char *s, int size) /* Make a zero terminated copy of string in memory */ { return cloneStringZExt(s, strlen(s), size); } char *cloneString(const char *s) /* Make copy of string in dynamic memory */ { int size = 0; if (s == NULL) return NULL; size = strlen(s); return cloneStringZExt(s, size, size); } char *cloneLongString(char *s) /* Make clone of long string. */ { size_t size = strlen(s); return cloneMem(s, size+1); } char *catTwoStrings(char *a, char *b) /* Allocate new string that is a concatenation of two strings. */ { int aLen = strlen(a), bLen = strlen(b); int len = aLen + bLen; char *newBuf = needLargeMem(len+1); memcpy(newBuf, a, aLen); memcpy(newBuf+aLen, b, bLen); newBuf[len] = 0; return newBuf; } char *catThreeStrings(char *a, char *b, char *c) /* Allocate new string that is a concatenation of three strings. 
*/
{
int aLen = strlen(a);
int bLen = strlen(b);
int cLen = strlen(c);
int total = aLen + bLen + cLen;
char *joined = needLargeMem(total+1);
char *dest = joined;
/* Lay the three pieces down back to back, then terminate. */
memcpy(dest, a, aLen);
dest += aLen;
memcpy(dest, b, bLen);
dest += bLen;
memcpy(dest, c, cLen);
joined[total] = 0;
return joined;
}

/* Reverse the order of the bytes. */
void reverseBytes(char *bytes, long length)
{
char *front = bytes;
char *back = bytes + length;    /* one past the last byte */
long swapsLeft;
/* Walk inward from both ends, exchanging one pair per pass. */
for (swapsLeft = (length>>1); swapsLeft > 0; --swapsLeft)
    {
    char saved = *front;
    --back;
    *front++ = *back;
    *back = saved;
    }
}

void reverseInts(int *a, int length)
/* Reverse the order of the integer array. */
{
int *front = a;
int *back = a + length;
int swapsLeft;
for (swapsLeft = (length>>1); swapsLeft > 0; --swapsLeft)
    {
    int saved = *front;
    --back;
    *front++ = *back;
    *back = saved;
    }
}

void reverseUnsigned(unsigned *a, int length)
/* Reverse the order of the unsigned array. */
{
unsigned *front = a;
unsigned *back = a + length;
int swapsLeft;
for (swapsLeft = (length>>1); swapsLeft > 0; --swapsLeft)
    {
    unsigned saved = *front;
    --back;
    *front++ = *back;
    *back = saved;
    }
}

void reverseDoubles(double *a, int length)
/* Reverse the order of the double array. */
{
double *front = a;
double *back = a + length;
int swapsLeft;
for (swapsLeft = (length>>1); swapsLeft > 0; --swapsLeft)
    {
    double saved = *front;
    --back;
    *front++ = *back;
    *back = saved;
    }
}

void reverseStrings(char **a, int length)
/* Reverse the order of the char* array. */
{
char **front = a;
char **back = a + length;
int swapsLeft;
for (swapsLeft = (length>>1); swapsLeft > 0; --swapsLeft)
    {
    char *saved = *front;
    --back;
    *front++ = *back;
    *back = saved;
    }
}

static int stringCmp(const void *va, const void *vb)
/* Compare function to sort array of strings.  Each argument points
 * at an array slot holding a char pointer. */
{
char *left = *(char **)va;
char *right = *(char **)vb;
return strcmp(left, right);
}

void sortStrings(char **array, int count)
/* Sort array using strcmp */
{
if (count <= 1)
    return;     /* nothing to order */
qsort(array, count, sizeof(array[0]), stringCmp);
}

/* Swap buffers a and b. */
void swapBytes(char *a, char *b, int length)
{
int remaining;
for (remaining = length; remaining > 0; --remaining, ++a, ++b)
    {
    char saved = *a;
    *a = *b;
    *b = saved;
    }
}

/** List managing routines. */

/* Count up elements in list.
*/ int slCount(const void *list) { struct slList *pt = (struct slList *)list; int len = 0; while (pt != NULL) { len += 1; pt = pt->next; } return len; } void *slElementFromIx(void *list, int ix) /* Return the ix'th element in list. Returns NULL * if no such element. */ { struct slList *pt = (struct slList *)list; int i; for (i=0;i<ix;i++) { if (pt == NULL) return NULL; pt = pt->next; } return pt; } int slIxFromElement(void *list, void *el) /* Return index of el in list. Returns -1 if not on list. */ { struct slList *pt; int ix = 0; for (pt = list, ix=0; pt != NULL; pt = pt->next, ++ix) if (el == (void*)pt) return ix; return -1; } void *slLastEl(void *list) /* Returns last element in list or NULL if none. */ { struct slList *next, *el; if ((el = list) == NULL) return NULL; while ((next = el->next) != NULL) el = next; return el; } /* Add new node to tail of list. * Usage: * slAddTail(&list, node); * where list and nodes are both pointers to structure * that begin with a next pointer. */ void slAddTail(void *listPt, void *node) { struct slList **ppt = (struct slList **)listPt; struct slList *n = (struct slList *)node; while (*ppt != NULL) { ppt = &((*ppt)->next); } n->next = NULL; *ppt = n; } void *slPopHead(void *vListPt) /* Return head of list and remove it from list. (Fast) */ { struct slList **listPt = (struct slList **)vListPt; struct slList *el = *listPt; if (el != NULL) { *listPt = el->next; el->next = NULL; } return el; } void *slPopTail(void *vListPt) /* Return tail of list and remove it from list. (Not so fast) */ { struct slList **listPt = (struct slList **)vListPt; struct slList *el = *listPt; if (el != NULL) { for (;;) { if (el->next == NULL) { *listPt = NULL; break; } listPt = &el->next; el = el->next; } } return el; } void *slCat(void *va, void *vb) /* Return concatenation of lists a and b. 
* Example Usage: * struct slName *a = getNames("a"); * struct slName *b = getNames("b"); * struct slName *ab = slCat(a,b) */ { struct slList *a = va; struct slList *b = vb; struct slList *end; if (a == NULL) return b; for (end = a; end->next != NULL; end = end->next) ; end->next = b; return a; } void slReverse(void *listPt) /* Reverse order of a list. * Usage: * slReverse(&list); */ { struct slList **ppt = (struct slList **)listPt; struct slList *newList = NULL; struct slList *el, *next; next = *ppt; while (next != NULL) { el = next; next = el->next; el->next = newList; newList = el; } *ppt = newList; } void slFreeList(void *listPt) /* Free list */ { struct slList **ppt = (struct slList**)listPt; struct slList *next = *ppt; struct slList *el; while (next != NULL) { el = next; next = el->next; freeMem((char*)el); } *ppt = NULL; } void slFreeListWithFunc(void *listPt, void (*freeFunc)()) /* Free a list by calling freeFunc on each element. * listPt must be a pointer to a pointer to some slList-compatible struct (&list). * freeFunc must take one arg: a pointer to a pointer to the item it is going to free. */ { struct slList **pList = (struct slList**)listPt; struct slList *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; freeFunc(&el); } *pList = NULL; } void slSort(void *pList, int (*compare )(const void *elem1, const void *elem2)) /* Sort a singly linked list with Qsort and a temporary array. 
*/ { struct slList **pL = (struct slList **)pList; struct slList *list = *pL; int count; count = slCount(list); if (count > 1) { struct slList *el; struct slList **array; int i; array = needLargeMem(count * sizeof(*array)); for (el = list, i=0; el != NULL; el = el->next, i++) array[i] = el; qsort(array, count, sizeof(array[0]), compare); list = NULL; for (i=0; i<count; ++i) { array[i]->next = list; list = array[i]; } freeMem(array); slReverse(&list); *pL = list; } } void slUniqify(void *pList, int (*compare )(const void *elem1, const void *elem2), void (*free)()) /* Return sorted list with duplicates removed. * Compare should be same type of function as slSort's compare (taking * pointers to pointers to elements. Free should take a simple * pointer to dispose of duplicate element, and can be NULL. */ { struct slList **pSlList = (struct slList **)pList; struct slList *oldList = *pSlList; struct slList *newList = NULL, *el; slSort(&oldList, compare); while ((el = slPopHead(&oldList)) != NULL) { if ((newList == NULL) || (compare(&newList, &el) != 0)) slAddHead(&newList, el); else if (free != NULL) free(el); } slReverse(&newList); *pSlList = newList; } void slSortMerge(void *pA, void *b, CmpFunction *compare) // Merges and sorts a pair of singly linked lists using slSort. { struct slList **pList = (struct slList **)pA; slCat(*pList, b); slSort(pList,compare); } void slSortMergeUniq(void *pA, void *b, CmpFunction *compare, void (*free)()) // Merges and sorts a pair of singly linked lists leaving only unique // items via slUniqufy. duplicate itens are defined by the compare routine // returning 0. If free is provided, items dropped from list can disposed of. { struct slList **pList = (struct slList **)pA; *pList = slCat(*pList, b); slUniqify(pList,compare,free); } boolean slRemoveEl(void *vpList, void *vToRemove) /* Remove element from singly linked list. Usage: * slRemove(&list, el); * Returns TRUE if element in list. 
*/ { struct slList **pList = vpList; struct slList *toRemove = vToRemove; struct slList *el, *next, *newList = NULL; boolean didRemove = FALSE; for (el = *pList; el != NULL; el = next) { next = el->next; if (el != toRemove) { slAddHead(&newList, el); } else didRemove = TRUE; } slReverse(&newList); *pList = newList; return didRemove; } struct slInt *slIntNew(int x) /* Return a new int. */ { struct slInt *a; AllocVar(a); a->val = x; return a; } int slIntCmp(const void *va, const void *vb) /* Compare two slInts. */ { const struct slInt *a = *((struct slInt **)va); const struct slInt *b = *((struct slInt **)vb); return a->val - b->val; } int slIntCmpRev(const void *va, const void *vb) /* Compare two slInts in reverse direction. */ { const struct slInt *a = *((struct slInt **)va); const struct slInt *b = *((struct slInt **)vb); return b->val - a->val; } struct slInt * slIntFind(struct slInt *list, int target) /* Find target in slInt list or return NULL */ { struct slInt *i; for (i=list;i;i=i->next) if (i->val == target) return i; return NULL; } struct slUnsigned *slUnsignedNew(unsigned x) /* Return a new int. */ { struct slUnsigned *a; AllocVar(a); a->val = x; return a; } static int doubleCmp(const void *va, const void *vb) /* Compare function to sort array of doubles. */ { const double *a = va; const double *b = vb; double diff = *a - *b; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } void doubleSort(int count, double *array) /* Sort an array of doubles. */ { if (count > 1) qsort(array, count, sizeof(array[0]), doubleCmp); } double doubleMedian(int count, double *array) /* Return median value in array. This will sort * the array as a side effect. 
*/ { double median; doubleSort(count, array); if ((count&1) == 1) median = array[count>>1]; else { count >>= 1; median = (array[count] + array[count-1]) * 0.5; } return median; } void doubleBoxWhiskerCalc(int count, double *array, double *retMin, double *retQ1, double *retMedian, double *retQ3, double *retMax) /* Calculate what you need to draw a box and whiskers plot from an array of doubles. */ { if (count <= 0) errAbort("doubleBoxWhiskerCalc needs a positive number, not %d for count", count); if (count == 1) { *retMin = *retQ1 = *retMedian = *retQ3 = *retMax = array[0]; return; } doubleSort(count, array); double min = array[0]; double max = array[count-1]; double median; int halfCount = count>>1; if ((count&1) == 1) median = array[halfCount]; else { median = (array[halfCount] + array[halfCount-1]) * 0.5; } double q1, q3; if (count <= 3) { q1 = 0.5 * (median + min); q3 = 0.5 * (median + max); } else { int q1Ix = count/4; int q3Ix = count - 1 - q1Ix; verbose(4, "count %d, q1Ix %d, q3Ix %d\n", count, q1Ix, q3Ix); q1 = array[q1Ix]; q3 = array[q3Ix]; } *retMin = min; *retQ1 = q1; *retMedian = median; *retQ3 = q3; *retMax = max; } struct slDouble *slDoubleNew(double x) /* Return a new double. */ { struct slDouble *a; AllocVar(a); a->val = x; return a; } int slDoubleCmp(const void *va, const void *vb) /* Compare two slDoubles. */ { const struct slDouble *a = *((struct slDouble **)va); const struct slDouble *b = *((struct slDouble **)vb); double diff = a->val - b->val; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } double slDoubleMedian(struct slDouble *list) /* Return median value on list. 
*/ { int i,count = slCount(list); struct slDouble *el; double *array, median; if (count == 0) errAbort("Can't take median of empty list"); AllocArray(array,count); for (i=0, el=list; i<count; ++i, el=el->next) array[i] = el->val; median = doubleMedian(count, array); freeMem(array); return median; } void slDoubleBoxWhiskerCalc(struct slDouble *list, double *retMin, double *retQ1, double *retMedian, double *retQ3, double *retMax) /* Calculate what you need to draw a box and whiskers plot from a list of slDoubles. */ { int i,count = slCount(list); struct slDouble *el; double *array; if (count == 0) errAbort("Can't take do slDoubleBoxWhiskerCalc of empty list"); AllocArray(array,count); for (i=0, el=list; i<count; ++i, el=el->next) array[i] = el->val; doubleBoxWhiskerCalc(count, array, retMin, retQ1, retMedian, retQ3, retMax); freeMem(array); } static int intCmp(const void *va, const void *vb) /* Compare function to sort array of ints. */ { const int *a = va; const int *b = vb; int diff = *a - *b; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } void intSort(int count, int *array) /* Sort an array of ints. */ { if (count > 1) qsort(array, count, sizeof(array[0]), intCmp); } int intMedian(int count, int *array) /* Return median value in array. This will sort * the array as a side effect. */ { int median; intSort(count, array); if ((count&1) == 1) median = array[count>>1]; else { count >>= 1; median = (array[count] + array[count-1]) * 0.5; } return median; } struct slName *newSlName(char *name) /* Return a new name. */ { struct slName *sn; if (name != NULL) { int len = strlen(name); sn = needMem(sizeof(*sn)+len); strcpy(sn->name, name); return sn; } else { AllocVar(sn); } return sn; } struct slName *slNameNewN(char *name, int size) /* Return new slName of given size. */ { struct slName *sn = needMem(sizeof(*sn) + size); memcpy(sn->name, name, size); return sn; } int slNameCmpCase(const void *va, const void *vb) /* Compare two slNames, ignore case. 
*/ { const struct slName *a = *((struct slName **)va); const struct slName *b = *((struct slName **)vb); return strcasecmp(a->name, b->name); } void slNameSortCase(struct slName **pList) /* Sort slName list, ignore case. */ { slSort(pList, slNameCmpCase); } int slNameCmp(const void *va, const void *vb) /* Compare two slNames. */ { const struct slName *a = *((struct slName **)va); const struct slName *b = *((struct slName **)vb); return strcmp(a->name, b->name); } int slNameCmpStringsWithEmbeddedNumbers(const void *va, const void *vb) /* Compare strings such as gene names that may have embedded numbers, * so that bmp4a comes before bmp14a */ { const struct slName *a = *((struct slName **)va); const struct slName *b = *((struct slName **)vb); return cmpStringsWithEmbeddedNumbers(a->name, b->name); } int slNameCmpWordsWithEmbeddedNumbers(const void *va, const void *vb) /* Compare strings such as gene names that may have embedded numbers, * in a string sensitive way so that bmp4a comes before bmp14a * and ABc and abC are treated as the same. A little slow. */ { const struct slName *a = *((struct slName **)va); const struct slName *b = *((struct slName **)vb); return cmpWordsWithEmbeddedNumbers(a->name, b->name); } void slNameSort(struct slName **pList) /* Sort slName list. */ { slSort(pList, slNameCmp); } boolean slNameInList(struct slName *list, char *string) /* Return true if string is in name list -- case insensitive. */ { struct slName *el; for (el = list; el != NULL; el = el->next) if (sameWord(string, el->name)) return TRUE; return FALSE; } boolean slNameInListUseCase(struct slName *list, char *string) /* Return true if string is in name list -- case sensitive. */ { struct slName *el; for (el = list; el != NULL; el = el->next) if (string != NULL && !strcmp(string, el->name)) return TRUE; return FALSE; } void *slNameFind(void *list, char *string) /* Return first element of slName list (or any other list starting * with next/name fields) that matches string. 
This is case insensitive. */ { struct slName *el; for (el = list; el != NULL; el = el->next) if (sameWord(string, el->name)) return el; return NULL; } int slNameFindIx(struct slName *list, char *string) /* Return index of first element of slName list (or any other * list starting with next/name fields) that matches string. * Return -1 if not found. */ { struct slName *el; int ix = 0; for (el = list; el != NULL; el = el->next, ix++) if (sameString(string, el->name)) return ix; return -1; } char *slNameStore(struct slName **pList, char *string) /* Put string into list if it's not there already. * Return the version of string stored in list. */ { struct slName *el; for (el = *pList; el != NULL; el = el->next) { if (sameString(string, el->name)) return el->name; } el = newSlName(string); slAddHead(pList, el); return el->name; } struct slName *slNameAddHead(struct slName **pList, char *name) /* Add name to start of list and return it. */ { struct slName *el = slNameNew(name); slAddHead(pList, el); return el; } struct slName *slNameAddTail(struct slName **pList, char *name) /* Add name to end of list (not efficient for long lists), * and return it. */ { struct slName *el = slNameNew(name); slAddTail(pList, el); return el; } struct slName *slNameCloneList(struct slName *list) /* Return clone of list. */ { struct slName *el, *newEl, *newList = NULL; for (el = list; el != NULL; el = el->next) { newEl = slNameNew(el->name); slAddHead(&newList, newEl); } slReverse(&newList); return newList; } struct slName *slNameListFromString(char *s, char delimiter) /* Return list of slNames gotten from parsing delimited string. * The final delimiter is optional. a,b,c and a,b,c, are equivalent * for comma-delimited lists. 
*/ { char *e; struct slName *list = NULL, *el; while (s != NULL && s[0] != 0) { e = strchr(s, delimiter); if (e == NULL) el = slNameNew(s); else { el = slNameNewN(s, e-s); e += 1; } slAddHead(&list, el); s = e; } slReverse(&list); return list; } struct slName *slNameListFromCommaEscaped(char *s) /* Return list of slNames gotten from parsing comma delimited string. * The final comma is optional. a,b,c and a,b,c, are equivalent * for comma-delimited lists. To escape commas, put two in a row, * which eliminates the possibility for null names * (eg. a,,b,c will parse to two elements a,b and c). */ { if (s == NULL) return NULL; struct slName *list = NULL; char buffer[strlen(s) + 1], *ptr = buffer; for (; *s != 0; s++) { *ptr++ = *s; if (*s == ',') { if (s[1] != ',') // if next character not also a , { // we found the delimeter, add the string to the list slAddHead(&list, slNameNewN(buffer, ptr - buffer - 1)); ptr = buffer; // start a new buffer } else s++; // skip the quoting comma and continue } } if (ptr > buffer) // is there something in the buffer slAddHead(&list, slNameNewN(buffer, ptr - buffer)); // add it to the list slReverse(&list); return list; } struct slName *slNameListFromStringArray(char *stringArray[], int arraySize) /* Return list of slNames from an array of strings of length arraySize. * If a string in the array is NULL, the array will be treated as * NULL-terminated (shorter than arraySize). */ { char *s; struct slName *list = NULL, *el; int i; if (stringArray == NULL) return NULL; for (i = 0; i < arraySize; i++) { s = stringArray[i]; if (s == NULL) break; el = slNameNew(s); slAddHead(&list, el); } slReverse(&list); return list; } char *slNameListToString(struct slName *list, char delimiter) /* Return string created by joining all names with the delimiter. 
*/ { struct slName *el; int elCount = 0; int len = 0; char del[2]; char *s; if (list == NULL) return cloneString(""); del[0] = delimiter; del[1] = '\0'; for (el = list; el != NULL; el = el->next, elCount++) len += strlen(el->name); len += elCount; AllocArray(s, len); for (el = list; el != NULL; el = el->next) { strcat(s, el->name); if (el->next != NULL) strcat(s, del); } return s; } struct slName *slNameLoadReal(char *fileName) /* load file lines that are not blank or start with a '#' into a slName * list */ { struct slName *lines = NULL; char *line; struct lineFile *lf = lineFileOpen(fileName, TRUE); while (lineFileNextReal(lf, &line)) slSafeAddHead(&lines, slNameNew(line)); lineFileClose(&lf); slReverse(&lines); return lines; } struct slName *slNameIntersection(struct slName *a, struct slName *b) /* return intersection of two slName lists. */ { struct hash *hashA = newHash(0); struct slName *el, *retval = NULL; for (el = a; el != NULL; el = el->next) hashAddInt(hashA, el->name, 1); for (el = b; el != NULL; el = el->next) if(hashLookup(hashA, el->name) != NULL) slNameAddHead(&retval, el->name); hashFree(&hashA); return retval; } struct slRef *refOnList(struct slRef *refList, void *val) /* Return ref if val is already on list, otherwise NULL. */ { struct slRef *ref; for (ref = refList; ref != NULL; ref = ref->next) if (ref->val == val) return ref; return NULL; } struct slRef *slRefNew(void *val) /* Create new slRef element. */ { struct slRef *ref; AllocVar(ref); ref->val = val; return ref; } void refAdd(struct slRef **pRefList, void *val) /* Add reference to list. */ { struct slRef *ref; AllocVar(ref); ref->val = val; slAddHead(pRefList, ref); } void refAddUnique(struct slRef **pRefList, void *val) /* Add reference to list if not already on list. */ { if (refOnList(*pRefList, val) == NULL) { refAdd(pRefList, val); } } void slRefFreeListAndVals(struct slRef **pList) /* Free up (with simple freeMem()) each val on list, and the list itself as well. 
*/ { struct slRef *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; freeMem(el->val); freeMem(el); } *pList = NULL; } struct slRef *refListFromSlList(void *list) /* Make a reference list that mirrors a singly-linked list. */ { struct slList *el; struct slRef *refList = NULL, *ref; for (el= list; el != NULL; el = el->next) { ref = slRefNew(el); slAddHead(&refList, ref); } slReverse(&refList); return refList; } struct slPair *slPairNew(char *name, void *val) /* Allocate new name/value pair. */ { struct slPair *el; AllocVar(el); el->name = cloneString(name); el->val = val; return el; } void slPairAdd(struct slPair **pList, char *name, void *val) /* Add new slPair to head of list. */ { struct slPair *el = slPairNew(name, val); slAddHead(pList, el); } void slPairFree(struct slPair **pEl) /* Free up struct and name. (Don't free up values.) */ { struct slPair *el = *pEl; if (el != NULL) { freeMem(el->name); freez(pEl); } } void slPairFreeList(struct slPair **pList) /* Free up list. (Don't free up values.) */ { struct slPair *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; slPairFree(&el); } *pList = NULL; } void slPairFreeValsExt(struct slPair *list, void (*freeFunc)()) /* Free up all values on list using freeFunc. * freeFunc should take a simple pointer to free an item, and can be NULL. */ { struct slPair *el; for (el = list; el != NULL; el = el->next) { if (freeFunc) freeFunc(el->val); else freez(&el->val); } } void slPairFreeVals(struct slPair *list) /* Free up all values on list. */ { slPairFreeValsExt(list, NULL); } void slPairFreeValsAndListExt(struct slPair **pList, void (*freeFunc)()) /* Free up all values on list using freeFunc and list itself. * freeFunc should take a simple pointer to free an item, and can be NULL. 
*/ { if (pList) slPairFreeValsExt(*pList, freeFunc); slPairFreeList(pList); } void slPairFreeValsAndList(struct slPair **pList) /* Free up all values on list and list itself */ { slPairFreeVals(*pList); slPairFreeList(pList); } struct slPair *slPairFind(struct slPair *list, char *name) /* Return list element of given name, or NULL if not found. */ { struct slPair *el; for (el = list; el != NULL; el = el->next) if (sameString(name, el->name)) break; return el; } void *slPairFindVal(struct slPair *list, char *name) /* Return value associated with name in list, or NULL if not found. */ { struct slPair *el = slPairFind(list, name); if (el == NULL) return NULL; return el->val; } struct slPair *slPairListFromString(char *str,boolean respectQuotes) // Return slPair list parsed from list in string like: [name1=val1 name2=val2 ...] // if respectQuotes then string can have double quotes: [name1="val 1" "name 2"=val2 ...] // resulting pair strips quotes: {name1}={val 1},{name 2}={val2} // Returns NULL if parse error. Free this up with slPairFreeValsAndList. { char *s = skipLeadingSpaces(str); // Would like to remove this and tighten up the standard someday. 
if (isEmpty(s)) return NULL; struct slPair *list = NULL; char name[1024]; char val[1024]; char buf[1024]; bool inQuote = FALSE; char *b = buf; char sep = '='; char c = ' '; int mode = 0; while(1) { c = *s++; if (mode == 0 || mode == 2) // reading name or val { boolean term = FALSE; if (respectQuotes && b == buf && !inQuote && c == '"') inQuote = TRUE; else if (inQuote && c == '"') term = TRUE; else if ((c == sep || c == 0) && !inQuote) { term = TRUE; --s; // rewind } else if (c == ' ' && !inQuote) { warn("slPairListFromString: Unexpected whitespace in %s", str); return NULL; } else if (c == 0 && inQuote) { warn("slPairListFromString: Unterminated quote in %s", str); return NULL; } else { *b++ = c; if ((b - buf) > sizeof buf) { warn("slPairListFromString: pair name or value too long in %s", str); return NULL; } } if (term) { inQuote = FALSE; *b = 0; if (mode == 0) { safecpy(name, sizeof name, buf); if (strlen(name)<1) { warn("slPairListFromString: Pair name cannot be empty in %s", str); return NULL; } // Shall we check for name being alphanumeric, at least for the respectQuotes=FALSE case? } else // mode == 2 { safecpy(val, sizeof val, buf); if (!respectQuotes && (hasWhiteSpace(name) || hasWhiteSpace(val))) // should never happen { warn("slPairListFromString() Unexpected white space in name=value pair: [%s]=[%s] in string=[%s]\n", name, val, str); break; } slPairAdd(&list, name, cloneString(val)); } ++mode; } } else if (mode == 1) // read required "=" sign { if (c != '=') { warn("slPairListFromString: Expected character = after name in %s", str); return NULL; } ++mode; sep = ' '; b = buf; } else // (mode == 3) reading optional separating space { if (c == 0) break; if (c != ' ') { mode = 0; --s; b = buf; sep = '='; } } } slReverse(&list); return list; } char *slPairListToString(struct slPair *list,boolean quoteIfSpaces) // Returns an allocated string of pairs in form of [name1=val1 name2=val2 ...] 
// If requested, will wrap name or val in quotes if contain spaces: [name1="val 1" "name 2"=val2] { // Don't rely on dyString. We should do the accounting ourselves and not create extra dependencies. int count = 0; struct slPair *pair = list; for (;pair != NULL; pair = pair->next) { assert(pair->name != NULL && pair->val != NULL); // Better assert and get this over with, // complete with stack count += strlen(pair->name); count += strlen((char *)(pair->val)); count += 2; // = and ' ' delimit if (quoteIfSpaces) { if (hasWhiteSpace(pair->name)) count += 2; // " and " if (hasWhiteSpace((char *)(pair->val))) count += 2; // " and " } } if (count == 0) return NULL; char *str = needMem(count+5); // A bit of slop char *strPtr = str; for (pair = list; pair != NULL; pair = pair->next, strPtr += strlen(strPtr)) { if (pair != list) // Not first cycle *strPtr++ = ' '; if (hasWhiteSpace(pair->name)) { if (quoteIfSpaces) sprintf(strPtr,"\"%s\"=",pair->name); else { warn("slPairListToString() Unexpected white space in name: [%s]\n", pair->name); sprintf(strPtr,"%s=",pair->name); // warn but still make string } } else sprintf(strPtr,"%s=",pair->name); strPtr += strlen(strPtr); if (hasWhiteSpace((char *)(pair->val))) { if (quoteIfSpaces) sprintf(strPtr,"\"%s\"",(char *)(pair->val)); else { warn("slPairListToString() Unexpected white space in val: [%s]\n", (char *)(pair->val)); sprintf(strPtr,"%s",(char *)(pair->val)); // warn but still make string } } else sprintf(strPtr,"%s",(char *)(pair->val)); } return str; } char *slPairNameToString(struct slPair *list, char delimiter,boolean quoteIfSpaces) // Return string created by joining all names (ignoring vals) with the delimiter. // If requested, will wrap name in quotes if contain spaces: [name1,"name 2" ...] 
{ int elCount = 0; int count = 0; struct slPair *pair = list; for (; pair != NULL; pair = pair->next, elCount++) { assert(pair->name != NULL); count += strlen(pair->name); if (quoteIfSpaces && hasWhiteSpace(pair->name)) count += 2; } count += elCount; if (count == 0) return NULL; char *str = needMem(count+5); // A bit of slop char *strPtr = str; for (pair = list; pair != NULL; pair = pair->next, strPtr += strlen(strPtr)) { if (pair != list) *strPtr++ = delimiter; if (hasWhiteSpace(pair->name)) { if (quoteIfSpaces) sprintf(strPtr,"\"%s\"",pair->name); else { if (delimiter == ' ') // if delimied by commas, this is entirely okay! warn("slPairListToString() Unexpected white space in name delimited by space: " "[%s]\n", pair->name); sprintf(strPtr,"%s",pair->name); // warn but still make string } } else sprintf(strPtr,"%s",pair->name); } return str; } int slPairCmpCase(const void *va, const void *vb) /* Compare two slPairs, ignore case. */ { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return strcasecmp(a->name, b->name); } void slPairSortCase(struct slPair **pList) /* Sort slPair list, ignore case. */ { slSort(pList, slPairCmpCase); } int slPairCmpWordsWithEmbeddedNumbers(const void *va, const void *vb) /* Sort slPairList ignoring case and dealing with embedded numbers so 2 comes * before 10, not after. */ { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return cmpWordsWithEmbeddedNumbers(a->name, b->name); } int slPairCmp(const void *va, const void *vb) /* Compare two slPairs. */ { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return strcmp(a->name, b->name); } int slPairValCmpCase(const void *va, const void *vb) /* Case insensitive compare two slPairs on their values (must be string). 
*/ { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return strcasecmp((char *)(a->val), (char *)(b->val)); } int slPairValCmp(const void *va, const void *vb) /* Compare two slPairs on their values (must be string). */ { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return strcmp((char *)(a->val), (char *)(b->val)); } void slPairValSortCase(struct slPair **pList) /* Sort slPair list on values (must be string), ignore case. */ { slSort(pList, slPairValCmpCase); } void slPairValSort(struct slPair **pList) /* Sort slPair list on values (must be string). */ { slSort(pList, slPairValCmp); } int slPairIntCmp(const void *va, const void *vb) // Compare two slPairs on their integer values. { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return ((char *)(a->val) - (char *)(b->val)); // cast works and val is 0 vased integer } void slPairIntSort(struct slPair **pList) // Sort slPair list on integer values. { slSort(pList, slPairIntCmp); } int slPairAtoiCmp(const void *va, const void *vb) // Compare two slPairs on their strings interpreted as integer values. { const struct slPair *a = *((struct slPair **)va); const struct slPair *b = *((struct slPair **)vb); return (atoi((char *)(a->val)) - atoi((char *)(b->val))); } void slPairValAtoiSort(struct slPair **pList) // Sort slPair list on string values interpreted as integers. { slSort(pList, slPairAtoiCmp); } void gentleFree(void *pt) { if (pt != NULL) freeMem((char*)pt); } int differentWord(char *s1, char *s2) /* strcmp ignoring case - returns zero if strings are * the same (ignoring case) otherwise returns difference * between first non-matching characters. */ { char c1, c2; for (;;) { c1 = toupper(*s1++); c2 = toupper(*s2++); if (c1 != c2) /* Takes care of end of string in one but not the other too */ return c2-c1; if (c1 == 0) /* Take care of end of string in both. 
*/ return 0; } } int differentWordNullOk(char *s1, char *s2) /* Returns 0 if two strings (either of which may be NULL) * are the same, ignoring case. Otherwise returns the * difference between the first non-matching characters. */ { if (s1 == s2) return FALSE; else if (s1 == NULL) return -1; else if (s2 == NULL) return 1; else return differentWord(s1,s2); } int differentStringNullOk(char *a, char *b) /* Returns 0 if two strings (either of which may be NULL) * are the same. Otherwise it returns a positive or negative * number depending on the alphabetical order of the two * strings. * This is basically a strcmp that can handle NULLs in * the input. If used in a sort the NULLs will end * up before any of the cases with data. */ { if (a == b) return FALSE; else if (a == NULL) return -1; else if (b == NULL) return 1; else return strcmp(a,b) != 0; } boolean isEmptyTextField(char *s) /* Recognize NULL or dot as empty text */ { return (isEmpty(s) || sameString(".", s)); } boolean startsWith(const char *start, const char *string) /* Returns TRUE if string begins with start. */ { char c; int i; for (i=0; ;i += 1) { if ((c = start[i]) == 0) return TRUE; if (string[i] != c) return FALSE; } } boolean startsWithNoCase(const char *start, const char *string) /* Returns TRUE if string begins with start, case-insensitive. */ { char c; int i; for (i=0; ;i += 1) { if ((c = tolower(start[i])) == 0) return TRUE; if (tolower(string[i]) != c) return FALSE; } } boolean startsWithWord(char *firstWord, char *line) /* Return TRUE if first white-space-delimited word in line * is same as firstWord. Comparison is case sensitive. */ { int len = strlen(firstWord); int i; for (i=0; i<len; ++i) if (firstWord[i] != line[i]) return FALSE; char c = line[len]; return c == 0 || isspace(c); } boolean startsWithWordByDelimiter(char *firstWord,char delimit, char *line) /* Return TRUE if first word in line is same as firstWord as delimited by delimit. Comparison is case sensitive. 
Delimit of ' ' uses isspace() */ { if (delimit == ' ') return startsWithWord(firstWord,line); if (!startsWith(firstWord,line)) return FALSE; char c = line[strlen(firstWord)]; return (c == '\0' || c == delimit); } char * findWordByDelimiter(char *word,char delimit, char *line) /* Return pointer to first occurance of word in line broken by 'delimit' char Comparison is case sensitive. Delimit of ' ' uses isspace() */ { int ix; char *p=line; while (p!=NULL && *p!='\0') { for (ix = 0; word[ix] != '\0' && word[ix] == *p; ix++,p++) ; // advance as long as they match if (ix == strlen(word)) { if (*p=='\0' || *p==delimit || (delimit == ' ' && isspace(*p))) return p - ix; // matched and delimited } for (; *p!='\0' && *p!=delimit && (delimit != ' ' || !isspace(*p)); p++) ; // advance to next delimit if (*p!='\0') { p++; continue; // delimited so start again after delimit } } return NULL; } char *rStringIn(char *needle, char *haystack) /* Return last position of needle in haystack, or NULL if it's not there. */ { int nSize = strlen(needle); char *pos; for (pos = haystack + strlen(haystack) - nSize; pos >= haystack; pos -= 1) { if (memcmp(needle, pos, nSize) == 0) return pos; } return NULL; } char *nextStringBetween(char *start, char *end, char **pHaystack) /* Return next string that occurs between start and end strings * starting seach at *pHaystack. This will update *pHaystack to after * end, so it can be called repeatedly. Returns NULL when * no more to be found*/ { char *pos, *p; int len; char *haystack = *pHaystack; if (isEmpty(haystack)) return NULL; if ((p = stringIn(start, haystack)) != NULL) { pos = p + strlen(start); if (isEmpty(end)) return cloneString(pos); if ((p = stringIn(end, pos)) != NULL) { len = p - pos; pos = cloneMem(pos, len + 1); pos[len] = 0; *pHaystack = p; return pos; } } *pHaystack = NULL; return NULL; } char *stringBetween(char *start, char *end, char *haystack) /* Return string between start and end strings, or NULL if * none found. 
The first such instance is returned. * String must be freed by caller. */ { return nextStringBetween(start, end, &haystack); } boolean endsWith(char *string, char *end) /* Returns TRUE if string ends with end. */ { int sLen, eLen, offset; sLen = strlen(string); eLen = strlen(end); offset = sLen - eLen; if (offset < 0) return FALSE; return sameString(string+offset, end); } char lastChar(char *s) /* Return last character in string. */ { if (s == NULL || s[0] == 0) return 0; return s[strlen(s)-1]; } void trimLastChar(char *s) /* Erase last character in string. */ { int len = strlen(s); if (len > 0) s[len-1] = 0; } char *lastNonwhitespaceChar(char *s) // Return pointer to last character in string that is not whitespace. { if (s == NULL || s[0] == 0) return NULL; char *sPos = s + (strlen(s) - 1); for (;sPos >= s;sPos--) { if (!isspace(*sPos)) return sPos; } return NULL; } char *matchingCharBeforeInLimits(char *limit, char *s, char c) /* Look for character c sometime before s, but going no further than limit. * Return NULL if not found. */ { while (--s >= limit) if (*s == c) return s; return NULL; } char *memMatch(char *needle, int nLen, char *haystack, int hLen) /* Returns first place where needle (of nLen chars) matches * haystack (of hLen chars) */ { char c = *needle++; nLen -= 1; hLen -= nLen; while (--hLen >= 0) { if (*haystack++ == c && memcmp(needle, haystack, nLen) == 0) { return haystack-1; } } return NULL; } void toUpperN(char *s, int n) /* Convert a section of memory to upper case. */ { int i; for (i=0; i<n; ++i) s[i] = toupper(s[i]); } void toLowerN(char *s, int n) /* Convert a section of memory to lower case. */ { int i; for (i=0; i<n; ++i) s[i] = tolower(s[i]); } void toggleCase(char *s, int size) /* toggle upper and lower case chars in string. */ { char c; int i; for (i=0; i<size; ++i) { c = s[i]; if (isupper(c)) c = tolower(c); else if (islower(c)) c = toupper(c); s[i] = c; } } char *strUpper(char *s) /* Convert entire string to upper case. 
*/ { char c; char *ss=s; for (;;) { if ((c = *ss) == 0) break; *ss++ = toupper(c); } return s; } void replaceChar(char *s, char oldc, char newc) /* Repace one char with another. Modifies original string. */ { if (!s) return; char c; while((c=*s)) { if (c == oldc) *s = newc; ++s; } } char *replaceChars(char *string, char *old, char *new) /* Replaces the old with the new. The old and new string need not be of equal size Can take any length string. Return value needs to be freeMem'd. */ { int numTimes = 0; int oldLen = strlen(old); int newLen = strlen(new); int strLen = 0; char *result = NULL; char *ptr = strstr(string, old); char *resultPtr = NULL; while(NULL != ptr) { numTimes++; ptr += oldLen; ptr = strstr(ptr, old); } strLen = max(strlen(string) + (numTimes * (newLen - oldLen)), strlen(string)); result = needMem(strLen + 1); ptr = strstr(string, old); resultPtr = result; while(NULL != ptr) { strLen = ptr - string; strcpy(resultPtr, string); string = ptr + oldLen; resultPtr += strLen; strcpy(resultPtr, new); resultPtr += newLen; ptr = strstr(string, old); } strcpy(resultPtr, string); return result; } int strSwapStrs(char *string, int sz,char *oldStr, char *newStr) /* Swaps all occurrences of the old with the new in string. Need not be same size Swaps in place but restricted by sz. Returns count of swaps or -1 for sz failure. */ { // WARNING: called at low level, so no errors allowed. int count = 0; char *p=NULL; for(p=strstr(string,oldStr);p!=NULL;p=strstr(p+strlen(oldStr),oldStr)) count++; if (count == 0) return 0; if((strlen(string)+(count*(strlen(newStr) - strlen(oldStr))))>=sz) return -1; for(p=strstr(string,oldStr);p!=NULL;p=strstr(p+strlen(newStr),oldStr)) { memmove(p+strlen(newStr),p+strlen(oldStr),strlen(p+strlen(oldStr))+1); // NULL at end is also moved! 
memcpy(p,newStr,strlen(newStr)); } return count; } char *strLower(char *s) /* Convert entire string to lower case */ { char c; char *ss=s; for (;;) { if ((c = *ss) == 0) break; *ss++ = tolower(c); } return s; } char * memSwapChar(char *s, int len, char oldChar, char newChar) /* Substitute newChar for oldChar throughout memory of given length. */ { int ix=0; for (;ix<len;ix++) { if (s[ix] == oldChar) s[ix] = newChar; } return s; } void stripChar(char *s, char c) /* Remove all occurences of c from s. */ { char *in = s, *out = s; char b; for (;;) { b = *out = *in++; if (b == 0) break; if (b != c) ++out; } } char *stripEnclosingChar(char *inout,char encloser) // Removes enclosing char if found at both beg and end, preserving pointer // Note: handles brackets '(','{' and '[' by complement at end { if (inout == NULL || strlen(inout) < 2 || *inout != encloser) return inout; char *end = inout + (strlen(inout) - 1); char closer = encloser; switch (closer) { case '(': closer = ')'; break; case '{': closer = '}'; break; case '[': closer = ']'; break; default: break; } if (*end != closer) return inout; *end = '\0'; return memmove(inout,inout+1,strlen(inout)); // use memmove to safely copy in place } void stripString(char *s, char *strip) /* Remove all occurences of strip from s. */ { char c, *in = s, *out = s; int stripSize = strlen(strip); char stripFirst = strip[0]; while ((c = *in) != 0) { c = *in; if (c == stripFirst) { if (startsWith(strip, in)) { in += stripSize; continue; } } *out = c; ++out; ++in; } *out = 0; } int countCase(char *s,boolean upper) // Count letters with case (upper or lower) { char a; int count = 0; while ((a = *s++) != 0) if (( upper && isupper(a)) || (!upper && islower(a))) ++count; return count; } int countChars(char *s, char c) /* Return number of characters c in string s. 
*/
{
int total = 0;
for (; *s != 0; ++s)
    {
    if (*s == c)
        ++total;
    }
return total;
}

int countCharsN(char *s, char c, int size)
/* Return number of characters c in the first size bytes of s. */
{
int total = 0;
int i = 0;
while (i < size)
    {
    if (s[i] == c)
        ++total;
    ++i;
    }
return total;
}

int countLeadingChars(char *s, char c)
/* Count number of characters c at start of string. */
{
int count = 0;
for (; *s == c; ++s)
    ++count;
return count;
}

int countLeadingDigits(const char *s)
/* Return number of leading digits in s */
{
const char *p = s;
while (isdigit(*p))
    ++p;
return p - s;
}

int countLeadingNondigits(const char *s)
/* Count number of leading non-digit characters in s. */
{
int count = 0;
for (; *s != 0 && !isdigit(*s); ++s)
    ++count;
return count;
}

int countSeparatedItems(char *string, char separator)
/* Count number of items in string you would parse out with given
 * separator,  assuming final separator is optional. */
{
int items = 0;
char prev = 0;
for (; *string != 0; ++string)
    {
    prev = *string;
    if (prev == separator)
        ++items;
    }
// A non-empty string that does not end in a separator has one more item.
if (prev != 0 && prev != separator)
    ++items;
return items;
}

int cmpStringsWithEmbeddedNumbers(const char *a, const char *b)
/* Compare strings such as gene names that may have embedded numbers,
 * so that bmp4a comes before bmp14a */
{
for (;;)
   {
   /* Figure out number of digits at start, and do numerical comparison if there
    * are any.  If numbers agree step over numerical part, otherwise return difference. */
   int aNum = countLeadingDigits(a);
   int bNum = countLeadingDigits(b);
   if (aNum >= 0 && bNum >= 0)
       {
       int diff = atoi(a) - atoi(b);
       if (diff != 0)
           return diff;
       a += aNum;
       b += bNum;
       }

   /* Count number of non-digits at start. */
   int aNonNum = countLeadingNondigits(a);
   int bNonNum = countLeadingNondigits(b);

   // If different sizes of non-numerical part, then don't match, let strcmp sort out how
   if (aNonNum != bNonNum)
        return strcmp(a,b);
   // If no characters left then they are the same!
else if (aNonNum == 0) return 0; // Non-numerical part is the same length and non-zero. See if it is identical. Return if not. else { int diff = memcmp(a,b,aNonNum); if (diff != 0) return diff; a += aNonNum; b += bNonNum; } } } int cmpWordsWithEmbeddedNumbers(const char *a, const char *b) /* Case insensitive version of cmpStringsWithEmbeddedNumbers. */ { char *A = cloneString(a); char *B = cloneString(b); int diff = cmpStringsWithEmbeddedNumbers(strUpper(A), strUpper(B)); freeMem(A); freeMem(B); return diff; } int countSame(char *a, char *b) /* Count number of characters that from start in a,b that are same. */ { char c; int i; int count = 0; for (i=0; ; ++i) { c = a[i]; if (b[i] != c) break; if (c == 0) break; ++count; } return count; } /* int chopString(in, sep, outArray, outSize); */ /* This chops up the input string (cannabilizing it) * into an array of zero terminated strings in * outArray. It returns the number of strings. * If you pass in NULL for outArray, it will just * return the number of strings that it *would* * chop. This splits the string. * GOTCHA: since multiple separators are skipped * and treated as one, it is impossible to parse * a list with an empty string. * e.g. cat\t\tdog returns only cat and dog but no empty string */ int chopString(char *in, char *sep, char *outArray[], int outSize) { int recordCount = 0; for (;;) { if (outArray != NULL && recordCount >= outSize) break; /* Skip initial separators. */ in += strspn(in, sep); if (*in == 0) break; if (outArray != NULL) outArray[recordCount] = in; recordCount += 1; in += strcspn(in, sep); if (*in == 0) break; if (outArray != NULL) *in = 0; in += 1; } return recordCount; } int chopByWhite(char *in, char *outArray[], int outSize) /* Like chopString, but specialized for white space separators. * See the GOTCHA in chopString */ { int recordCount = 0; char c; for (;;) { if (outArray != NULL && recordCount >= outSize) break; /* Skip initial separators. 
*/
    while (isspace(*in)) ++in;
    if (*in == 0)
        break;

    /* Store start of word and look for end of word. */
    if (outArray != NULL)
        outArray[recordCount] = in;
    recordCount += 1;
    for (;;)
        {
        if ((c = *in) == 0)
            break;
        if (isspace(c))
            break;
        ++in;
        }
    if (*in == 0)
        break;

    /* Tag end of word with zero.  (Input is left untouched when only counting.) */
    if (outArray != NULL)
        *in = 0;
    /* And skip over the zero. */
    in += 1;
    }
return recordCount;
}

int chopByWhiteRespectDoubleQuotes(char *in, char *outArray[], int outSize)
// NOTE: this routine does not do what this comment says.  It did not ever remove quotes due to
//  a coding error so I took out the code that pretended to be doing this.
/* Like chopString, but specialized for white space separators.
 * Further, any doubleQuotes (") are respected.
 * If doubleQuote is encloses whole string, then they are removed:
 *   "Fred and Ethyl" results in word [Fred and Ethyl]
 * If doubleQuotes exist inside string they are retained:
 *   Fred" and Ethyl" results in word [Fred" and Ethyl"]
 * Special note "" is a valid, though empty word. */
{
int recordCount = 0;
char c;
boolean quoting = FALSE;
for (;;)
    {
    if (outArray != NULL && recordCount >= outSize)
        break;

    /* Skip initial separators. */
    while (isspace(*in)) ++in;
    if (*in == 0)
        break;

    /* Store start of word and look for end of word. */
    if (outArray != NULL)
        outArray[recordCount] = in;
    recordCount += 1;
    quoting = FALSE;
    for (;;)
        {
        if ((c = *in) == 0)
            break;
        if (quoting)
            {
            /* Inside quotes white space does not end the word; only a closing quote ends quoting. */
            if (c == '"')
                quoting = FALSE;
            }
        else
            {
            quoting = (c == '"');
            if (isspace(c))
                break;
            }
        ++in;
        }
    if (*in == 0)
        break;

    /* Tag end of word with zero. */
    if (outArray != NULL)
        *in = 0;
    /* And skip over the zero.
*/ in += 1; } return recordCount; } int chopByCharRespectDoubleQuotes(char *in, char sep, char *outArray[], int outSize) /* Chop a string into sep delimited strings but honor double quotes */ { int recordCount = 0; char c; boolean quoting = FALSE; for (;;) { if (outArray != NULL && recordCount >= outSize) break; // skip initial sep while ((*in) == sep) ++in; if (*in == 0) break; if (outArray != NULL) outArray[recordCount] = in; recordCount += 1; quoting = FALSE; for (;;) { if ((c = *in) == 0) break; if (quoting) { if (c == '"') quoting = FALSE; } else { quoting = (c == '"'); if (c == sep) break; } ++in; } if (*in == 0) break; // Tag end of word with zero if (outArray != NULL) *in = 0; in += 1; } return recordCount; } int chopByChar(char *in, char chopper, char *outArray[], int outSize) /* Chop based on a single character. */ { int i; char c; if (*in == 0) return 0; for (i=0; (i<outSize) || (outArray==NULL); ++i) { if (outArray != NULL) outArray[i] = in; for (;;) { if ((c = *in++) == 0) return i+1; else if (c == chopper) { if (outArray != NULL) in[-1] = 0; break; } } } return i; } char crLfChopper[] = "\n\r"; char whiteSpaceChopper[] = " \t\n\r"; char *skipBeyondDelimit(char *s,char delimit) /* Returns NULL or pointer to first char beyond one (or more contiguous) delimit char. If delimit is ' ' then skips beyond first patch of whitespace. */ { if (s != NULL) { char *beyond = NULL; if (delimit == ' ') return skipLeadingSpaces(skipToSpaces(s)); else beyond = strchr(s,delimit); if (beyond != NULL) { for (beyond++;*beyond == delimit;beyond++) ; if (*beyond != '\0') return beyond; } } return NULL; } char *skipLeadingSpaces(const char *stringIn) /* Return first non-white space. */ { char c, *s = (char *)stringIn; if (s == NULL) return NULL; for (;;) { c = *s; if (!isspace(c)) return s; ++s; } } char *skipToSpaces(const char *stringIn) /* Return first white space or NULL if none.. 
*/ { char c, *s = (char *)stringIn; if (s == NULL) return NULL; for (;;) { c = *s; if (c == 0) return NULL; if (isspace(c)) return s; ++s; } } int eraseTrailingSpaces(char *s) /* Replace trailing white space with zeroes. Returns number of * spaces erased. */ { int len = strlen(s); int i; char c; int erased = 0; for (i=len-1; i>=0; --i) { c = s[i]; if (isspace(c)) { s[i] = 0; ++erased; } else break; } return erased; } /* Remove white space from a string */ void eraseWhiteSpace(char *s) { char *in, *out; char c; in = out = s; for (;;) { c = *in++; if (c == 0) break; if (!isspace(c)) *out++ = c; } *out++ = 0; } /* Remove any chars leaving digits only */ void eraseNonDigits(char *s) { char *in, *out; char c; in = out = s; for (;;) { c = *in++; if (c == 0) break; if (isdigit(c)) *out++ = c; } *out = 0; } /* Remove non-alphanumeric chars from string */ void eraseNonAlphaNum(char *s) { char *in, *out; char c; in = out = s; for (;;) { c = *in++; if (c == 0) break; if (isalnum(c)) *out++ = c; } *out = 0; } char *trimSpaces(char *s) /* Remove leading and trailing white space. */ { if (s != NULL) { s = skipLeadingSpaces(s); eraseTrailingSpaces(s); } return s; } void repeatCharOut(FILE *f, char c, int count) /* Write character to file repeatedly. */ { while (--count >= 0) fputc(c, f); } void spaceOut(FILE *f, int count) /* Put out some spaces to file. */ { repeatCharOut(f, ' ', count); } void starOut(FILE *f, int count) /* Put out some asterisks to file. */ { repeatCharOut(f, '*', count); } boolean hasWhiteSpace(char *s) /* Return TRUE if there is white space in string. */ { char c; while ((c = *s++) != 0) if (isspace(c)) return TRUE; return FALSE; } char *firstWordInLine(char *line) /* Returns first word in line if any (white space separated). * Puts 0 in place of white space after word. 
*/ { char *e; line = skipLeadingSpaces(line); if ((e = skipToSpaces(line)) != NULL) *e = 0; return line; } char *cloneFirstWord(char *line) /* Clone first word in line */ { char *startFirstWord = skipLeadingSpaces(line); if (startFirstWord == NULL) return NULL; char *endFirstWord = skipToSpaces(startFirstWord); if (endFirstWord == NULL) return cloneString(startFirstWord); else return cloneStringZ(startFirstWord, endFirstWord - startFirstWord); } char *cloneNotFirstWord(char *s) /* return part of string after first space, not changing s. Result has to be freed. */ { if (s==NULL) return cloneString(""); char* spcPos = stringIn(" ", s); if (spcPos==NULL) return cloneString(""); return cloneString(spcPos+1); } char *lastWordInLine(char *line) /* Returns last word in line if any (white space separated). * Returns NULL if string is empty. Removes any terminating white space * from line. */ { char *s = line; char *word = NULL, *wordEnd = NULL; for (;;) { s = skipLeadingSpaces(s); if (s == NULL || s[0] == 0) break; word = s; s = wordEnd = skipToSpaces(s); if (s == NULL) break; } if (wordEnd != NULL) *wordEnd = 0; return word; } char *nextWord(char **pLine) /* Return next word in *pLine and advance *pLine to next * word. */ { char *s = *pLine, *e; if (s == NULL || s[0] == 0) return NULL; s = skipLeadingSpaces(s); if (s[0] == 0) return NULL; e = skipToSpaces(s); if (e != NULL) *e++ = 0; *pLine = e; return s; } char *nextTabWord(char **pLine) /* Return next tab-separated word. */ { char *s = *pLine; char *e; if (s == NULL || *s == '\n' || *s == 0) { *pLine = NULL; return NULL; } e = strchr(s, '\t'); if (e == NULL) { e = strchr(s, '\n'); if (e != NULL) *e = 0; *pLine = NULL; } else { *e++ = 0; *pLine = e; } return s; } char *cloneFirstWordByDelimiterNoSkip(char *line,char delimit) /* Returns a cloned first word, not harming the memory passed in. 
Does not skip leading white space.*/ { if (line == NULL || *line == 0) return NULL; int size=0; char *e; for (e=line;*e!=0;e++) { if (*e==delimit) break; else if (delimit == ' ' && isspace(*e)) break; size++; } if (size == 0) return NULL; char *new = needMem(size + 2); // Null terminated by 2 memcpy(new, line, size); return new; } char *cloneFirstWordByDelimiter(char *line,char delimit) /* Returns a cloned first word, not harming the memory passed in. Skips over leading white space. */ { if (line == NULL || *line == 0) return NULL; line = skipLeadingSpaces(line); return cloneFirstWordByDelimiterNoSkip(line, delimit); } char *cloneNextWordByDelimiter(char **line,char delimit) /* Returns a cloned first word, advancing the line pointer but not harming memory passed in */ { char *new = cloneFirstWordByDelimiter(*line,delimit); if (new != NULL) { *line = skipLeadingSpaces(*line); *line += strlen(new); if ( **line != 0) (*line)++; } return new; } char *nextStringInList(char **pStrings) /* returns pointer to the first string and advances pointer to next in list of strings dilimited by 1 null and terminated by 2 nulls. */ { if (pStrings == NULL || *pStrings == NULL || **pStrings == 0) return NULL; char *p=*pStrings; *pStrings += strlen(p)+1; return p; } int cntStringsInList(char *pStrings) /* returns count of strings in a list of strings dilimited by 1 null and terminated by 2 nulls. */ { int cnt=0; char *p = pStrings; while (nextStringInList(&p) != NULL) cnt++; return cnt; } int stringArrayIx(char *string, char *array[], int arraySize) /* Return index of string in array or -1 if not there. */ { int i; for (i=0; i<arraySize; ++i) if (!differentWord(array[i], string)) return i; return -1; } int cmpStringOrder(char *a, char *b, char **orderFields, int orderCount) /* Compare two strings to sort in same order as orderedFields. 
If strings are * not in order, will sort them to be after all ordered fields, alphabetically */ { int aIx = stringArrayIx(a, orderFields, orderCount); int bIx = stringArrayIx(b, orderFields, orderCount); if (aIx < 0) // A not in list? { if (bIx < 0) // Neither in list, be alphabetical return(strcmp(a, b)); else // Only b in list, move a towards end return 1; } else if (bIx < 0) // Only a in list, move b towards end return -1; else return aIx - bIx; // Both in ordered list, just subtract indexes to sort } int ptArrayIx(void *pt, void *array, int arraySize) /* Return index of pt in array or -1 if not there. */ { int i; void **a = array; for (i=0; i<arraySize; ++i) { if (pt == a[i]) return i; } return -1; } FILE *mustOpen(char *fileName, char *mode) /* Open a file - or squawk and die. */ { FILE *f; if (sameString(fileName, "stdin")) return stdin; if (sameString(fileName, "stdout")) return stdout; if ((f = fopen(fileName, mode)) == NULL) { char *modeName = ""; if (mode) { if (mode[0] == 'r') modeName = " to read"; else if (mode[0] == 'w') modeName = " to write"; else if (mode[0] == 'a') modeName = " to append"; } errAbort("mustOpen: Can't open %s%s: %s", fileName, modeName, strerror(errno)); } return f; } void mustWrite(FILE *file, void *buf, size_t size) /* Write to a file or squawk and die. */ { if (size != 0 && fwrite(buf, size, 1, file) != 1) { errAbort("Error writing %lld bytes: %s\n", (long long)size, strerror(ferror(file))); } } void mustRead(FILE *file, void *buf, size_t size) /* Read size bytes from a file or squawk and die. */ { if (size != 0 && fread(buf, size, 1, file) != 1) { if (ferror(file)) errAbort("Error reading %lld bytes: %s", (long long)size, strerror(ferror(file))); else errAbort("End of file reading %lld bytes", (long long)size); } } void writeString(FILE *f, char *s) /* Write a 255 or less character string to a file. Truncate if longer. This * will write the length of the string in the first byte then the string * itself. 
*/ { UBYTE bLen; int len = strlen(s); if (len > 255) { warn("String too long in writeString (%d chars):\n%s", len, s); len = 255; } bLen = len; writeOne(f, bLen); mustWrite(f, s, len); } void writeStringSafe(FILE *f, char *s) /* Write a 255 or less character string to a file. Generate an error if * longer. This will write the length of the string in the first byte then * the string itself. */ { if (strlen(s) > 255) errAbort("attempt to write string longer than 255 bytes"); writeString(f, s); } char *readString(FILE *f) /* Read a string (written with writeString) into * memory. freeMem the result when done. */ { UBYTE bLen; int len; char *s; if (!readOne(f, bLen)) return NULL; len = bLen; s = needMem(len+1); if (len > 0) mustRead(f, s, len); return s; } char *mustReadString(FILE *f) /* Read a string. Squawk and die at EOF or if any problem. */ { char *s = readString(f); if (s == NULL) errAbort("Couldn't read string"); return s; } boolean fastReadString(FILE *f, char buf[256]) /* Read a string into buffer, which must be long enough * to hold it. String is in 'writeString' format. */ { UBYTE bLen; int len; if (!readOne(f, bLen)) return FALSE; if ((len = bLen)> 0) mustRead(f, buf, len); buf[len] = 0; return TRUE; } void msbFirstWriteBits64(FILE *f, bits64 x) /* Write out 64 bit number in manner that is portable across architectures */ { int i; UBYTE buf[8]; for (i=7; i>=0; --i) { buf[i] = (UBYTE)(x&0xff); x >>= 8; } mustWrite(f, buf, 8); } bits64 msbFirstReadBits64(FILE *f) /* Write out 64 bit number in manner that is portable across architectures */ { int i; UBYTE buf[8]; bits64 x = 0; mustRead(f, buf, 8); for (i=0; i<8; ++i) { x <<= 8; x |= buf[i]; } return x; } void mustGetLine(FILE *file, char *buf, int charCount) /* Read at most charCount-1 bytes from file, but stop after newline if one is * encountered. The string in buf is '\0'-terminated. (See man 3 fgets.) * Die if there is an error. 
*/ { char *success = fgets(buf, charCount, file); if (success == NULL && charCount > 0) buf[0] = '\0'; if (ferror(file)) errAbort("mustGetLine: fgets failed: %s", strerror(ferror(file))); } static char *getWhenceStr(int whence) /* get string description of fseek/lseek whence parameter */ { return ((whence == SEEK_SET) ? "SEEK_SET" : (whence == SEEK_CUR) ? "SEEK_CUR" : (whence == SEEK_END) ? "SEEK_END" : "invalid 'whence' value"); } void mustSeek(FILE *file, off_t offset, int whence) /* Seek to given offset, relative to whence (see man fseek) in file or errAbort. */ { int ret = fseek(file, offset, whence); if (ret < 0) errnoAbort("fseek(%lld, %s (%d)) failed", (long long)offset, getWhenceStr(whence), whence); } int mustOpenFd(char *fileName, int flags) /* Open a file descriptor (see man 2 open) or squawk and die. */ { if (sameString(fileName, "stdin")) return STDIN_FILENO; if (sameString(fileName, "stdout")) return STDOUT_FILENO; // mode is necessary when O_CREAT is given, ignored otherwise int mode = 0666; int fd = open(fileName, flags, mode); if (fd < 0) { char *modeName = ""; if ((flags & (O_WRONLY | O_CREAT | O_TRUNC)) == (O_WRONLY | O_CREAT | O_TRUNC)) modeName = " to create and truncate"; else if ((flags & (O_WRONLY | O_CREAT)) == (O_WRONLY | O_CREAT)) modeName = " to create"; else if ((flags & O_WRONLY) == O_WRONLY) modeName = " to write"; else if ((flags & O_RDWR) == O_RDWR) modeName = " to append"; else modeName = " to read"; errnoAbort("mustOpenFd: Can't open %s%s", fileName, modeName); } return fd; } void mustReadFd(int fd, void *buf, size_t size) /* Read size bytes from a file or squawk and die. */ { ssize_t actualSize; char *cbuf = buf; // using a loop because linux was not returning all data in a single request when request size exceeded 2GB. 
// MacOS complains invalid argument if it is over 2GB while (size > 0) { actualSize = read(fd, cbuf, min(0x7FFF000,size)); // max 2GB 0x7FFF000 MAX_RW_COUNT = (INT_MAX & PAGE_MASK) if (actualSize < 0) errnoAbort("Error reading %lld bytes", (long long)size); if (actualSize == 0) errAbort("End of file reading %llu bytes (got %lld)", (unsigned long long)size, (long long)actualSize); cbuf += actualSize; size -= actualSize; } } void mustWriteFd(int fd, void *buf, size_t size) /* Write size bytes to file descriptor fd or die. (See man 2 write.) */ { ssize_t result = write(fd, buf, size); if (result < size) { if (result < 0) errnoAbort("mustWriteFd: write failed"); else errAbort("mustWriteFd only wrote %lld of %lld bytes. Likely the disk is full.", (long long)result, (long long)size); } } off_t mustLseek(int fd, off_t offset, int whence) /* Seek to given offset, relative to whence (see man lseek) in file descriptor fd or errAbort. * Return final offset (e.g. if this is just an (fd, 0, SEEK_CUR) query for current position). */ { off_t ret = lseek(fd, offset, whence); if (ret < 0) errnoAbort("lseek(%d, %lld, %s (%d)) failed", fd, (long long)offset, getWhenceStr(whence), whence); return ret; } void mustCloseFd(int *pFd) /* Close file descriptor *pFd if >= 0, abort if there's an error, set *pFd = -1. */ { if (pFd != NULL && *pFd >= 0) { if (close(*pFd) < 0) errnoAbort("close failed"); *pFd = -1; } } char *addSuffix(char *head, char *suffix) /* Return a needMem'd string containing "headsuffix". Should be free'd when finished. */ { char *ret = NULL; int size = strlen(head) + strlen(suffix) +1; ret = needMem(sizeof(char)*size); snprintf(ret, size, "%s%s", head, suffix); return ret; } void chopSuffix(char *s) /* Remove suffix (last . in string and beyond) if any. */ { char *e = strrchr(s, '.'); if (e != NULL) *e = 0; } void chopSuffixAt(char *s, char c) /* Remove end of string from first occurrence of char c. * chopSuffixAt(s, '.') is equivalent to regular chopSuffix. 
*/ { char *e = strrchr(s, c); if (e != NULL) *e = 0; } char *chopPrefixAt(char *s, char c) /* Like chopPrefix, but can chop on any character, not just '.' */ { char *e = strchr(s, c); if (e == NULL) return s; *e++ = 0; return e; } char *chopPrefix(char *s) /* This will replace the first '.' in a string with * 0, and return the character after this. If there * is no '.' in the string this will just return the * unchanged s passed in. */ { return chopPrefixAt(s, '.'); } boolean carefulCloseWarn(FILE **pFile) /* Close file if open and null out handle to it. * Return FALSE and print a warning message if there * is a problem.*/ { FILE *f; boolean ok = TRUE; if ((pFile != NULL) && ((f = *pFile) != NULL)) { if (f != stdin && f != stdout) { if (fclose(f) != 0) { errnoWarn("fclose failed"); ok = FALSE; } } else if (f == stdout) { // One expects close() to actually flush the file and close it. If // the file was opened using the magic name "stdout" and then does a // setvbuf(), writes to file, calls carefulClose, then frees the // buffer, the FILE object points to invalid memory. Then the exit() // I/O cleanup causes the invalid memory to be written to the file, // possible outputting corruption data. If would be consistent with // stdio behavior to have "stdout" magic name open "/dev/stdout". fflush(f); } *pFile = NULL; } return ok; } void carefulClose(FILE **pFile) /* Close file if open and null out handle to it. * Warn and abort if there's a problem. */ { if (!carefulCloseWarn(pFile)) noWarnAbort(); } char *firstWordInFile(char *fileName, char *wordBuf, int wordBufSize) /* Read the first word in file into wordBuf. */ { FILE *f = mustOpen(fileName, "r"); mustGetLine(f, wordBuf, wordBufSize); fclose(f); return trimSpaces(wordBuf); } int fileOffsetSizeCmp(const void *va, const void *vb) /* Help sort fileOffsetSize by offset. 
*/ { const struct fileOffsetSize *a = *((struct fileOffsetSize **)va); const struct fileOffsetSize *b = *((struct fileOffsetSize **)vb); if (a->offset > b->offset) return 1; else if (a->offset == b->offset) return 0; else return -1; } struct fileOffsetSize *fileOffsetSizeMerge(struct fileOffsetSize *inList) /* Returns a new list which is inList transformed to have adjacent blocks * merged. Best to use this with a sorted list. */ { struct fileOffsetSize *newList = NULL, *newEl = NULL, *oldEl, *nextOld; for (oldEl = inList; oldEl != NULL; oldEl = nextOld) { nextOld = oldEl->next; if (nextOld != NULL && nextOld->offset < oldEl->offset) errAbort("Unsorted inList in fileOffsetSizeMerge %llu %llu", oldEl->offset, nextOld->offset); if (newEl == NULL || newEl->offset + newEl->size < oldEl->offset) { newEl = CloneVar(oldEl); slAddHead(&newList, newEl); } else { newEl->size = oldEl->offset + oldEl->size - newEl->offset; } } slReverse(&newList); return newList; } void fileOffsetSizeFindGap(struct fileOffsetSize *list, struct fileOffsetSize **pBeforeGap, struct fileOffsetSize **pAfterGap) /* Starting at list, find all items that don't have a gap between them and the previous item. * Return at gap, or at end of list, returning pointers to the items before and after the gap. */ { struct fileOffsetSize *pt, *next; for (pt = list; ; pt = next) { next = pt->next; if (next == NULL || next->offset != pt->offset + pt->size) { *pBeforeGap = pt; *pAfterGap = next; return; } } } void mustSystem(char *cmd) /* Execute cmd using "sh -c" or die. (See man 3 system.) 
fail on errors */ { if (cmd == NULL) // don't allow (system() supports testing for shell this way) errAbort("mustSystem: called with NULL command."); int status = system(cmd); if (status == -1) errnoAbort("error starting command: %s", cmd); else if (WIFSIGNALED(status)) errAbort("command terminated by signal %d: %s", WTERMSIG(status), cmd); else if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) errAbort("command exited with %d: %s", WEXITSTATUS(status), cmd); } else errAbort("bug: invalid exit status for command: %s", cmd); } int roundingScale(int a, int p, int q) /* returns rounded a*p/q */ { if (a > 100000 || p > 100000) { double x = a; x *= p; x /= q; return round(x); } else return (a*p + q/2)/q; } int intAbs(int a) /* Return integer absolute value */ { return (a >= 0 ? a : -a); } int rangeIntersection(int start1, int end1, int start2, int end2) /* Return amount of bases two ranges intersect over, 0 or negative if no * intersection. */ { int s = max(start1,start2); int e = min(end1,end2); return e-s; } int positiveRangeIntersection(int start1, int end1, int start2, int end2) /* Return number of bases in intersection of two ranges, or * zero if they don't intersect. */ { int ret = rangeIntersection(start1,end1,start2,end2); if (ret < 0) ret = 0; return ret; } bits64 byteSwap64(bits64 a) /* Return byte-swapped version of a */ { union {bits64 whole; UBYTE bytes[8];} u,v; u.whole = a; v.bytes[0] = u.bytes[7]; v.bytes[1] = u.bytes[6]; v.bytes[2] = u.bytes[5]; v.bytes[3] = u.bytes[4]; v.bytes[4] = u.bytes[3]; v.bytes[5] = u.bytes[2]; v.bytes[6] = u.bytes[1]; v.bytes[7] = u.bytes[0]; return v.whole; } bits64 readBits64(FILE *f, boolean isSwapped) /* Read and optionally byte-swap 64 bit entity. */ { bits64 val; mustReadOne(f, val); if (isSwapped) val = byteSwap64(val); return val; } bits64 fdReadBits64(int fd, boolean isSwapped) /* Read and optionally byte-swap 64 bit entity. 
*/ { bits64 val; mustReadOneFd(fd, val); if (isSwapped) val = byteSwap64(val); return val; } bits64 memReadBits64(char **pPt, boolean isSwapped) /* Read and optionally byte-swap 64 bit entity from memory buffer pointed to by * *pPt, and advance *pPt past read area. */ { bits64 val; memcpy(&val, *pPt, sizeof(val)); if (isSwapped) val = byteSwap64(val); *pPt += sizeof(val); return val; } bits32 byteSwap32(bits32 a) /* Return byte-swapped version of a */ { union {bits32 whole; UBYTE bytes[4];} u,v; u.whole = a; v.bytes[0] = u.bytes[3]; v.bytes[1] = u.bytes[2]; v.bytes[2] = u.bytes[1]; v.bytes[3] = u.bytes[0]; return v.whole; } bits32 readBits32(FILE *f, boolean isSwapped) /* Read and optionally byte-swap 32 bit entity. */ { bits32 val; mustReadOne(f, val); if (isSwapped) val = byteSwap32(val); return val; } bits32 fdReadBits32(int fd, boolean isSwapped) /* Read and optionally byte-swap 32 bit entity. */ { bits32 val; mustReadOneFd(fd, val); if (isSwapped) val = byteSwap32(val); return val; } bits32 memReadBits32(char **pPt, boolean isSwapped) /* Read and optionally byte-swap 32 bit entity from memory buffer pointed to by * *pPt, and advance *pPt past read area. */ { bits32 val; memcpy(&val, *pPt, sizeof(val)); if (isSwapped) val = byteSwap32(val); *pPt += sizeof(val); return val; } bits16 byteSwap16(bits16 a) /* Return byte-swapped version of a */ { union {bits16 whole; UBYTE bytes[2];} u,v; u.whole = a; v.bytes[0] = u.bytes[1]; v.bytes[1] = u.bytes[0]; return v.whole; } bits16 readBits16(FILE *f, boolean isSwapped) /* Read and optionally byte-swap 16 bit entity. */ { bits16 val; mustReadOne(f, val); if (isSwapped) val = byteSwap16(val); return val; } bits16 fdReadBits16(int fd, boolean isSwapped) /* Read and optionally byte-swap 16 bit entity. 
*/ { bits16 val; mustReadOneFd(fd, val); if (isSwapped) val = byteSwap16(val); return val; } bits16 memReadBits16(char **pPt, boolean isSwapped) /* Read and optionally byte-swap 16 bit entity from memory buffer pointed to by * *pPt, and advance *pPt past read area. */ { bits16 val; memcpy(&val, *pPt, sizeof(val)); if (isSwapped) val = byteSwap16(val); *pPt += sizeof(val); return val; } double byteSwapDouble(double a) /* Return byte-swapped version of a */ { union {double whole; UBYTE bytes[8];} u,v; u.whole = a; v.bytes[0] = u.bytes[7]; v.bytes[1] = u.bytes[6]; v.bytes[2] = u.bytes[5]; v.bytes[3] = u.bytes[4]; v.bytes[4] = u.bytes[3]; v.bytes[5] = u.bytes[2]; v.bytes[6] = u.bytes[1]; v.bytes[7] = u.bytes[0]; return v.whole; } double readDouble(FILE *f, boolean isSwapped) /* Read and optionally byte-swap double-precision floating point entity. */ { double val; mustReadOne(f, val); if (isSwapped) val = byteSwapDouble(val); return val; } double memReadDouble(char **pPt, boolean isSwapped) /* Read and optionally byte-swap double-precision floating point entity * from memory buffer pointed to by *pPt, and advance *pPt past read area. */ { double val; memcpy(&val, *pPt, sizeof(val)); if (isSwapped) val = byteSwapDouble(val); *pPt += sizeof(val); return val; } float byteSwapFloat(float a) /* Return byte-swapped version of a */ { union {float whole; UBYTE bytes[4];} u,v; u.whole = a; v.bytes[0] = u.bytes[3]; v.bytes[1] = u.bytes[2]; v.bytes[2] = u.bytes[1]; v.bytes[3] = u.bytes[0]; return v.whole; } float readFloat(FILE *f, boolean isSwapped) /* Read and optionally byte-swap single-precision floating point entity. */ { float val; mustReadOne(f, val); if (isSwapped) val = byteSwapFloat(val); return val; } float memReadFloat(char **pPt, boolean isSwapped) /* Read and optionally byte-swap single-precision floating point entity * from memory buffer pointed to by *pPt, and advance *pPt past read area. 
*/ { float val; memcpy(&val, *pPt, sizeof(val)); if (isSwapped) val = byteSwapFloat(val); *pPt += sizeof(val); return val; } void removeReturns(char *dest, char *src) /* Removes the '\r' character from a string. * The source and destination strings can be the same, if there are * no other threads */ { int i = 0; int j = 0; /* until the end of the string */ for (;;) { /* skip the returns */ while(src[j] == '\r') j++; /* copy the characters */ dest[i] = src[j]; /* check to see if done */ if(src[j] == '\0') break; /* advance the counters */ i++; j++; } } char* readLine(FILE* fh) /* Read a line of any size into dynamic memory, return null on EOF */ { int bufCapacity = 256; int bufSize = 0; char* buf = needMem(bufCapacity); int ch; /* loop until EOF of EOLN */ while (((ch = getc(fh)) != EOF) && (ch != '\n')) { /* expand if almost full, always keep one extra char for * zero termination */ if (bufSize >= bufCapacity-2) { bufCapacity *= 2; buf = realloc(buf, bufCapacity); if (buf == NULL) { errAbort("Out of memory in readline - request size %d bytes", bufCapacity); } } buf[bufSize++] = ch; } /* only return EOF if no data was read */ if ((ch == EOF) && (bufSize == 0)) { freeMem(buf); return NULL; } buf[bufSize] = '\0'; return buf; } boolean fileExists(char *fileName) /* Return TRUE if file exists (may replace this with non- * portable faster way some day). */ { /* To make piping easier stdin and stdout always exist. 
*/ if (sameString(fileName, "stdin")) return TRUE; if (sameString(fileName, "stdout")) return TRUE; return fileSize(fileName) != -1; } /* Friendly name for strstrNoCase */ char *containsStringNoCase(char *haystack, char *needle) { return strstrNoCase(haystack, needle); } char *strstrNoCase(char *haystack, char *needle) /* A case-insensitive strstr function Will also robustly handle null strings param haystack - The string to be searched param needle - The string to look for in the haystack string return - The position of the first occurence of the desired substring or -1 if it is not found */ { char *haystackCopy = NULL; char *needleCopy = NULL; int index = 0; int haystackLen = 0; int needleLen = 0; char *p, *q; if (NULL == haystack || NULL == needle) { return NULL; } haystackLen = strlen(haystack); needleLen = strlen(needle); haystackCopy = (char*) needMem(haystackLen + 1); needleCopy = (char*) needMem(needleLen + 1); for(index = 0; index < haystackLen; index++) { haystackCopy[index] = tolower(haystack[index]); } haystackCopy[haystackLen] = 0; /* Null terminate */ for(index = 0; index < needleLen; index++) { needleCopy[index] = tolower(needle[index]); } needleCopy[needleLen] = 0; /* Null terminate */ p=strstr(haystackCopy, needleCopy); q=haystackCopy; freeMem(haystackCopy); freeMem(needleCopy); if(p==NULL) return NULL; return p-q+haystack; } int vatruncatef(char *buf, int size, char *format, va_list args) /* Like vasafef, but truncates the formatted string instead of barfing on * overflow. */ { char *truncStr = " [truncated]"; int sz = vsnprintf(buf, size, format, args); /* note that some version return -1 if too small */ if ((sz < 0) || (sz >= size)) strncpy(buf + size - 1 - strlen(truncStr), truncStr, strlen(truncStr)); buf[size-1] = 0; return sz; } void truncatef(char *buf, int size, char *format, ...) /* Like safef, but truncates the formatted string instead of barfing on * overflow. 
*/ { va_list args; va_start(args, format); vatruncatef(buf, size, format, args); // ignore returned size va_end(args); } int vasafef(char* buffer, int bufSize, char *format, va_list args) /* Format string to buffer, vsprintf style, only with buffer overflow * checking. The resulting string is always terminated with zero byte. */ { int sz = vsnprintf(buffer, bufSize, format, args); /* note that some version return -1 if too small */ if ((sz < 0) || (sz >= bufSize)) { buffer[bufSize-1] = (char) 0; errAbort("buffer overflow, size %d, format: %s, buffer: '%s'", bufSize, format, buffer); } return sz; } int safef(char* buffer, int bufSize, char *format, ...) /* Format string to buffer, vsprintf style, only with buffer overflow * checking. The resulting string is always terminated with zero byte. */ { int sz; va_list args; va_start(args, format); sz = vasafef(buffer, bufSize, format, args); va_end(args); return sz; } int safefcat(char* buffer, int bufSize, char *format, ...) /* Safely format string to the end of the buffer. Returns number of characters * appended. */ { int sz, len = strlen(buffer);; va_list args; va_start(args, format); sz = vasafef(buffer + len, bufSize - len, format, args); va_end(args); return sz; } void safecpy(char *buf, size_t bufSize, const char *src) /* copy a string to a buffer, with bounds checking.*/ { size_t slen = strlen(src); if (slen > bufSize-1) errAbort("buffer overflow, size %lld, string size: %lld", (long long)bufSize, (long long)slen); strcpy(buf, src); } void safencpy(char *buf, size_t bufSize, const char *src, size_t n) /* copy n characters from a string to a buffer, with bounds checking. * Unlike strncpy, always null terminates the result */ { if (n > bufSize-1) errAbort("buffer overflow, size %lld, substring size: %lld", (long long)bufSize, (long long)n); // strlen(src) can take a long time when src is for example a pointer into a chromosome sequence. 
// Instead of setting slen to max(strlen(src), n), just stop counting length at n. size_t slen = 0; while (src[slen] != '\0' && slen < n) slen++; strncpy(buf, src, n); buf[slen] = '\0'; } void safecat(char *buf, size_t bufSize, const char *src) /* Append a string to a buffer, with bounds checking.*/ { size_t blen = strlen(buf); size_t slen = strlen(src); if (blen+slen > bufSize-1) errAbort("buffer overflow, size %lld, new string size: %lld", (long long)bufSize, (long long)(blen+slen)); strcat(buf, src); } void safencat(char *buf, size_t bufSize, const char *src, size_t n) /* append n characters from a string to a buffer, with bounds checking. */ { size_t blen = strlen(buf); if (blen+n > bufSize-1) errAbort("buffer overflow, size %lld, new string size: %lld", (long long)bufSize, (long long)(blen+n)); size_t slen = strlen(src); if (slen > n) slen = n; strncat(buf, src, n); buf[blen+slen] = '\0'; } void safememset(char *buf, size_t bufSize, const char c, size_t n) /* Append a character to a buffer repeatedly, n times with bounds checking.*/ { size_t blen = strlen(buf); if (blen+n+1 > bufSize) errAbort("buffer overflow, size %lld, new string size: %lld", (long long)bufSize, (long long)(blen+n)); memset(buf+blen, c, n); buf[blen+n] = 0; } static char *naStr = "n/a"; static char *emptyStr = ""; char *naForNull(char *s) /* Return 'n/a' if s is NULL, otherwise s. */ { if (s == NULL) s = naStr; return s; } char *naForEmpty(char *s) /* Return n/a if s is "" or NULL, otherwise s. */ { if (s == NULL || s[0] == 0) s = naStr; return s; } char *emptyForNull(char *s) /* Return "" if s is NULL, otherwise s. */ { if (s == NULL) s = emptyStr; return s; } char *nullIfAllSpace(char *s) /* Return NULL if s is all spaces, otherwise s. */ { s = skipLeadingSpaces(s); if (s != NULL) if (s[0] == 0) s = NULL; return s; } char *trueFalseString(boolean b) /* Return "true" or "false" */ { return (b ? "true" : "false"); } void uglyTime(char *label, ...) 
/* Print label and how long it's been since last call. Call with * a NULL label to initialize. */ { static long lastTime = 0; long time = clock1000(); va_list args; va_start(args, label); if (label != NULL) { fprintf(stdout, "<span class='timing'>"); vfprintf(stdout, label, args); fprintf(stdout, ": %ld millis<BR></span>\n", time - lastTime); } lastTime = time; va_end(args); } void uglyt(char *label, ...) /* Print label and how long it's been since last call. Call with * a NULL label to initialize. */ { static long lastTime = 0; long time = clock1000(); if (label != NULL) { va_list args; va_start(args, label); vfprintf(stdout, label, args); fprintf(stdout, ": %ld ms\n", time - lastTime); lastTime = time; va_end(args); } } void makeDirs(char* path) /* make a directory, including parent directories */ { char pathBuf[PATH_LEN]; char* next = pathBuf; strcpy(pathBuf, path); if (*next == '/') next++; while((*next != '\0') && (next = strchr(next, '/')) != NULL) { *next = '\0'; makeDir(pathBuf); *next = '/'; next++; } makeDir(pathBuf); } boolean isSymbolString(char *s) /* Return TRUE if s can be used as a symbol in the C language */ { char c = *s++; if (!isalpha(c) && (c != '_')) return FALSE; while ((c = *s++) != 0) { if (!(isalnum(c) || (c == '_'))) return FALSE; } return TRUE; } boolean isNumericString(char *s) /* Return TRUE if string is numeric (integer or floating point) */ { char *end; strtod(s, &end); return (end != s && *end == 0); } char *skipNumeric(char *s) /* Return first char of s that's not a digit */ { while (isdigit(*s)) ++s; return s; } char *skipToNumeric(char *s) /* skip up to where numeric digits appear */ { while (*s != 0 && !isdigit(*s)) ++s; return s; } char *splitOffNonNumeric(char *s) /* Split off non-numeric part, e.g. mm of mm8. Result should be freed when done */ { return cloneStringZ(s,skipToNumeric(s)-s); } char *splitOffNumber(char *db) /* Split off number part, e.g. 8 of mm8. 
Result should be freed when done */ { return cloneString(skipToNumeric(db)); } boolean isAllDigits(char *s) /* Return TRUE if string is non-empty and contains only digits (i.e. is a nonnegative integer). */ { if (isEmpty(s)) return FALSE; char c; while ((c = *s++) != 0) if (!isdigit(c)) return FALSE; return TRUE; } time_t mktimeFromUtc(struct tm *tm) // convert UTC time to UTC time_t // The timegm function is available on Linux and BSD and MacOS/Darwin // This is thread-safe and avoids setenv { return timegm(tm); } time_t dateToSeconds(const char *date,const char*format) // Convert a string date to time_t { struct tm storage={0,0,0,0,0,0,0,0,0}; if (strptime(date,format,&storage)==NULL) return 0; else return mktime(&storage); } boolean dateIsOld(const char *date,const char*format) // Is this string date older than now? { time_t test = dateToSeconds(date,format); time_t now = clock1(); return (test < now); } boolean dateIsOlderBy(const char *date,const char*format, time_t seconds) // Is this string date older than now by this many seconds? { time_t test = dateToSeconds(date,format); time_t now = clock1(); return (test + seconds < now); } static int daysOfMonth(struct tm *tp) /* Returns the days of the month given the year */ { int days=0; switch(tp->tm_mon) { case 3: case 5: case 8: case 10: days = 30; break; case 1: days = 28; if ( (tp->tm_year % 4) == 0 && ((tp->tm_year % 20) != 0 || (tp->tm_year % 100) == 0) ) days = 29; break; default: days = 31; break; } return days; } unsigned dayOfYear() /* Return the day of the year. 
*/ { time_t now = time(NULL); struct tm *tm = localtime(&now); return tm->tm_yday; } static void dateAdd(struct tm *tp,int addYears,int addMonths,int addDays) /* Add years,months,days to a date */ { tp->tm_mday += addDays; tp->tm_mon += addMonths; tp->tm_year += addYears; int dom=28; while ( (tp->tm_mon >11 || tp->tm_mon <0) || (tp->tm_mday>dom || tp->tm_mday<1) ) { if (tp->tm_mon>11) // First month: tm.tm_mon is 0-11 range { tp->tm_year += (tp->tm_mon / 12); tp->tm_mon = (tp->tm_mon % 12); } else if (tp->tm_mon<0) { tp->tm_year += (tp->tm_mon / 12) - 1; tp->tm_mon = (tp->tm_mon % 12) + 12; } else { dom = daysOfMonth(tp); if (tp->tm_mday>dom) { tp->tm_mday -= dom; tp->tm_mon += 1; dom = daysOfMonth(tp); } else if (tp->tm_mday < 1) { tp->tm_mon -= 1; dom = daysOfMonth(tp); tp->tm_mday += dom; } } } } char *dateAddTo(char *date,char *format,int addYears,int addMonths,int addDays) /* Add years,months,days to a formatted date and returns the new date as a cloned string * format is a strptime/strftime format: %F = yyyy-mm-dd */ { char *newDate = needMem(12); struct tm tp; if (strptime(date,format, &tp)) { dateAdd(&tp,addYears,addMonths,addDays); // tp.tm_year only contains years since 1900 strftime(newDate,12,format,&tp); } return cloneString(newDate); // newDate is never freed! } boolean haplotype(const char *name) /* Is this name a haplotype name ? _hap or _alt in the name */ { if (stringIn("_hap", name) || stringIn("_alt", name)) return TRUE; else return FALSE; } char *shorterDouble(double value) /* Work around a "bug" in %g output that goes into scientific notation too early. 
*/ { static char g15buffer[4096]; sprintf(g15buffer, "%.15g", value); return cloneString(g15buffer); } + +struct hash *loadSizes(char *sizesFile) +/* load a sizes file */ +{ +struct hash *sizes = hashNew(20); +struct lineFile *lf = lineFileOpen(sizesFile, TRUE); +char *cols[2]; + +while (lineFileNextRowTab(lf, cols, ArraySize(cols))) + hashAddInt(sizes, cols[0], sqlUnsigned(cols[1])); +lineFileClose(&lf); +return sizes; +}