2d05d30ed4df1612d72ba84c812d004de935b122 angie Fri May 17 16:08:54 2024 -0700 Add lib module mmHash (memory-mapped hash), util tabToMmHash, and hgPhyloPlace support for using mmHash files instead of tab-separated files for metadata and name lookup. Using mmHash for name lookup saves about 50-55 seconds for SARS-CoV-2 hgPhyloPlace name/ID queries. diff --git src/inc/common.h src/inc/common.h index 05078e2..dee94e2 100644 --- src/inc/common.h +++ src/inc/common.h @@ -510,31 +510,31 @@ /* Compare two slDoubles. */ double slDoubleMedian(struct slDouble *list); /* Return median value on list. */ /******* slName - a zero terminated string on a list *******/ struct slName /* List of names. The name array is allocated to accommodate full name */ { struct slName *next; /* Next in list. */ char name[1]; /* Allocated at run time to length of string. */ }; -struct slName *newSlName(char *name); +struct slName *newSlName(const char *name); #define slNameNew newSlName /* Return a new slName. */ #define slNameFree freez /* Free a single slName */ #define slNameFreeList slFreeList /* Free a list of slNames */ struct slName *slNameNewN(char *name, int size); /* Return new slName of given size. */ int slNameCmpCase(const void *va, const void *vb); /* Compare two slNames, ignore case. */ @@ -564,31 +564,31 @@ /* Return true if string is in name list -- case sensitive. */ void *slNameFind(void *list, char *string); /* Return first element of slName list (or any other list starting * with next/name fields) that matches string. This is case insensitive. */ int slNameFindIx(struct slName *list, char *string); /* Return index of first element of slName list (or any other * list starting with next/name fields) that matches string. * ... Return -1 if not found. */ char *slNameStore(struct slName **pList, char *string); /* Put string into list if it's not there already. * Return the version of string stored in list. */ -struct slName *slNameAddHead(struct slName **pList, char *name); +struct slName *slNameAddHead(struct slName **pList, const char *name); /* Add name to start of list and return it. */ struct slName *slNameAddTail(struct slName **pList, char *name); /* Add name to end of list (not efficient for long lists), * and return it. */ struct slName *slNameCloneList(struct slName *list); /* Return clone of list. */ struct slName *slNameListFromString(char *s, char delimiter); /* Return list of slNames gotten from parsing delimited string. * The final delimiter is optional. a,b,c and a,b,c, are equivalent * for comma-delimited lists. */ #define slNameListFromComma(s) slNameListFromString(s, ',') @@ -977,31 +977,32 @@ int countSame(char *a, char *b); /* Count number of characters that from start in a,b that are same. */ int countSeparatedItems(char *string, char separator); /* Count number of items in string you would parse out with given * separator, assuming final separator is optional. */ int chopString(char *in, char *sep, char *outArray[], int outSize); /* int chopString(in, sep, outArray, outSize); */ /* This chops up the input string (cannabilizing it) * into an array of zero terminated strings in * outArray. It returns the number of strings. * If you pass in NULL for outArray, it will just * return the number of strings that it *would* - * chop. */ + * chop. + * NOTE: unlike chopByChar, this skips empty words between separators! */ extern char crLfChopper[]; extern char whiteSpaceChopper[]; /* Some handy predefined separators. */ int chopByWhite(char *in, char *outArray[], int outSize); /* Like chopString, but specialized for white space separators. */ #define chopLine(line, words) chopByWhite(line, words, ArraySize(words)) /* Chop line by white space. */ int chopByWhiteRespectDoubleQuotes(char *in, char *outArray[], int outSize); /* Like chopString, but specialized for white space separators. * Further, any doubleQuotes (") are respected. * If doubleQuote encloses whole string, then they are removed: