b168eec18c0cb3f2d06b1ccc59eb34776403f50e kent Wed May 31 18:05:09 2017 -0700 Writing and testing new csvParseNext function. diff --git src/lib/csv.c src/lib/csv.c index fa68180..8338c6f 100644 --- src/lib/csv.c +++ src/lib/csv.c @@ -1,62 +1,140 @@ /* csv - stuff to help process comma separated values. Have to wrap quotes around * things with commas, and escape quotes with more quotes sometimes. */ #include "common.h" #include "linefile.h" #include "csv.h" char *csvEscapeToDyString(struct dyString *dy, char *string) /* Wrap string in quotes if it has any commas. Anything already in quotes get s double-quoted * Returns transformated result, which will be input string if it has no commas, otherwise * will be dy*/ { /* If there are no commas just output it */ if (strchr(string, ',') == NULL) { return string; } dyStringClear(dy); dyStringAppendC(dy, '"'); char c; while ((c = *string++) != 0) { if (c == '"') dyStringAppendC(dy, c); dyStringAppendC(dy, c); } dyStringAppendC(dy, '"'); return dy->string; } void csvWriteVal(char *val, FILE *f) /* Write val, which may have some quotes or commas in it, in a way to be compatable with * csv list representation */ { /* If there are no commas just output it */ if (strchr(val, ',') == NULL) { fputs(val, f); return; } /* Strip surrounding quotes if any */ val = trimSpaces(val); int valLen = strlen(val); if (valLen > 2 && val[0] == '"' && lastChar(val) == '"') { val[valLen-1] = 0; val += 1; } /* Put quotes around it and output, escaping internal quotes with double quotes */ fputc('"', f); char c; while ((c = *val++) != 0) { if (c == '"') fputc('"', f); fputc(c, f); } fputc('"', f); } +char *csvParseNext(char **pos, struct dyString *scratch) +/* Return next value starting at pos, putting results into scratch and + * returning scratch->string or NULL if no values left. Will update *pos + * to after trailing comma if any. This will tolerate and ignore leading + * and trailing white space. + * Since an empty or all-white string is will return NULL, if you + * want empty strings to be a legitimate value then they have to be quoted + * or followed by a comma. */ +{ +// Return NULL at end of string +char *s = skipLeadingSpaces(*pos); +if (isEmpty(s)) + return NULL; + +// Clear scratch pad and get first character +dyStringClear(scratch); +char c = *s; + +// If we start with a quote then fall into logic that goes to next quote, +// treating internal "" as a single " so that can have internal quotes +if (c == '"') + { + for (;;) + { + c = *(++s); + if (c == 0) + errAbort("Isolated quote in csvParseNext %s", *pos); + if (c == '"') + { + ++s; + if (*s == c) // Next char also a quote we convert the two quotes to one + { + dyStringAppendC(scratch, c); + } + else + { + // End of string. Skip over white space until next comma + s = skipLeadingSpaces(s); + c = *s; + if (c == ',') + { + ++s; // skip over trailing comma. + break; + } + if (c == 0) + break; + else + errAbort("Unexpected text after quotes in csvParseNext %s", *pos); + } + } + else + dyStringAppendC(scratch, c); + } + } +else // Did not start with a quote, so we just copy until comma or end of string. + { + char lastC = 0; + for (;;) + { + if (c == 0) + break; + if (c == ',') + { + ++s; // skip over trailing comma. + break; + } + dyStringAppendC(scratch, c); + lastC = c; + c = *(++s); + } + if (isspace(lastC)) + eraseTrailingSpaces(scratch->string); + } + +// Update position to start reading next one from and return scratchpad +*pos = s; +return scratch->string; +}