src/lib/common.c 1.146
1.146 2010/03/18 01:52:09 tdreszer
Needed another flavor of chopByWhite that respects quotes
Index: src/lib/common.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/common.c,v
retrieving revision 1.145
retrieving revision 1.146
diff -b -B -U 4 -r1.145 -r1.146
--- src/lib/common.c 10 Mar 2010 22:12:33 -0000 1.145
+++ src/lib/common.c 18 Mar 2010 01:52:09 -0000 1.146
@@ -1515,8 +1515,82 @@
}
return recordCount;
}
+int chopByWhiteRespectDoubleQuotes(char *in, char *outArray[], int outSize)
+/* Like chopString, but specialized for white space separators.
+ * Further, any doubleQuotes (") are respected: white space inside them does
+ * not end a word.  If doubleQuotes enclose the whole word, they are removed:
+ *    "Fred and Ethyl" results in word [Fred and Ethyl]
+ * If doubleQuotes begin inside a word they are retained:
+ *    name="Fred and Ethyl" results in word [name="Fred and Ethyl"]
+ * Special note "" is a valid, though empty word.
+ * If outArray is NULL the input is not modified and every word is counted;
+ * otherwise at most outSize words are stored and 'in' is chopped in place.
+ * Returns the number of words found. */
+{
+int recordCount = 0;
+char c;
+char *quoteBegins = NULL;  // char after the opening quote, when a word starts with one
+boolean quoting = FALSE;   // nonzero while inside an open quote
+for (;;)
+    {
+    if (outArray != NULL && recordCount >= outSize)
+        break;
+
+    /* Skip initial separators. */
+    while (isspace(*in)) ++in;
+    if (*in == 0)
+        break;
+
+    /* Store start of word and look for end of word. */
+    if (outArray != NULL)
+        {
+        outArray[recordCount] = in;
+        quoteBegins = (*in == '"') ? (in+1) : NULL;  // only tracked when storing words
+        }
+    recordCount += 1;
+    quoting = FALSE;
+    for (;;)
+        {
+        if ((c = *in) == 0)
+            break;  // end of input, possibly inside an unterminated quote
+        if (quoting)
+            {
+            if (c == '"')
+                {
+                quoting = FALSE;
+                if (quoteBegins != NULL) // implies out array
+                    {
+                    if (*(in+1) == 0 || isspace(*(in+1))) // whole word is quoted.
+                        {
+                        outArray[recordCount-1] = quoteBegins; // Fix beginning of word
+                        quoteBegins = NULL;
+                        break;  // 'in' rests on the closing quote, which is zeroed below
+                        }
+                    }
+                }
+            }
+        else
+            {
+            quoting = (c == '"');  // a quote anywhere in the word opens quoting
+            if (isspace(c))
+                break;  // unquoted white space ends the word
+            }
+        ++in;
+        }
+    if (*in == 0)
+        break;
+
+    /* Tag end of word with zero. */
+    if (outArray != NULL)
+        *in = 0;
+    /* And skip over the zero. */
+    in += 1;
+    }
+return recordCount;
+}
+
int chopByChar(char *in, char chopper, char *outArray[], int outSize)
/* Chop based on a single character. */
{
int i;