src/lib/common.c 1.146

1.146 2010/03/18 01:52:09 tdreszer
Needed another flavor of chopByWhite that respects quotes
Index: src/lib/common.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/common.c,v
retrieving revision 1.145
retrieving revision 1.146
diff -b -B -U 4 -r1.145 -r1.146
--- src/lib/common.c	10 Mar 2010 22:12:33 -0000	1.145
+++ src/lib/common.c	18 Mar 2010 01:52:09 -0000	1.146
@@ -1515,8 +1515,82 @@
     }
 return recordCount;
 }
 
+int chopByWhiteRespectDoubleQuotes(char *in, char *outArray[], int outSize)
+/* Like chopString, but specialized for white space separators.
+ * Further, double quotes (") are respected: white space inside them is kept.
+ * If double quotes enclose the whole word, then they are removed:
+ *   "Fred and Ethyl" results in word [Fred and Ethyl]
+ * If double quotes do not enclose the whole word, they are retained:
+ *   Fred" and "Ethyl results in word [Fred" and "Ethyl]
+ * Special note: "" is a valid, though empty, word.
+ * Words are zero terminated in place and at most outSize are stored, unless
+ * outArray is NULL, in which case words are only counted.  Returns the count. */
+{
+int recordCount = 0;
+char c;
+char *quoteBegins = NULL;
+boolean quoting = FALSE;
+for (;;)
+    {
+    if (outArray != NULL && recordCount >= outSize)
+        break;
+
+    /* Skip initial separators. */
+    while (isspace((unsigned char)*in)) ++in;
+    if (*in == 0)
+        break;
+
+    /* Store start of word and look for end of word. */
+    if (outArray != NULL)
+        {
+        outArray[recordCount] = in;
+        quoteBegins = (*in == '"') ? (in+1) : NULL; // text start if word opens quoted
+        }
+    recordCount += 1;
+    quoting = FALSE;
+    for (;;)
+        {
+        if ((c = *in) == 0)
+            break;
+        if (quoting)
+            {
+            if (c == '"')
+                {
+                quoting = FALSE;
+                if (quoteBegins != NULL) // implies out array
+                    {
+                    /* Note the parens: c must hold the next char, not the comparison result. */
+                    if ((c = *(in+1)) == 0 || isspace((unsigned char)c)) // whole word is quoted.
+                        {
+                        outArray[recordCount-1] = quoteBegins; // Fix beginning of word
+                        quoteBegins = NULL;
+                        break; // in stays on the closing quote, which is zeroed below
+                        }
+                    }
+                }
+            }
+        else
+            {
+            quoting = (c == '"');
+            if (isspace((unsigned char)c))
+                break;
+            }
+        ++in;
+        }
+    if (*in == 0)
+        break;
+
+    /* Tag end of word with zero. */
+    if (outArray != NULL)
+        *in = 0;
+    /* And skip over the zero. */
+    in += 1;
+    }
+return recordCount;
+}
+
 int chopByChar(char *in, char chopper, char *outArray[], int outSize)
 /* Chop based on a single character. */
 {
 int i;