38dd9122b6a2e1fefb229f37d95686a7fa80c4a3
tdreszer
  Fri Apr 15 17:49:09 2011 -0700
Initial checkin of continuation char support in RA files
diff --git src/lib/ra.c src/lib/ra.c
index ede46de..4d74908 100644
--- src/lib/ra.c
+++ src/lib/ra.c
@@ -32,34 +32,106 @@
     if (tag[0] == 0 || tag[0] == '#')
        {
        if (dy)
 	   {
 	   dyStringAppend(dy, line);
 	   dyStringAppendC(dy, '\n');
 	   }
 	}
     else
        break;
     }
 lineFileReuse(lf);
 return TRUE;
 }
 
-boolean raNextTagVal(struct lineFile *lf, char **retTag, char **retVal, struct dyString *dy)
-/* Read next line.  Return FALSE at end of file or blank line.  Otherwise
- * fill in *retTag and *retVal and return TRUE.
- * If dy parameter is non-null, then the text parsed gets appended to dy. */
+boolean raNextTagVal(struct lineFile *lf, char **retTag, char **retVal, struct dyString *dyRecord)
+// Read next line.  Return FALSE at end of file or blank line.  Otherwise fill in
+// *retTag and *retVal and return TRUE.  If dy parameter is non-null, then the text parsed
+// gets appended to dy. Continuation lines in RA file will be joined to produce tag and val,
+// but dy will be filled with the unedited multiple lines containing the continuation chars.
+// NOTE: retTag & retVal, if returned, point to static mem which will be overwritten on next call!
+{
+*retTag = NULL;
+*retVal = NULL;
+
+// Old function returned pointers to lf memory, but joining continuation lines requires new mem.
+// Not wishing to force memory management on callers, static memeory is held here!
+static struct dyString *dyFullLine = NULL;   // static must be initialized with constant
+if (dyFullLine == NULL)
+    dyFullLine = dyStringNew(1024);
+dyStringClear(dyFullLine);
+
+char *line;
+while (lineFileNext(lf, &line, NULL)) // NOTE: While it would be nice to use lineFileNextFull here,
+    {                                 //       we cannot because we need the true lines to fill dyRecord
+    char *clippedText = skipLeadingSpaces(line);
+    if (clippedText == NULL || clippedText[0] == 0)
+        {
+        if (dyRecord)
+            lineFileReuse(lf);   // Just so don't loose leading space in dy.
+        break;
+        }
+
+    // Append whatever line was read from file.
+    if (dyRecord)
+       {
+       dyStringAppend(dyRecord, line); // Line may contain continuation chars
+       dyStringAppendC(dyRecord,'\n');
+       }
+
+    // ignores commented lines
+    if (clippedText[0] == '#')
+        {
+        if (startsWith("#EOF", clippedText))
+            break;
+        else
+            continue;
+        }
+
+    // Something we are interested in so add to static memory
+    eraseTrailingSpaces(clippedText);   // don't bother with trailing whitespace
+
+    // Join continued lines
+    char *lastChar = clippedText + (strlen(clippedText) - 1);
+    if (*lastChar == '\\')
+        {
+        if (lastChar > clippedText && *(lastChar - 1) != '\\') // Not an escaped continuation char
+            {
+            *lastChar = '\0';
+            dyStringAppend(dyFullLine,clippedText);
+            continue; // More to look forward to.
+            }
+        }
+    dyStringAppend(dyFullLine,clippedText);
+    break;
+    }
+if (dyStringLen(dyFullLine) > 0)
+    {
+    line = dyStringContents(dyFullLine);
+    *retTag = nextWord(&line);
+    *retVal = trimSpaces(line);
+    }
+return (*retTag != NULL);
+}
+
+boolean raNextTagValUnjoined(struct lineFile *lf, char **retTag, char **retVal, struct dyString *dy)
+// NOTE: this is the former raNextTagVal routine is ignorant of continuation lines.
+//       It is provided in case older RAs need it.
+// Read next line.  Return FALSE at end of file or blank line.  Otherwise
+// fill in *retTag and *retVal and return TRUE.
+// If dy parameter is non-null, then the text parsed gets appended to dy.
 {
 char *line;
 for (;;)
     {
     if (!lineFileNext(lf, &line, NULL))
        return FALSE;
     char *tag = skipLeadingSpaces(line);
     if (tag[0] == 0)
        {
        if (dy)
 	   lineFileReuse(lf);	/* Just so don't lose leading space in dy. */
        return FALSE;
        }
     if (dy)
        {
@@ -70,66 +142,149 @@
        {
        if (startsWith("#EOF", tag))
 	   return FALSE;
        else
 	   {
 	   continue;
 	   }
        }
     break;
     }
 *retTag = nextWord(&line);
 *retVal = trimSpaces(line);
 return TRUE;
 }
 
-struct hash *raNextRecord(struct lineFile *lf)
-/* Return a hash containing next record.
- * Returns NULL at end of file.  freeHash this
- * when done.  Note this will free the hash
- * keys and values as well, so you'll have to
- * cloneMem them if you want them for later. */
+struct hash *raNextStanza(struct lineFile *lf,boolean joined)
+// Return a hash containing next record.
+// Will ignore '#' comments and if requsted, joins lines ending in continuation char '\'.
+// Returns NULL at end of file.  freeHash this
+// when done.  Note this will free the hash
+// keys and values as well, so you'll have to
+// cloneMem them if you want them for later.
 {
 struct hash *hash = NULL;
 char *key, *val;
 
 if (!raSkipLeadingEmptyLines(lf, NULL))
     return NULL;
-while (raNextTagVal(lf, &key, &val, NULL))
+
+// Which function to use?
+boolean (*raNextTagAndVal)(struct lineFile *, char **, char **, struct dyString *) = raNextTagVal;
+if (!joined)
+    raNextTagAndVal = raNextTagValUnjoined;
+
+while (raNextTagAndVal(lf, &key, &val, NULL))
     {
     if (hash == NULL)
 	hash = newHash(7);
-    val = lmCloneString(hash->lm, val);
-    hashAdd(hash, key, val);
+    hashAdd(hash, key, lmCloneString(hash->lm, val));
     }
 return hash;
 }
 
-struct slPair *raNextRecordAsSlPairList(struct lineFile *lf)
-/* Return ra record as a slPair list instead of a hash.  Handy if you want to preserve the order.
- * Do a slPairFreeValsAndList on result when done. */
+struct slPair *raNextStanzAsPairs(struct lineFile *lf,boolean joined)
+// Return ra stanza as an slPair list instead of a hash.  Handy to preserve the order.
+// Will ignore '#' comments and if requsted, joins lines ending in continuation char '\'.
 {
 struct slPair *list = NULL;
 char *key, *val;
 if (!raSkipLeadingEmptyLines(lf, NULL))
     return NULL;
-while (raNextTagVal(lf, &key, &val, NULL))
-    slPairAdd(&list, key, cloneString(val));
+
+// Which function to use?
+boolean (*raNextTagAndVal)(struct lineFile *, char **, char **, struct dyString *) = raNextTagVal;
+if (!joined)
+    raNextTagAndVal = raNextTagValUnjoined;
+
+while (raNextTagAndVal(lf, &key, &val, NULL))
+    {
+    slPairAdd(&list, key, cloneString(val)); // val is already cloned so just pass it through.
+    }
+
 slReverse(&list);
 return list;
 }
 
+struct slPair *raNextStanzaLinesAndUntouched(struct lineFile *lf)
+// Return list of lines starting from current position, up through last line of next stanza.
+// May return a few blank/comment lines at end with no real stanza.
+// Will join continuation lines, allocating memory as needed.
+// returns pairs with name=joined line and if joined,
+// val will contain raw lines '\'s and linefeeds, else val will be NULL.
+{
+struct dyString *dyFullLine      = dyStringNew(1024);
+struct dyString *dyUntouched = dyStringNew(1024);
+struct slPair *pairs = NULL;
+boolean buildingContinuation = FALSE;
+boolean stanzaStarted = FALSE;
+char *line;
+while (lineFileNext(lf, &line, NULL))
+    {
+    char *clippedText = skipLeadingSpaces(line);
+
+    // When to break?  After the stanza is over.  But first detect that it has started.
+    if (!buildingContinuation)
+        {
+        if (stanzaStarted && clippedText[0] == 0)
+            {
+            lineFileReuse(lf);
+            break;
+            }
+        if (!stanzaStarted && clippedText[0] != 0 && clippedText[0] != '#')
+            stanzaStarted = TRUE; // Comments don't start stanzas and may be followed by blanks
+        }
+
+    // build full lines
+    dyStringAppend(dyUntouched,line);
+    if (dyStringLen(dyFullLine) == 0)
+        dyStringAppend(dyFullLine,line); // includes first line's whitespace.
+    else if (clippedText[0] != '\0')
+        dyStringAppend(dyFullLine,clippedText); // don't include continued line's leading spaces
+
+    // Will the next line continue this one?
+    if (clippedText[0] != '\0' && clippedText[0] != '#') // Comment lines can't be continued!
+        {
+        line = dyStringContents(dyFullLine);
+        char *lastChar = lastNonWhitespaceChar(line);
+        if (lastChar != NULL && *lastChar == '\\')
+            {
+            if (lastChar > line && *(lastChar - 1) != '\\') // Not an escaped continuation char
+                {
+                dyStringResize(dyFullLine,(lastChar - line)); // This clips off the last char and any trailing white-space in dyString
+                dyStringAppendC(dyUntouched,'\n'); // Untouched lines delimited by newlines
+                buildingContinuation = TRUE;
+                continue;
+                }
+            }
+        }
+    if (buildingContinuation)
+        slPairAdd(&pairs, dyStringContents(dyFullLine), cloneString(dyStringContents(dyUntouched)));
+    else
+        slPairAdd(&pairs, dyStringContents(dyFullLine), NULL);
+
+    // Ready to start the next full line
+    dyStringClear(dyFullLine);
+    dyStringClear(dyUntouched);
+    buildingContinuation = FALSE;
+    }
+slReverse(&pairs);
+dyStringFree(&dyFullLine);
+dyStringFree(&dyUntouched);
+return pairs;
+}
+
 struct hash *raFromString(char *string)
 /* Return hash of key/value pairs from string.
  * As above freeHash this when done. */
 {
 char *dupe = cloneString(string);
 char *s = dupe, *lineEnd;
 struct hash *hash = newHash(7);
 char *key, *val;
 
 for (;;)
     {
     s = skipLeadingSpaces(s);
     if (s == NULL || s[0] == 0)
         break;
     lineEnd = strchr(s, '\n');