4a30449df90693f80750c79f16225a0000b9fa4c
kent
  Tue Mar 26 22:48:30 2013 -0700
Added test case.
diff --git src/lib/meta.c src/lib/meta.c
new file mode 100644
index 0000000..70e4c18
--- /dev/null
+++ src/lib/meta.c
@@ -0,0 +1,339 @@
+/* metaRa - stuff to parse and interpret a genome-hub meta.txt file, which is in 
+ * a hierarchical ra format.  That is something like:
+ *     meta topLevel
+ *     cellLine HELA
+ *
+ *         meta midLevel
+ *         target H3K4Me3
+ *         antibody abCamAntiH3k4me3
+ *       
+ *            meta lowLevel
+ *            fileName hg19/chipSeq/helaH3k4me3.narrowPeak.bigBed
+ * The file is interpreted so that lower level stanzas inherit tags from higher level ones.
+ */
+
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "errabort.h"
+#include "meta.h"
+#include "ra.h"
+
+struct metaTagVal *metaTagValNew(char *tag, char *val)
+/* Allocate a metaTagVal that holds its own clones of tag and val. */
+{
+struct metaTagVal *pair;
+AllocVar(pair);
+pair->val = cloneString(val);
+pair->tag = cloneString(tag);
+return pair;
+}
+
+int metaTagValCmp(const void *va, const void *vb)
+/* Comparator for slSort: va and vb each point at a metaTagVal pointer; order by tag name. */
+{
+const struct metaTagVal *const *pFirst = va;
+const struct metaTagVal *const *pSecond = vb;
+return strcmp((*pFirst)->tag, (*pSecond)->tag);
+}
+
+void metaSortTags(struct meta *meta)
+/* Canonical sort: the first tag stays first, the rest go alphabetical.  No-op if meta has no tags. */
+{
+if (meta->tagList != NULL)    /* &tagList->next below needs a first element to skip over. */
+    slSort(&meta->tagList->next, metaTagValCmp);
+}
+
+int countLeadingSpacesDetabbing(char *s, int tabStop)
+/* Count columns of leading white space in s, with each tab advancing to the next
+ * multiple of tabStop.  Set tabStop to 8 for usual UNIX results. */
+{
+int count = 0;
+char c;
+while ((c = *s++) != 0)
+    {
+    if (c == ' ')
+        ++count;
+    else if (c == '\t')
+        {
+	int tabBefore = count / tabStop;    /* Whole tab stops already passed. */
+	count = (tabBefore+1)*tabStop;      /* Jump to the start of the next one. */
+	}
+    else
+        break;
+    }
+return count;
+}
+
+struct meta *metaNextStanza(struct lineFile *lf)
+/* Return next stanza in a meta file.  Does not set parent/child/next pointers.
+ * Returns NULL at end of file.  Does a little error checking,  making sure
+ * that indentation level is consistent across all lines of stanza, and aborts
+ * if it is not.  The stanza's indentation level is stored in meta->indent. */
+{
+/* See if anything left in file, and if not return. */
+if (!raSkipLeadingEmptyLines(lf, NULL))
+    return NULL;
+
+/* Allocate return structure and vars to help parse. */
+struct meta *meta;
+AllocVar(meta);
+struct dyString *dy = dyStringNew(256);
+char *tag,*val;
+
+/* Loop to get all tags in stanza. */
+boolean firstTime = TRUE;
+int initialIndent = 0;
+for (;;)
+    {
+    dyStringClear(dy);
+    if (!raNextTagVal(lf, &tag, &val, dy))
+        break;
+
+    /* Make tag/val and add it to list. */
+    struct metaTagVal *mtv = metaTagValNew(tag, val);
+    slAddHead(&meta->tagList, mtv);
+
+    /* Check indentation. */
+    int indent =  countLeadingSpacesDetabbing(dy->string, 8);
+    if (firstTime)
+        {
+	initialIndent = indent;
+	firstTime = FALSE;
+	}
+    else
+        {
+	if (indent != initialIndent)
+	    {
+	    warn("Error line %d of %s\n", lf->lineIx, lf->fileName);
+	    warn("Indentation level %d doesn't match level %d at start of stanza.",
+		indent, initialIndent);
+	    if (strchr(dy->string, '\t'))
+	        warn("There are tabs in the indentation, be sure tab stop is set to 8 spaces.");
+	    noWarnAbort();
+	    }
+	}
+    }
+/* Tags were pushed on the head of the list, so put them back in file order. */
+slReverse(&meta->tagList);
+/* Tag and val were cloned by metaTagValNew, so the scratch line buffer can go. */
+dyStringFree(&dy);
+
+/* Set up remaining fields and return. */
+assert(meta->tagList != NULL);
+meta->name = meta->tagList->val;
+meta->indent = initialIndent;
+return meta;
+}
+
+static struct meta *rReverseMetaList(struct meta *list)
+/* Reverse list, recursing to reverse every children list as well, and return
+ * the new head.  The tree is built with addHead rather than addTail because
+ * that is faster on long lists, so it comes out backwards and is fixed here. */
+{
+struct meta *node;
+slReverse(&list);
+for (node = list; node != NULL; node = node->next)
+    {
+    if (node->children)
+        node->children = rReverseMetaList(node->children);
+    }
+return list;
+}
+
+struct meta *metaLoadAll(char *fileName, char *keyTag, char *parentTag,
+    boolean ignoreOtherStanzas, boolean ignoreIndent)
+/* Loads in all ra stanzas from file and turns them into a list of meta, some of which
+ * may have children.  The keyTag parameter is optional.  If non-null it should be set to
+ * the tag name that starts a stanza.   If null, the first tag of the first stanza will be used.
+ * The ignoreOtherStanzas flag if set will skip stanzas that start with other tags; if not
+ * set the routine will abort on such stanzas (the very first stanza is not checked).
+ * Hierarchy comes from indentation: a stanza indented deeper than the one before it becomes
+ * its child, and the first stanza must not be indented.  NOTE: parentTag is never read by
+ * the code below, and passing ignoreIndent as TRUE aborts, so a parentTag-defined hierarchy
+ * is not implemented yet. */
+{
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+struct meta *meta, *forest = NULL, *lastMeta = NULL;
+if (ignoreIndent)
+    {
+    errAbort("Currently metaLoadAll can't ignore indentation, sorry.");
+    }
+while ((meta = metaNextStanza(lf)) != NULL)
+    {
+    struct meta **pList;    /* List the new stanza gets pushed onto. */
+    if (forest == NULL)   /* First stanza accepted into the tree. */
+        {
+	if (meta->indent != 0)
+	    errAbort("Initial stanza of %s should not be indented", fileName);
+	if (keyTag == NULL)
+	    keyTag = meta->tagList->tag;
+	pList = &forest;
+	}
+    else
+        {
+	if (!sameString(keyTag, meta->tagList->tag))
+	    {
+	    if (ignoreOtherStanzas)
+	        {
+		continue;
+		// TODO: The skipped stanza is leaked here; it should really be freed.
+		}
+	    else
+	        errAbort("Stanza beginning with %s instead of %s line %d of %s",
+		    meta->tagList->tag, keyTag, lf->lineIx, lf->fileName);
+	    }
+	if (meta->indent > lastMeta->indent)    /* Deeper: child of previous stanza. */
+	    {
+	    pList = &lastMeta->children;
+	    meta->parent = lastMeta;
+	    }
+	else if (meta->indent == lastMeta->indent)    /* Same depth: sibling of previous. */
+	    {
+	    if (meta->indent == 0)
+		pList = &forest;
+	    else
+		{
+	        pList = &lastMeta->parent->children;
+		meta->parent = lastMeta->parent;
+		}
+	    }
+	else /* meta->indent < lastMeta->indent */
+	    {
+	    /* Walk up the ancestors of the previous stanza to find one at our indentation. */
+	    struct meta *olderSibling;
+	    for (olderSibling = lastMeta->parent; 
+		olderSibling != NULL; olderSibling = olderSibling->parent)
+	        {
+		if (meta->indent == olderSibling->indent)
+		    break;
+		}
+	    if (olderSibling == NULL)
+	        {
+		warn("Indentation inconsistent in stanza ending line %d of %s.", 
+		    lf->lineIx, lf->fileName);
+		warn("If you are using tabs, check your tab stop is set to 8.");
+		warn("Otherwise check that when you are reducing indentation in a stanza");
+		warn("that it is the same as the previous stanza at the same level.");
+		noWarnAbort();
+		}
+	    if (olderSibling->parent == NULL)    /* Sibling is a root, so we are one too. */
+	        pList = &forest;
+	    else
+	        {
+		pList = &olderSibling->parent->children;
+		meta->parent = olderSibling->parent;
+		}
+	    }
+	}
+    slAddHead(pList, meta);    /* Lists are built backwards; see rReverseMetaList call below. */
+    lastMeta = meta;
+    }
+lineFileClose(&lf);
+forest = rReverseMetaList(forest);
+return forest;
+}
+
+static void rMetaListWrite(struct meta *metaList, struct meta *parent,
+    int level, int indent, boolean withParent, FILE *f)
+/* Write out list of stanzas at same level to file,  their children too.  level is the nesting
+ * depth (0 at the top) and indent the number of spaces per level.  parent is NULL at the top. */
+{
+int totalIndent = level * indent;
+struct meta *meta;
+for (meta = metaList; meta != NULL; meta = meta->next)
+    {
+    struct metaTagVal *mtv;
+    boolean gotParent = FALSE;
+    for (mtv = meta->tagList; mtv != NULL; mtv = mtv->next)
+	{
+	if (sameString(mtv->tag, "parent"))
+	    {
+	    if (!withParent)
+	        continue;       /* Parent tags not wanted in output: drop this one. */
+	    gotParent = TRUE;   /* Stanza has its own, so don't synthesize another. */
+	    }
+	spaceOut(f, totalIndent);
+	fprintf(f, "%s %s\n", mtv->tag, mtv->val);
+	}
+    if (withParent && !gotParent && parent != NULL)   /* Synthesize parent tag from the tree. */
+        {
+	spaceOut(f, totalIndent);
+	fprintf(f, "%s %s\n", "parent", parent->name);
+	}
+    fprintf(f, "\n");
+    if (meta->children)
+        rMetaListWrite(meta->children, meta, level+1, indent, withParent, f);
+    }
+}
+
+void metaWriteAll(struct meta *metaList, char *fileName, int indent, boolean withParent)
+/* Write all of metaList to fileName, indent spaces per level, optionally with parent tags. */
+{
+FILE *out = mustOpen(fileName, "w");
+rMetaListWrite(metaList, NULL, 0, indent, withParent, out);
+carefulClose(&out);
+}
+
+char *metaLocalTagVal(struct meta *meta, char *name)
+/* Look name up among this node's own tags only; parents are not consulted.
+ * Returns the value of the first matching tag, or NULL if there is none. */
+{
+struct metaTagVal *pair = meta->tagList;
+while (pair != NULL && !sameString(pair->tag, name))
+    pair = pair->next;
+return (pair != NULL ? pair->val : NULL);
+}
+
+char *metaTagVal(struct meta *meta, char *name)
+/* Look name up in this node and then, failing that, in each ancestor from the
+ * nearest outward.  Returns the first value found, or NULL if none has the tag. */
+{
+char *found = NULL;
+struct meta *node = meta;
+while (node != NULL && found == NULL)
+    {
+    found = metaLocalTagVal(node, name);
+    node = node->parent;
+    }
+return found;
+}
+
+void metaAddTag(struct meta *meta, char *tag, char *val)
+/* Add tag to meta, replacing the value of an existing tag of that name if any; new tags go last. */
+{
+struct metaTagVal *mtv;
+for (mtv = meta->tagList; mtv != NULL; mtv = mtv->next)
+    {
+    if (!sameString(mtv->tag, tag))
+        continue;
+    char *oldVal = mtv->val;
+    mtv->val = cloneString(val);     /* Clone before freeing in case val is oldVal itself. */
+    if (meta->name == oldVal)        /* name can alias the first tag's val (metaNextStanza)... */
+        meta->name = mtv->val;       /* ...so move it off the string about to be freed. */
+    freeMem(oldVal);
+    return;
+    }
+/* No existing tag of this name, so append a new one. */
+slAddTail(&meta->tagList, metaTagValNew(tag, val));
+}
+
+static void rHashMetaList(struct hash *hash, struct meta *list)
+/* Enter each stanza on list into hash under its name, descending into the
+ * children of each stanza right after the stanza itself is added. */
+{
+for (; list != NULL; list = list->next)
+    {
+    hashAddUnique(hash, list->name, list);
+    if (list->children != NULL)
+        rHashMetaList(hash, list->children);
+    }
+}
+
+struct hash *metaHash(struct meta *forest)
+/* Return hash of every meta in forest, at all levels of the hierarchy, keyed by meta name. */
+{
+struct hash *byName = hashNew(0);
+rHashMetaList(byName, forest);
+return byName;
+}