src/hg/oneShot/tdbRewriteViewsToSubtracks1/tdbRewriteViewsToSubtracks1.c 1.1

1.1 2010/02/03 22:08:41 kent
Making a single file rewriter for track in the middle.
Index: src/hg/oneShot/tdbRewriteViewsToSubtracks1/tdbRewriteViewsToSubtracks1.c
===================================================================
RCS file: src/hg/oneShot/tdbRewriteViewsToSubtracks1/tdbRewriteViewsToSubtracks1.c
diff -N src/hg/oneShot/tdbRewriteViewsToSubtracks1/tdbRewriteViewsToSubtracks1.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/oneShot/tdbRewriteViewsToSubtracks1/tdbRewriteViewsToSubtracks1.c	3 Feb 2010 22:08:41 -0000	1.1
@@ -0,0 +1,696 @@
+/* tdbRewriteViewsToSubtracks1 - Convert views to subtracks with sub-sub-tracks.
+ * This code is a slight modification of tdbRewriteViewsToSubtracks.  It works on 
+ * a single file rather than the whole directory. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "localmem.h"
+#include "dystring.h"
+#include "obscure.h"
+#include "portable.h"
+#include "errabort.h"
+#include "ra.h"
+
+
+static char const rcsid[] = "$Id$";
+
+void usage()
+/* Explain usage and exit.  The name shown is this program's own (note the trailing 1). */
+{
+errAbort(
+  "tdbRewriteViewsToSubtracks1 - Convert views to subtracks with sub-sub-tracks.\n"
+  "usage:\n"
+  "   tdbRewriteViewsToSubtracks1 inFile.ra outFile.ra\n"
+  );
+}
+
+static struct optionSpec options[] = {	/* Command line options: only the terminator, none defined. */
+   {NULL, 0},
+};
+
+#define glKeyField "track"	 /* The field that has the record ID */
+#define glParentField "subTrack" /* The field that points to the parent. */
+
+struct raTag
+/* A tag (one name/value setting) in a .ra file. */
+    {
+    struct raTag *next;	/* Next in list. */
+    char *name;		/* Name of tag. */
+    char *val;		/* Value of tag. */
+    char *text;		/* Text - including white space and comments before tag. */
+    };
+
+struct raRecord
+/* A record (stanza) in a .ra file. */
+    {
+    struct raRecord *next;	/* Next in list. */
+    char *key;			/* First word of the track tag's value, if any. */
+    struct raTag *tagList;	/* List of tags that make us up. */
+    int startLineIx, endLineIx; /* Start and end in file for error reporting. */
+    struct raFile *file;	/* Pointer to file we are in. */
+    char *endComments;		/* Text left after the last tag; set only when there is some. */
+    };
+
+struct raFile
+/* One .ra file and the records read from it. */
+    {
+    struct raFile *next;	  /* Next (in include list) */
+    char *name;		  	  /* Name of file */
+    struct raRecord *recordList;  /* List of all records in file */
+    char *endSpace;		  /* Text after last record. */
+    struct raLevel *level;	  /* Level this is in. */
+    };
+
+struct raLevel
+/* A level of ra files.  List of files with includes starting from trackDb.ra at given
+ * directory level.  Nothing in this file assigns parent, so only one level is in play. */
+    {
+    struct raLevel *parent;	/* Parent level */
+    char *name;			/* Level name - directory */
+    struct raFile *fileList;	/* List of files at this level. */
+    struct hash *trackHash;	/* Hash of all track records in level. */
+    };
+
+#ifdef SOON
+static void raTagWrite(struct raTag *tag, FILE *f)
+/* Write tag's text to file.  Only compiled when SOON is defined; not called in this file. */
+{
+fputs(tag->text, f);
+}
+#endif
+
+void recordLocationReport(struct raRecord *rec, FILE *out)
+/* Report to out the line range and file name of the stanza that rec came from. */
+{
+struct raFile *file = rec->file;
+fprintf(out, "in stanza from lines %d-%d of %s\n", rec->startLineIx, rec->endLineIx, file->name);
+}
+
+void recordWarn(struct raRecord *rec, char *format, ...)
+/* Issue a printf-style warning via vaWarn, then report rec's location on stderr. */
+{
+va_list args;
+va_start(args, format);
+vaWarn(format, args);
+va_end(args);
+recordLocationReport(rec, stderr);
+}
+
+void recordAbort(struct raRecord *rec, char *format, ...)
+/* Issue a printf-style warning, report rec's location on stderr, then call noWarnAbort. */
+{
+va_list args;
+va_start(args, format);
+vaWarn(format, args);
+va_end(args);
+recordLocationReport(rec, stderr);
+noWarnAbort();
+}
+
+struct raTag *raRecordFindTag(struct raRecord *r, char *name)
+/* Walk the record's tag list and return the first tag whose name matches.
+ * Return NULL if not found. */
+{
+struct raTag *tag = r->tagList;
+while (tag != NULL && !sameString(tag->name, name))
+    tag = tag->next;
+return tag;
+}
+
+char *firstTagInText(char *text)
+/* Return the location of tag in text - skipping blank and comment lines and white-space.  If text ends inside a comment, return its terminating zero. */
+{
+char *s = text;
+for (;;)
+    {
+    s = skipLeadingSpaces(s);
+    if (s[0] != '#')
+        break;
+    char *eol = strchr(s, '\n');
+    if (eol == NULL)	/* Comment is last line and has no newline: nothing follows it. */
+        return s + strlen(s);
+    s = eol;
+    }
+return s;
+}
+
+static struct raRecord *readRecordsFromFile(struct raFile *file, struct dyString *dy, struct lm *lm)
+/* Read all the records in a file and return as a list in file order.  The dy parameter
+ * returns the last bits of the file (after the last record).  Everything is allocated in lm. */
+{
+char *fileName = file->name;
+struct raRecord *r, *rList = NULL;
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+while (raSkipLeadingEmptyLines(lf, dy))
+    {
+    /* Create a record structure in local memory. */
+    lmAllocVar(lm, r);
+    r->startLineIx = lf->lineIx;
+    char *name, *val;
+    while (raNextTagVal(lf, &name, &val, dy))
+        {
+	struct raTag *tag;
+	lmAllocVar(lm, tag);
+	tag->name = lmCloneString(lm, name);
+	tag->val = lmCloneString(lm, val);
+	tag->text = lmCloneString(lm, dy->string);
+	if (sameString(name, glKeyField))
+	    r->key = lmCloneFirstWord(lm, tag->val);
+	slAddHead(&r->tagList, tag);
+	dyStringClear(dy);	/* Start fresh so dy holds only the next tag's text. */
+	}
+    if (dy->stringSize > 0)	/* Text collected after the record's last tag. */
+        {
+	r->endComments = lmCloneString(lm, dy->string);
+	}
+    slReverse(&r->tagList);
+    r->endLineIx = lf->lineIx;
+    r->file = file;
+    slAddHead(&rList, r);
+    }
+lineFileClose(&lf);
+slReverse(&rList);
+return rList;
+}
+
+struct raFile *raFileRead(char *fileName, struct raLevel *level, struct lm *lm)
+/* Read fileName into a new raFile that is allocated in lm and attached to level. */
+{
+struct raFile *result;
+lmAllocVar(lm, result);
+result->level = level;
+result->name = lmCloneString(lm, fileName);
+struct dyString *trailing = dyStringNew(0);
+result->recordList = readRecordsFromFile(result, trailing, lm);
+result->endSpace = lmCloneString(lm, trailing->string);
+dyStringFree(&trailing);
+return result;
+}
+
+static void recurseThroughIncludes(char *fileName, struct lm *lm, 
+	struct hash *circularHash,  struct raLevel *level, struct raFile **pFileList)
+/* Recurse through include files.  circularHash holds the files on the current include chain. */
+{
+struct raFile *raFile = raFileRead(fileName, level, lm);
+slAddHead(pFileList, raFile);
+char dir[PATH_LEN];
+splitPath(fileName, dir, NULL, NULL);	/* Includes are relative to this file's directory. */
+struct raRecord *r;
+for (r = raFile->recordList; r != NULL; r = r->next)
+    {
+    struct raTag *tag = r->tagList;
+    if (!sameString(tag->name, "include"))
+        continue;
+    for (; tag != NULL; tag = tag->next)
+	{
+	if (!sameString(tag->name, "include"))
+	   recordAbort(r, "Non-include tag %s in an include stanza", tag->name);
+	char includeName[PATH_LEN];
+	safef(includeName, sizeof(includeName), "%s%s", dir, tag->val);
+	if (hashLookup(circularHash, includeName))
+	    recordAbort(r, "Including file %s in an infinite loop", includeName);
+	hashAdd(circularHash, includeName, NULL);	/* On the chain while we are inside it. */
+	recurseThroughIncludes(includeName, lm, circularHash, level, pFileList);
+	hashRemove(circularHash, includeName);	/* Off the chain: a later sibling may include it. */
+	}
+    }
+}
+
+struct hash *hashLevelTracks(struct raLevel *level)
+/* Return hash of every keyed record in level; a track may be in it several times (different releases). */
+{
+struct hash *trackHash = hashNew(0);
+struct raFile *raFile;
+struct raRecord *rec;
+for (raFile = level->fileList; raFile != NULL; raFile = raFile->next)
+    {
+    for (rec = raFile->recordList; rec != NULL; rec = rec->next)
+        {
+	if (rec->key == NULL)
+	    continue;
+	hashAdd(trackHash, rec->key, rec);
+	}
+    }
+return trackHash;
+}
+
+struct raLevel *raLevelReadOneFile(char *initialFile, struct lm *lm)
+/* Read initialFile and all files that are included by it into a new level allocated in lm. */
+{
+/* Create structure for level, named after the directory part of initialFile. */
+struct raLevel *level;
+lmAllocVar(lm, level);
+char dir[PATH_LEN];
+splitPath(initialFile, dir, NULL, NULL);
+level->name = lmCloneString(lm, dir);
+
+/* Build up list of files by recursion.  A missing initialFile leaves fileList untouched. */
+if (fileExists(initialFile))
+    {
+    struct hash *circularHash = hashNew(0);
+    hashAdd(circularHash, initialFile, NULL);
+    recurseThroughIncludes(initialFile, lm, circularHash, level, &level->fileList);
+    hashFree(&circularHash);
+    slReverse(&level->fileList);
+    }
+
+level->trackHash = hashLevelTracks(level);
+return level;
+}
+
+
+struct raRecord *findClosestParent(struct raLevel *level, struct raRecord *record, char *parentKey)
+/* Among the records keyed parentKey in level, return the one scoring closest to record:
+ * same-file records before record score best, same-file records after it next, other
+ * files last.  Release is disregarded; this is in fact what release inheritance uses. */
+{
+struct raRecord *best = NULL;
+int bestDistance = BIGNUM;
+struct hashEl *hel;
+for (hel = hashLookup(level->trackHash, parentKey); hel != NULL; hel = hashLookupNext(hel))
+    {
+    struct raRecord *candidate = hel->val;
+    int distance = record->startLineIx - candidate->startLineIx;
+    if (record->file != candidate->file)
+        distance = BIGNUM/2;
+    else if (distance < 0)
+        distance = BIGNUM/4 - distance;
+    if (distance < bestDistance)
+        {
+	bestDistance = distance;
+	best = candidate;
+	}
+    }
+return best;
+}
+
+char *findRelease(struct raRecord *record, struct raLevel *level)
+/* Return value of the release tag of record or, failing that, of the nearest ancestor
+ * reached by following parent tags within level.  Return NULL if none has a release. */
+{
+struct raRecord *rec;
+for (rec = record; rec != NULL; )
+    {
+    struct raTag *tag = raRecordFindTag(rec, "release");
+    if (tag != NULL)
+        return tag->val;
+    /* No release here - climb to the parent if there is one. */
+    tag = raRecordFindTag(rec, glParentField);
+    rec = (tag == NULL ? NULL : findClosestParent(level, rec, tag->val));
+    }
+return NULL;
+}
+
+struct raRecord *findRecordAtLevel(struct raLevel *level, char *key, char *release)
+/* Find record of given key and release in level.  Tries the three rules below in order; NULL if none applies. */
+{
+/* Look up key in hash */
+struct hashEl *firstEl = hashLookup(level->trackHash, key);
+
+/* Loop through and return any ones that match on both key (implicit in hash find) and
+ * in release. */
+struct hashEl *hel;
+for (hel = firstEl; hel != NULL; hel = hashLookupNext(hel))
+    {
+    struct raRecord *r = hel->val;
+    struct raTag *releaseTag = raRecordFindTag(r, "release");
+    char *rRelease = (releaseTag == NULL ? NULL : releaseTag->val);
+    if (sameOk(release, rRelease))
+        return r;
+    }
+
+/* If given record has no defined release, return first match regardless of release. */
+if (release == NULL && firstEl != NULL)
+    return firstEl->val;
+
+/* Match to records that have no release defined. */
+for (hel = firstEl; hel != NULL; hel = hashLookupNext(hel))
+    {
+    struct raRecord *r = hel->val;
+    struct raTag *releaseTag = raRecordFindTag(r, "release");
+    if (releaseTag == NULL)
+        return r;
+    }
+
+return NULL;
+}
+
+struct raRecord *findRecordInParentFileLevel(struct raLevel *level, struct raRecord *record)
+/* Search ancestor levels, nearest first, for record's key with matching release.  NULL if none. */
+{
+char *release = findRelease(record, level);
+struct raLevel *ancestor;
+for (ancestor = level->parent; ancestor != NULL; ancestor = ancestor->parent)
+    {
+    struct raRecord *found = findRecordAtLevel(ancestor, record->key, release);
+    if (found != NULL)
+        return found;
+    }
+return NULL;
+}
+
+struct raRecord *findRecordInLevelOrLevelsUp(struct raLevel *level, char *key, char *release)
+/* Search level itself and then each ancestral level in turn for a record matching key
+ * and compatible with release.  Return the first one found, or NULL. */
+{
+struct raRecord *found = NULL;
+struct raLevel *cur;
+for (cur = level; cur != NULL && found == NULL; cur = cur->parent)
+    {
+    found = findRecordAtLevel(cur, key, release);
+    }
+return found;
+}
+
+void raRecordWriteTags(struct raRecord *r, FILE *f)
+/* Copy each tag's stored text to f; that text carries the tag's preceding spaces. */
+{
+/* The record's trailing comments are not written here. */
+struct raTag *tag = r->tagList;
+for ( ; tag != NULL; tag = tag->next)
+    fputs(tag->text, f);
+}
+
+
+void raTagWriteIndented(struct raTag *t, FILE *f, int indents)
+/* Write out tag with extra indents.  Include (and indent) any preceding comments.
+ * Each line's own leading white space is kept and indents spaces are added after it. */
+{
+char *s = t->text;
+for (;;)
+    {
+    char *e = skipLeadingSpaces(s);
+    mustWrite(f, s, e-s);
+    spaceOut(f, indents);
+    s = e;
+    e = strchr(s, '\n');
+    if (e == NULL)
+	{
+	/* Last line lacks a newline: supply one and stop instead of looping forever. */
+	fprintf(f, "%s\n", s);
+	break;
+	}
+    mustWrite(f, s, e-s+1);
+    s = e+1;
+    if (s[0] == 0)
+	break;
+    }
+}
+
+void raRecordWriteIndented(struct raRecord *r, FILE *f, int indent)
+/* Write every tag of the record to f with indent extra spaces, followed by the
+ * record's trailing comments when it has any. */
+{
+struct raTag *tag = r->tagList;
+for ( ; tag != NULL; tag = tag->next)
+    raTagWriteIndented(tag, f, indent);
+if (r->endComments == NULL)
+    return;
+/* The indent is emitted once, ahead of the whole trailing comment text. */
+spaceOut(f, indent);
+fputs(r->endComments, f);
+}
+
+struct raTag *findViewSubGroup(struct raRecord *r)
+/* Try subGroup1, subGroup2, ... stopping at the first one that is missing.  Return the
+ * first whose value starts with the word view, or NULL if none does. */
+{
+int groupIx = 1;
+for (;;)
+    {
+    char tagName[16];
+    safef(tagName, sizeof(tagName), "subGroup%d", groupIx);
+    struct raTag *tag = raRecordFindTag(r, tagName);
+    if (tag == NULL || startsWithWord("view", tag->val))
+        return tag;
+    ++groupIx;
+    }
+}
+
+static void writeRecordAsSubOfView(struct raRecord *r, FILE *f, char *viewTrackName)
+/* Write out a record, but indent it more (4 spaces) and substitute viewTrackName for the
+ * parent named in the existing subTrack tag.  Text after the parent name is kept. */
+{
+struct raTag *t;
+for (t = r->tagList; t != NULL; t = t->next)
+    {
+    if (sameString(t->name, "subTrack"))
+        {
+	char *s = firstTagInText(t->text);	/* Start of the tag itself. */
+	mustWrite(f, t->text, s - t->text);	/* Copy what precedes the tag unchanged. */
+	spaceOut(f, 4);
+	fprintf(f, "subTrack %s", viewTrackName);
+	/* Skip over subTrack and name in original text. */
+	int i;
+	for (i=0; i<2; ++i)
+	    {
+	    s = skipLeadingSpaces(s);
+	    s = skipToSpaces(s);
+	    }
+	if (s != NULL)	/* NOTE(review): NULL presumably means nothing followed the name - confirm. */
+	     fputs(s, f);
+	else
+	    fputc('\n', f);
+	}
+    else
+        {
+	raTagWriteIndented(t, f, 4);
+	}
+    }
+}
+
+
+boolean shouldBeParentOfView(struct raRecord *r)
+/* Return TRUE if record has a view subGroup, making it one we insert views underneath. */
+{
+return (findViewSubGroup(r) == NULL ? FALSE : TRUE);
+}
+
+void rewriteSettingsByViewComplex(struct raFile *file, struct raRecord *complexRecord, FILE *f,
+	struct lm *lm)
+/* Rewrite track that has settings by view: write the parent, then one view track per view, each followed by its children from file. */
+{
+/* Get list of views from subGroup1 or subGroup2 tag. */
+struct raTag *viewSubGroupTag = findViewSubGroup(complexRecord);
+if (viewSubGroupTag == NULL)
+    recordAbort(complexRecord, "Can't find view subGroup#");
+char *line = lmCloneString(lm, viewSubGroupTag->val);
+/*  line looks something like: 
+ *       view Views FiltTransfrags=Filtered_Transfrags Transfrags=Raw_Transfrags */
+char *viewWord = nextWord(&line);
+assert(sameString(viewWord, "view"));
+nextWord(&line);	// Just skip over name to label views with
+struct slPair *viewList = NULL;
+char *thisEqThat;
+while ((thisEqThat = nextWord(&line)) != NULL)
+    {
+    char *eq = strchr(thisEqThat, '=');
+    if (eq == NULL)	/* Report the offending word; eq itself is NULL here. */
+        recordAbort(complexRecord, "expecting this=that got %s in %s tag", 
+		thisEqThat, viewSubGroupTag->name);
+    *eq = 0;
+    slPairAdd(&viewList, thisEqThat, lmCloneString(lm, eq+1));
+    }
+slReverse(&viewList);
+
+/* Write self, the parent record.  This is just most but not all tags of the original record,
+ * omitting settingsByView and visibilityViewDefaults which will be moved into the view. */
+struct raTag *t;
+for (t = complexRecord->tagList; t != NULL; t = t->next)
+    {
+    if (!sameString(t->name, "settingsByView") && !sameString(t->name, "visibilityViewDefaults"))
+	fputs(t->text, f);
+    }
+
+/* Parse out visibilityViewDefaults. */
+struct hash *visHash = NULL;
+struct raTag *visTag = raRecordFindTag(complexRecord, "visibilityViewDefaults");
+if (visTag != NULL)
+    {
+    char *dupe = lmCloneString(lm, visTag->val);
+    visHash = hashThisEqThatLine(dupe, complexRecord->startLineIx, FALSE);
+    }
+
+/* Parse out settingsByView. */
+struct raTag *settingsTag = raRecordFindTag(complexRecord, "settingsByView");
+struct hash  *settingsHash = hashNew(4);
+if (settingsTag != NULL)
+    {
+    char *dupe = lmCloneString(lm, settingsTag->val);
+    char *line = dupe;
+    char *viewName;
+    while ((viewName = nextWord(&line)) != NULL)
+	{
+	char *settings = strchr(viewName, ':');
+	if (settings == NULL)
+	    recordAbort(complexRecord, "missing colon in settingsByView '%s'", viewName);
+	struct slPair *el, *list = NULL;
+	*settings++ = 0;
+	if (!slPairFind(viewList, viewName))
+	    recordAbort(complexRecord, "View '%s' in settingsByView is not defined in subGroup",
+	    	viewName);
+	char *words[32];
+	int cnt,ix;
+	cnt = chopByChar(settings,',',words,ArraySize(words));
+	for (ix=0; ix<cnt; ix++)
+	    {
+	    char *name = words[ix];
+	    char *val = strchr(name, '=');
+	    if (val == NULL)
+		recordAbort(complexRecord, "Missing equals in settingsByView on %s", name);
+	    *val++ = 0;
+
+	    AllocVar(el);
+	    el->name = cloneString(name);
+	    el->val = cloneString(val);
+	    slAddHead(&list,el);
+	    }
+	slReverse(&list);
+	hashAdd(settingsHash, viewName, list);
+	}
+    }
+
+/* Go through each view and write it, and then the children who are in that view. */
+struct slPair *view;
+for (view = viewList; view != NULL; view = view->next)
+    {
+    char viewTrackName[256];
+    safef(viewTrackName, sizeof(viewTrackName), "%sView%s", complexRecord->key, view->name);
+    fprintf(f, "\n");	/* Blank line to open view. */
+    fprintf(f, "    track %s\n", viewTrackName);
+    char *shortLabel = lmCloneString(lm, view->val);
+    subChar(shortLabel, '_', ' ');
+    fprintf(f, "    shortLabel %s\n", shortLabel);
+    fprintf(f, "    view %s\n", view->name);
+    char *vis = NULL;
+    if (visHash != NULL)
+         vis = hashFindVal(visHash, view->name);
+    if (vis != NULL)
+	{
+	/* A trailing '+' on the visibility becomes a separate viewUi on line. */
+	int len = strlen(vis);
+	boolean gotPlus = (lastChar(vis) == '+');
+	if (gotPlus)
+	    len -= 1;
+	char visOnly[len+1];
+	memcpy(visOnly, vis, len);
+	visOnly[len] = 0;
+	fprintf(f, "    visibility %s\n", visOnly);
+	if (gotPlus)
+	    fprintf(f, "    viewUi on\n");
+	}
+    fprintf(f, "    subTrack %s\n", complexRecord->key);
+    struct slPair *settingList = hashFindVal(settingsHash, view->name);
+    struct slPair *setting;
+    for (setting = settingList; setting != NULL; setting = setting->next)
+	fprintf(f, "    %s %s\n", setting->name, (char*)setting->val);
+
+    /* Scan for children that are in this view. */
+    struct raRecord *r;
+    for (r = file->recordList; r != NULL; r = r->next)
+	{
+	struct raTag *subTrackTag = raRecordFindTag(r, "subTrack");
+	if (subTrackTag != NULL)
+	    {
+	    if (startsWithWord(complexRecord->key, subTrackTag->val))
+		{
+		struct raTag *subGroupsTag = raRecordFindTag(r, "subGroups");
+		if (subGroupsTag != NULL)
+		    {
+		    struct hash *hash = hashThisEqThatLine(subGroupsTag->val, 
+			    r->startLineIx, FALSE);
+		    char *viewName = hashFindVal(hash, "view");
+		    if (viewName != NULL && sameString(viewName, view->name))
+			{
+			writeRecordAsSubOfView(r, f, viewTrackName);
+			}
+		    hashFree(&hash);
+		    }
+		}
+	    }
+	}
+    }
+}
+
+void rewriteTrack(struct raLevel *level, struct raFile *file, 
+	struct raRecord *r, FILE *f, struct lm *lm)
+/* Write one track record.  A record with a view subGroup is expanded into itself plus
+ * its views.  A child of such a record writes nothing here.  Any other record is
+ * copied through with its tags unchanged. */
+{
+struct raRecord *recordInParentFile = findRecordInParentFileLevel(level, r);
+struct raTag *subTrack = raRecordFindTag(r, "subTrack");
+
+if (shouldBeParentOfView(r))
+    {
+    /* We are the parent. */
+    if (recordInParentFile != NULL)
+        recordAbort(r, "Can't handle settingsByViews with records in parent file levels");
+    rewriteSettingsByViewComplex(file, r, f, lm);
+    return;
+    }
+
+if (subTrack == NULL)
+    {
+    /* Plain track: neither a parent of views nor anybody's child. */
+    raRecordWriteTags(r, f);
+    return;
+    }
+
+/* We are a child.  Look up our parent; if it is a parent of views we emit nothing
+ * here, as rewriteSettingsByViewComplex is what writes the children of such a
+ * parent, underneath their views. */
+char *parentName = lmCloneFirstWord(lm, subTrack->val);
+char *release = findRelease(r, level);
+struct raRecord *rParent = findRecordInLevelOrLevelsUp(level, parentName, release);
+if (rParent == NULL)
+    recordAbort(r, "Couldn't find rParent track %s, subTrack %s.", r->key, subTrack->val);
+if (!shouldBeParentOfView(rParent))
+    raRecordWriteTags(r, f);
+else if (rParent->file != r->file)
+    recordAbort(r, "complex parent %s not in same file as subTrack %s", 
+	rParent->key, r->key);
+}
+
+void rewriteFile(struct raLevel *level, struct raFile *file, char *outName, struct lm *lm)
+/* Rewrite file to outName, consulting symbols in parent.  Aborts before creating outName if file is NULL (input was not read). */
+{
+if (file == NULL)
+    errAbort("No input file was read, so nothing to write to %s", outName);
+FILE *f = mustOpen(outName, "w");
+struct raRecord *r;
+for (r = file->recordList; r != NULL; r = r->next)
+    {
+    /* Rewrite stanzas that lead with a track tag.  Other stanzas pass through. */
+    struct raTag *t = r->tagList;
+    if (sameString(t->name, "track"))
+	rewriteTrack(level, file, r, f, lm);
+    else
+	{
+	for (; t != NULL; t = t->next)
+	    fputs(t->text, f);
+	}
+    if (r->endComments != NULL)
+        fputs(r->endComments, f);
+    }
+fputs(file->endSpace, f);
+carefulClose(&f);
+}
+
+void tdbRewriteViewsToSubtracks(char *inFile, char *outFile)
+/* Read inFile plus its includes, then write the rewritten records of inFile itself to outFile. */
+{
+struct lm *rootLm = lmInit(0);
+struct raLevel *rootLevel = raLevelReadOneFile(inFile, rootLm);
+rewriteFile(rootLevel, rootLevel->fileList, outFile, rootLm);
+}
+
+int main(int argc, char *argv[])
+/* Process command line: after option parsing exactly two arguments (in and out file) must remain. */
+{
+optionInit(&argc, argv, options);
+if (argc != 3)
+    usage();
+tdbRewriteViewsToSubtracks(argv[1], argv[2]);
+return 0;
+}