src/hg/utils/fiveCToBed/fiveCToBed.c 1.1

1.1 2009/10/26 15:31:48 aamp
Added a little converter for 5C data.
Index: src/hg/utils/fiveCToBed/fiveCToBed.c
===================================================================
RCS file: src/hg/utils/fiveCToBed/fiveCToBed.c
diff -N src/hg/utils/fiveCToBed/fiveCToBed.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/utils/fiveCToBed/fiveCToBed.c	26 Oct 2009 15:31:48 -0000	1.1
@@ -0,0 +1,141 @@
+/* fiveCToBed - Convert 5C data (matrix) to bed. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+static char const rcsid[] = "$Id$";	/* RCS/CVS keyword string; not referenced anywhere in this file */
+
+void usage()
+/* Report the command synopsis through errAbort (explain usage and exit). */
+{
+static char const synopsis[] =
+  "fiveCToBed - Convert 5C data (matrix) to bed\n"
+  "usage:\n"
+  "   fiveCToBed in.txt outRoot\n";
+errAbort("%s", synopsis);
+}
+
+static struct optionSpec options[] = {	/* option table passed to optionInit() in main */
+   {NULL, 0},	/* sole entry, NULL name - presumably the list terminator; no named options */
+};
+
+struct bed *getVert(char **choppedArray, int wordCount)
+/* Build a bed list from the header-row words, kept in word order.  Each
+ * word is handed to hgParseChromRange(); words it rejects are skipped, so
+ * the list may be shorter than wordCount.  Only chrom, chromStart and
+ * chromEnd are assigned here.  The caller in this file frees the result. */
+{
+struct bed *head = NULL;
+int col;
+for (col = 0; col < wordCount; col++)
+    {
+    struct bed *item;
+    char *chromName;
+    int start, end;
+    if (!hgParseChromRange(NULL, choppedArray[col], &chromName, &start, &end))
+	continue;
+    AllocVar(item);
+    item->chrom = cloneString(chromName);
+    item->chromStart = (unsigned)start;
+    item->chromEnd = (unsigned)end;
+    slAddHead(&head, item);
+    }
+slReverse(&head);	/* slAddHead built the list back to front */
+return head;
+}
+
+void swapChromThick(struct bed *someBed)
+/* Exchange chromStart/chromEnd with thickStart/thickEnd in place;
+ * applying it twice puts the original values back. */
+{
+unsigned oldStart = someBed->chromStart, oldEnd = someBed->chromEnd;
+someBed->chromStart = someBed->thickStart;
+someBed->chromEnd = someBed->thickEnd;
+someBed->thickStart = oldStart;
+someBed->thickEnd = oldEnd;
+}
+
+void fiveCToBed(char *infile, char *outRoot)
+/* fiveCToBed - Convert 5C data (matrix) to bed.
+ * Line 1 of the tab-separated infile holds column positions; each later line
+ * holds a row position in word 0 followed by unsigned values.  Writes
+ * outRoot + "DhsLoci.bed" (bed3 of parsed column positions), "TssLoci.bed"
+ * (bed3 of parsed row positions), "TssInter.bed" (bed9 per nonzero value: row
+ * as chrom range, column as thick range, value in itemRgb) and "DhsInter.bed"
+ * (same records with the two ranges swapped).  Values are paired with the
+ * column list from word 1 on, which assumes word 0 of line 1 is the only
+ * header word getVert() skipped - TODO confirm. */
+{
+struct lineFile *lf = lineFileOpen(infile, TRUE);
+char *words[4096];
+int count = lineFileChopTab(lf, words);
+struct bed *vert, *vertList = getVert(words, count);
+char tssLociOutName[128], dhsLociOutName[128];
+char tssInterOutName[128], dhsInterOutName[128];
+FILE *tssLociOut, *dhsLociOut, *tssInterOut, *dhsInterOut;
+safef(tssLociOutName, sizeof(tssLociOutName), "%sTssLoci.bed", outRoot);
+safef(dhsLociOutName, sizeof(dhsLociOutName), "%sDhsLoci.bed", outRoot);
+safef(tssInterOutName, sizeof(tssInterOutName), "%sTssInter.bed", outRoot);
+safef(dhsInterOutName, sizeof(dhsInterOutName), "%sDhsInter.bed", outRoot);
+tssLociOut = mustOpen(tssLociOutName, "w");
+dhsLociOut = mustOpen(dhsLociOutName, "w");
+tssInterOut = mustOpen(tssInterOutName, "w");
+dhsInterOut = mustOpen(dhsInterOutName, "w");
+/* the first line has enough info for one of the four beds outputted. */
+for (vert = vertList; vert != NULL; vert = vert->next)
+    bedTabOutN(vert, 3, dhsLociOut);
+carefulClose(&dhsLociOut);
+/* deal with remaining lines */
+while (lineFileNextRowTab(lf, words, count))
+    {
+    struct bed rowBed = {0};	/* stack record reused for every output line of this row */
+    char *chrom;
+    int s, e, i;
+    char *range = cloneString(words[0]);
+    if (hgParseChromRange(NULL, range, &chrom, &s, &e))
+	{
+	rowBed.chrom = cloneString(chrom);
+	rowBed.chromStart = (unsigned)s;
+	rowBed.chromEnd = (unsigned)e;
+	bedTabOutN(&rowBed, 3, tssLociOut);
+	rowBed.name = cloneString(".");
+	rowBed.score = 1000;
+	rowBed.strand[0] = '+';
+	rowBed.strand[1] = '\0';
+	for (i = 1, vert = vertList; (vert != NULL) && (i < count); vert = vert->next, i++)
+	    {
+	    unsigned val = sqlUnsigned(words[i]);
+	    if (val == 0)
+		continue;
+	    rowBed.thickStart = vert->chromStart;
+	    rowBed.thickEnd = vert->chromEnd;
+	    rowBed.itemRgb = val;
+	    bedTabOutN(&rowBed, 9, tssInterOut);
+	    swapChromThick(&rowBed);	/* emit the mirrored record ... */
+	    bedTabOutN(&rowBed, 9, dhsInterOut);
+	    swapChromThick(&rowBed);	/* ... then restore for the next column */
+	    }
+	freeMem(rowBed.chrom);	/* release this row's clones; they were leaked before */
+	freeMem(rowBed.name);
+	}
+    freez(&range);
+    }
+carefulClose(&tssLociOut);
+carefulClose(&tssInterOut);
+carefulClose(&dhsInterOut);
+lineFileClose(&lf);
+bedFreeList(&vertList);
+}
+
+int main(int argc, char *argv[])
+/* Parse options, require in.txt and outRoot on the command line, convert. */
+{
+const int wantedArgc = 3;	/* program name, in.txt, outRoot */
+optionInit(&argc, argv, options);
+if (argc != wantedArgc) usage();
+fiveCToBed(argv[1], argv[2]);
+return 0;
+}