src/hg/utils/fiveCToBed/fiveCToBed.c 1.1
1.1 2009/10/26 15:31:48 aamp
Added a little converter for 5C data.
Index: src/hg/utils/fiveCToBed/fiveCToBed.c
===================================================================
RCS file: src/hg/utils/fiveCToBed/fiveCToBed.c
diff -N src/hg/utils/fiveCToBed/fiveCToBed.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/utils/fiveCToBed/fiveCToBed.c 26 Oct 2009 15:31:48 -0000 1.1
@@ -0,0 +1,141 @@
+/* fiveCToBed - Convert 5C data (matrix) to bed. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "hdb.h"
+#include "options.h"
+#include "jksql.h"
+
+static char const rcsid[] = "$Id$"; /* Revision id string; not referenced elsewhere in this file. "$Id$" looks like an RCS/CVS keyword - confirm it is expanded at checkout. */
+
+void usage()
+/* Report the command-line synopsis through errAbort(); main() calls this
+ * when the argument count is wrong. */
+{
+errAbort("fiveCToBed - Convert 5C data (matrix) to bed\n"
+    "usage:\n"
+    " fiveCToBed in.txt outRoot\n"
+    );
+}
+
+static struct optionSpec options[] = { /* Option table handed to optionInit() in main(). */
+ {NULL, 0}, /* Only entry; presumably the end-of-list marker, i.e. no options are defined - confirm. */
+};
+
+struct bed *getVert(char **choppedArray, int wordCount)
+/* Turn the header row's words into a bed list: each word that parses as a
+ * chrom range gets chrom/chromStart/chromEnd set; other words are skipped. */
+{
+struct bed *bedList = NULL;
+int wordIx, start, end;
+for (wordIx = 0; wordIx < wordCount; wordIx++)
+    {
+    char *chromName;
+    struct bed *el;
+    AllocVar(el);
+    if (!hgParseChromRange(NULL, choppedArray[wordIx], &chromName, &start, &end))
+        {
+        freeMem(el);   /* not a range: discard the unused element */
+        continue;
+        }
+    el->chrom = cloneString(chromName);
+    el->chromStart = (unsigned)start;
+    el->chromEnd = (unsigned)end;
+    slAddHead(&bedList, el);
+    }
+slReverse(&bedList);   /* elements were pushed at the head, so undo the reversal */
+return bedList;
+}
+
+void swapChromThick(struct bed *someBed)
+/* Exchange chromStart with thickStart and chromEnd with thickEnd, in place. */
+{
+unsigned oldStart = someBed->chromStart;
+unsigned oldEnd = someBed->chromEnd;
+someBed->chromStart = someBed->thickStart;
+someBed->chromEnd = someBed->thickEnd;
+someBed->thickStart = oldStart;
+someBed->thickEnd = oldEnd;
+}
+
+void fiveCToBed(char *infile, char *outRoot)
+/* fiveCToBed - Convert 5C data (matrix) to bed.  Reads the tab-separated
+ * matrix in infile: the header row holds the column (Dhs) ranges and the
+ * first word of each later row holds that row's (Tss) range.  Writes
+ * <outRoot>DhsLoci.bed, TssLoci.bed, TssInter.bed and DhsInter.bed; rows
+ * whose first word is not a chrom range are skipped. */
+{
+struct lineFile *lf = lineFileOpen(infile, TRUE);
+char *words[4096];
+/* deal with line 1... it'll be pretty long */
+int count = lineFileChopTab(lf, words);
+struct bed *vertList = getVert(words, count), *vert;
+char tssLociOutName[128], dhsLociOutName[128];
+char tssInterOutName[128], dhsInterOutName[128];
+FILE *tssLociOut, *dhsLociOut, *tssInterOut, *dhsInterOut;
+safef(tssLociOutName, sizeof(tssLociOutName), "%sTssLoci.bed", outRoot);
+safef(dhsLociOutName, sizeof(dhsLociOutName), "%sDhsLoci.bed", outRoot);
+safef(tssInterOutName, sizeof(tssInterOutName), "%sTssInter.bed", outRoot);
+safef(dhsInterOutName, sizeof(dhsInterOutName), "%sDhsInter.bed", outRoot);
+tssLociOut = mustOpen(tssLociOutName, "w");
+dhsLociOut = mustOpen(dhsLociOutName, "w");
+tssInterOut = mustOpen(tssInterOutName, "w");
+dhsInterOut = mustOpen(dhsInterOutName, "w");
+/* the first line has enough info for one of the four beds outputted. */
+for (vert = vertList; vert != NULL; vert = vert->next)
+    bedTabOutN(vert, 3, dhsLociOut);
+carefulClose(&dhsLociOut);
+/* deal with remaining lines */
+while (lineFileNextRowTab(lf, words, count))
+    {
+    struct bed rowBed;
+    char *chrom;
+    int s, e;
+    char *range = cloneString(words[0]);
+    if (hgParseChromRange(NULL, range, &chrom, &s, &e))
+        {
+        int i;
+        ZeroVar(&rowBed);   /* leave no field of the stack struct indeterminate */
+        rowBed.chrom = cloneString(chrom);
+        rowBed.chromStart = (unsigned)s;
+        rowBed.chromEnd = (unsigned)e;
+        bedTabOutN(&rowBed, 3, tssLociOut);
+        rowBed.name = ".";   /* literal is only read by bedTabOutN; nothing to free */
+        rowBed.score = 1000;
+        rowBed.strand[0] = '+';
+        rowBed.strand[1] = '\0';
+        /* NOTE(review): column i is paired with the (i-1)th header bed; assumes getVert skipped only the header's first word - confirm. */
+        for (i = 1, vert = vertList; (vert != NULL) && (i < count); vert = vert->next, i++)
+            {
+            unsigned val = sqlUnsigned(words[i]);
+            if (val > 0)
+                {
+                rowBed.thickStart = vert->chromStart;
+                rowBed.thickEnd = vert->chromEnd;
+                rowBed.itemRgb = val;   /* the matrix value is carried in the itemRgb column */
+                bedTabOutN(&rowBed, 9, tssInterOut);
+                swapChromThick(&rowBed);   /* same record keyed on the column range */
+                bedTabOutN(&rowBed, 9, dhsInterOut);
+                swapChromThick(&rowBed);   /* restore row coordinates for the next column */
+                }
+            }
+        freeMem(rowBed.chrom);   /* was leaked once per row */
+        }
+    freez(&range);
+    }
+carefulClose(&tssLociOut);
+carefulClose(&tssInterOut);
+carefulClose(&dhsInterOut);
+lineFileClose(&lf);
+bedFreeList(&vertList);
+}
+
+int main(int argc, char *argv[])
+/* Parse options, insist on exactly two arguments, then run the conversion. */
+{
+enum { expectedArgc = 3 };   /* program name, in.txt, outRoot */
+optionInit(&argc, argv, options);
+if (argc != expectedArgc) usage();
+fiveCToBed(argv[1], argv[2]);
+return 0;
+}