src/utils/bedClip/bedClip.c 1.1

1.1 2009/09/10 02:22:28 kent
The bedClip utility seems to work.
Index: src/utils/bedClip/bedClip.c
===================================================================
RCS file: src/utils/bedClip/bedClip.c
diff -N src/utils/bedClip/bedClip.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/utils/bedClip/bedClip.c	10 Sep 2009 02:22:28 -0000	1.1
@@ -0,0 +1,85 @@
+/* bedClip - Remove lines from bed file that refer to off-chromosome places. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "bbiFile.h"
+#include "sqlNum.h"
+#include "obscure.h"
+
+static char const rcsid[] = "$Id$";	/* CVS/RCS Id keyword placeholder; not referenced elsewhere in this file */
+
+void usage()
+/* Report the command synopsis through errAbort and exit. */
+{
+static const char helpText[] =
+  "bedClip - Remove lines from bed file that refer to off-chromosome places.\n"
+  "usage:\n"
+  "   bedClip input.bed chrom.sizes output.bed\n"
+  "options:\n"
+  "   -verbose=2 - set to get list of lines clipped and why\n";
+errAbort("%s", helpText);
+}
+
+static struct optionSpec options[] = {	/* option table passed to optionInit() in main() */
+   {NULL, 0},	/* sole entry; presumably the end-of-table marker - confirm in options.h */
+};
+
+void bedClip(char *inFile, char *chromSizes, char *outFile)
+/* bedClip - Copy inFile to outFile, leaving out lines whose start is negative, whose
+ * end <= start, or whose end is beyond that chromosome's size from chromSizes. */
+{
+struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes);
+struct lineFile *lf = lineFileOpen(inFile, TRUE);
+FILE *f = mustOpen(outFile, "w");
+char *line;
+while (lineFileNextReal(lf, &line))
+    {
+    char *chrom = nextWord(&line);
+    char *startString = nextWord(&line);
+    char *endString = nextWord(&line);
+    if (endString == NULL)
+        errAbort("Need at least three fields line %d of %s", lf->lineIx, lf->fileName);
+    if (startString[0] == '-')
+	{
+	verbose(2, "Clipping negative line %d of %s\n", lf->lineIx, lf->fileName);
+        continue;		// Clip off negatives
+	}
+    if (!isdigit((unsigned char)startString[0]))	// isdigit needs an unsigned char value
+        errAbort("Expecting number got %s line %d of %s", startString, lf->lineIx, lf->fileName);
+    if (!isdigit((unsigned char)endString[0]))
+        errAbort("Expecting number got %s line %d of %s", endString, lf->lineIx, lf->fileName);
+    unsigned start = sqlUnsigned(startString);	/* unsigned, not int: values above INT_MAX */
+    unsigned end = sqlUnsigned(endString);	/* used to go negative and escape clipping */
+    if (start >= end)
+	{
+	verbose(2, "Clipping end <= start line %d of %s\n", lf->lineIx, lf->fileName);
+	continue;
+	}
+    struct hashEl *hel = hashLookup(chromSizesHash, chrom);
+    if (hel == NULL)
+        errAbort("Chromosome %s isn't in %s line %d of %s\n", chrom, chromSizes, lf->lineIx, lf->fileName);
+    if (end > (unsigned)ptToInt(hel->val))
+	{
+	verbose(2, "Clipping end > chromSize line %d of %s\n", lf->lineIx, lf->fileName);
+	continue;
+	}
+    fprintf(f, "%s\t%s\t%s", chrom, startString, endString);	// first three fields, tab-separated
+    line = skipLeadingSpaces(line);
+    if (line != NULL && line[0] != 0)
+        fprintf(f, "\t%s", line);	// whatever followed the end field, unchanged
+    fputc('\n', f);
+    }
+lineFileClose(&lf);
+carefulClose(&f);
+}
+
+int main(int argc, char *argv[])
+/* Process command line: call usage() unless argc is 4 after optionInit, then run bedClip. */
+{
+optionInit(&argc, argv, options);	/* receives &argc, presumably so it can strip options - confirm */
+if (argc != 4)	/* argv[0] plus input.bed, chrom.sizes, output.bed */
+    usage();
+bedClip(argv[1], argv[2], argv[3]);
+return 0;
+}