src/hg/bedSplitOnChrom/bedSplitOnChrom.c 1.2

1.2 2009/09/29 00:17:04 braney
Added a couple of options (-strand and -noCheck) to facilitate use in the ENCODE pipeline.
Index: src/hg/bedSplitOnChrom/bedSplitOnChrom.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/bedSplitOnChrom/bedSplitOnChrom.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/bedSplitOnChrom/bedSplitOnChrom.c	28 Apr 2008 10:16:00 -0000	1.1
+++ src/hg/bedSplitOnChrom/bedSplitOnChrom.c	29 Sep 2009 00:17:04 -0000	1.2
@@ -8,8 +8,10 @@
 
 static char const rcsid[] = "$Id$";
 
 int maxChromCount = 256;
+boolean nfCheck;    /* check for number of fields consistency */
+boolean doStrand;   /* append strand to file name */
 
 void usage()
 /* Explain usage and exit. */
 {
@@ -17,13 +19,17 @@
   "bedSplitOnChrom - Split bed into a directory with one file per chromosome.\n"
   "usage:\n"
   "   bedSplitOnChrom inFile.bed outDir\n"
   "options:\n"
-  "   maxChromCount=N Maximum number of different chromosomes, default %d\n"
+  "   -strand   append strand to file name\n"
+  "   -noCheck  do not check to see if number of fields is same in every record\n"
+  "   -maxChromCount=N Maximum number of different chromosomes, default %d\n"
   , maxChromCount);
 }
 
 static struct optionSpec options[] = {
+   {"strand", OPTION_BOOLEAN},
+   {"noCheck", OPTION_BOOLEAN},
    {"maxChromCount", OPTION_INT},
    {NULL, 0},
 };
 
@@ -47,12 +53,27 @@
 
 /* Output as needed, creating a hash of open files. */
 char path[PATH_LEN];
 struct hash *fileHash = hashNew(8);
+char buffer[4096];
+
 for (;;)
     {
     /* Look up file in hash, creating a new file if need be. */
     char *chrom = row[0];
+
+    if (doStrand)
+	{
+	char *ptr = buffer;
+
+	for(;*chrom; chrom++, ptr++)
+	    *ptr = *chrom;
+
+	*ptr++ = row[5][0];
+	*ptr++ = 0;
+
+	chrom = buffer;
+	}
     FILE *f = hashFindVal(fileHash, chrom);
     if (f == NULL)
         {
 	if (fileHash->elCount >= maxChromCount)
@@ -80,9 +101,9 @@
      * usual number of fields. */
     int fieldsInLine = lineFileChopNext(lf, row, ArraySize(row));
     if (fieldsInLine == 0)
         break;
-    if (fieldsInLine != numFields)
+    if (nfCheck && (fieldsInLine != numFields))
 	errAbort("First line in %s had %d fields, but line %d has %d fields.",
 		lf->fileName, numFields, lf->lineIx, fieldsInLine);
     }
 
@@ -101,7 +122,9 @@
 optionInit(&argc, argv, options);
 maxChromCount = optionInt("maxChromCount", maxChromCount);
 if (argc != 3)
     usage();
+nfCheck = !optionExists("noCheck");
+doStrand = optionExists("strand");
 bedSplitOnChrom(argv[1], argv[2]);
 return 0;
 }