c6240b7df11882be9197ba483d5df6369c98ec15
braney
  Thu May 12 10:21:48 2011 -0700
add the ability to have tab separated fields
diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c
index ce38398..06384b9 100644
--- src/utils/bedToBigBed/bedToBigBed.c
+++ src/utils/bedToBigBed/bedToBigBed.c
@@ -8,63 +8,67 @@
 #include "asParse.h"
 #include "basicBed.h"
 #include "sig.h"
 #include "rangeTree.h"
 #include "zlibFace.h"
 #include "sqlNum.h"
 #include "bigBed.h"
 
 static char const rcsid[] = "$Id: bedToBigBed.c,v 1.24 2010/05/19 18:51:13 hiram Exp $";
 
 int blockSize = 256;
 int itemsPerSlot = 512;
 int bedFields = 0;
 char *as = NULL;
 static boolean doCompress = FALSE;
+static boolean tabSep = FALSE;	/* Set from -tabs in main(); when TRUE lines are chopped on tab instead of by white space. */
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedToBigBed v. %d - Convert bed file to bigBed.\n"
   "usage:\n"
   "   bedToBigBed in.bed chrom.sizes out.bb\n"
   "Where in.bed is in one of the ascii bed formats, but not including track lines\n"
   "and chrom.sizes is two column: <chromosome name> <size in bases>\n"
   "and out.bb is the output indexed big bed file.\n"
   "The in.bed file must be sorted by chromosome,start,\n"
   "  to sort a bed file, use the unix sort command:\n"
   "     sort -k1,1 -k2,2n unsorted.bed > sorted.bed\n"
   "\n"
   "options:\n"
   "   -blockSize=N - Number of items to bundle in r-tree.  Default %d\n"
   "   -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n"
   "   -bedFields=N - Number of fields that fit standard bed definition.  If undefined\n"
   "                  assumes all fields in bed are defined.\n"
   "   -as=fields.as - If have non-standard fields, it's great to put a definition\n"
   "                   of each field in a row in AutoSql format here.\n"
   "   -unc - If set, do not use compression."
+  "\n   -tabs - If set, expect fields to be tab separated, normally\n"
+  "           expects white space separator.\n"
   , bbiCurrentVersion, blockSize, itemsPerSlot
   );
 }
 
 static struct optionSpec options[] = {
    {"blockSize", OPTION_INT},
    {"itemsPerSlot", OPTION_INT},
    {"bedFields", OPTION_INT},
    {"as", OPTION_STRING},
    {"unc", OPTION_BOOLEAN},
+   {"tabs", OPTION_BOOLEAN},	/* Read in main() to set tabSep. */
    {NULL, 0},
 };
 
 void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, 
 	bits16 definedFieldCount, int itemsPerSlot, struct bbiBoundsArray *bounds, 
 	int sectionCount, boolean doCompress, FILE *f, 
 	int resTryCount, int resScales[], int resSizes[],
 	bits16 *retFieldCount, bits16 *retDefinedFieldCount, bits32 *retMaxBlockSize)
 /* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
  * to boundsArray */
 {
 int maxBlockSize = 0;
 struct bbiChromUsage *usage = usageList;
 char *line, **row = NULL;
 int fieldCount = 0, fieldAlloc=0, lastField = 0;
@@ -82,51 +86,58 @@
     resEnds[resTry] = 0;
 boolean atEnd = FALSE, sameChrom = FALSE;
 bits32 start = 0, end = 0;
 char *chrom = NULL;
 
 for (;;)
     {
     /* Get next line of input if any. */
     if (lineFileNextReal(lf, &line))
 	{
 	/* First time through figure out the field count, and if not set, the defined field count. */
 	if (fieldCount == 0)
 	    {
 	    if (as == NULL)
 		{
+		if (tabSep)
+		    fieldCount = chopString(line, "\t", NULL, 0);
+		else
 		fieldCount = chopByWhite(line, NULL, 0);
 		if (definedFieldCount == 0)
 		    definedFieldCount = fieldCount;
 		char *asText = bedAsDef(definedFieldCount, fieldCount);
 		as = asParseText(asText);
 		allocedAs = TRUE;
 		freeMem(asText);
 		}
 	    else
 		{
 		fieldCount = slCount(as->columnList);
 		}
 	    fieldAlloc = fieldCount + 1;
 	    lastField = fieldCount - 1;
 	    AllocArray(row, fieldAlloc);
 	    *retFieldCount = fieldCount;
 	    *retDefinedFieldCount = definedFieldCount;
 	    }
 
 	/* Chop up line and make sure the word count is right. */
-	int wordCount = chopByWhite(line, row, fieldAlloc);
+	int wordCount;
+	if (tabSep)
+	    wordCount = chopString(line, "\t", row, fieldAlloc);
+	else
+	    wordCount = chopByWhite(line, row, fieldAlloc);
 	lineFileExpectWords(lf, fieldCount, wordCount);
 
 	/* Parse out first three fields. */
 	chrom = row[0];
 	start = lineFileNeedNum(lf, row, 1);
 	end = lineFileNeedNum(lf, row, 2);
 
 	/* Check remaining fields are formatted right. */
 	if (fieldCount > 3)
 	    {
 	    /* Go through and check that numerical strings really are numerical. */
 	    struct asColumn *asCol = slElementFromIx(as->columnList, 3);
 	    int i;
 	    for (i=3; i<fieldCount; ++i)
 		{
@@ -657,23 +668,24 @@
 /* bedToBigBed - Convert bed file to bigBed.. */
 {
 bbFileCreate(inName, chromSizes, blockSize, itemsPerSlot, bedFields, as, 
 	doCompress, outName);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 blockSize = optionInt("blockSize", blockSize);
 itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot);
 bedFields = optionInt("bedFields", bedFields);
 as = optionVal("as", as);
 doCompress = !optionExists("unc");
+tabSep = optionExists("tabs");	/* Makes writeBlocks chop each line on tab rather than by white space. */
 if (argc != 4)
     usage();
 bedToBigBed(argv[1], argv[2], argv[3]);
 optionFree();
 if (verboseLevel() > 1)
     printVmPeak();
 return 0;
 }