c6240b7df11882be9197ba483d5df6369c98ec15 braney Thu May 12 10:21:48 2011 -0700 add the ability to have tab separated fields diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c index ce38398..06384b9 100644 --- src/utils/bedToBigBed/bedToBigBed.c +++ src/utils/bedToBigBed/bedToBigBed.c @@ -8,63 +8,67 @@ #include "asParse.h" #include "basicBed.h" #include "sig.h" #include "rangeTree.h" #include "zlibFace.h" #include "sqlNum.h" #include "bigBed.h" static char const rcsid[] = "$Id: bedToBigBed.c,v 1.24 2010/05/19 18:51:13 hiram Exp $"; int blockSize = 256; int itemsPerSlot = 512; int bedFields = 0; char *as = NULL; static boolean doCompress = FALSE; +static boolean tabSep = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "bedToBigBed v. %d - Convert bed file to bigBed.\n" "usage:\n" " bedToBigBed in.bed chrom.sizes out.bb\n" "Where in.bed is in one of the ascii bed formats, but not including track lines\n" "and chrom.sizes is two column: <chromosome name> <size in bases>\n" "and out.bb is the output indexed big bed file.\n" "The in.bed file must be sorted by chromosome,start,\n" " to sort a bed file, use the unix sort command:\n" " sort -k1,1 -k2,2n unsorted.bed > sorted.bed\n" "\n" "options:\n" " -blockSize=N - Number of items to bundle in r-tree. Default %d\n" " -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n" " -bedFields=N - Number of fields that fit standard bed definition. If undefined\n" " assumes all fields in bed are defined.\n" " -as=fields.as - If have non-standard fields, it's great to put a definition\n" " of each field in a row in AutoSql format here.\n" - " -unc - If set, do not use compression." + " -unc - If set, do not use compression.\n"
+ " -tabs - If set, expect fields to be tab separated, normally\n" + " expects white space separator.\n" , bbiCurrentVersion, blockSize, itemsPerSlot ); } static struct optionSpec options[] = { {"blockSize", OPTION_INT}, {"itemsPerSlot", OPTION_INT}, {"bedFields", OPTION_INT}, {"as", OPTION_STRING}, {"unc", OPTION_BOOLEAN}, + {"tabs", OPTION_BOOLEAN}, {NULL, 0}, }; void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, bits16 definedFieldCount, int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, boolean doCompress, FILE *f, int resTryCount, int resScales[], int resSizes[], bits16 *retFieldCount, bits16 *retDefinedFieldCount, bits32 *retMaxBlockSize) /* Read through lf, writing it in f. Save starting points of blocks (every itemsPerSlot) * to boundsArray */ { int maxBlockSize = 0; struct bbiChromUsage *usage = usageList; char *line, **row = NULL; int fieldCount = 0, fieldAlloc=0, lastField = 0; @@ -82,51 +86,58 @@ resEnds[resTry] = 0; boolean atEnd = FALSE, sameChrom = FALSE; bits32 start = 0, end = 0; char *chrom = NULL; for (;;) { /* Get next line of input if any. */ if (lineFileNextReal(lf, &line)) { /* First time through figure out the field count, and if not set, the defined field count. */ if (fieldCount == 0) { if (as == NULL) { + if (tabSep) + fieldCount = chopString(line, "\t", NULL, 0); + else fieldCount = chopByWhite(line, NULL, 0); if (definedFieldCount == 0) definedFieldCount = fieldCount; char *asText = bedAsDef(definedFieldCount, fieldCount); as = asParseText(asText); allocedAs = TRUE; freeMem(asText); } else { fieldCount = slCount(as->columnList); } fieldAlloc = fieldCount + 1; lastField = fieldCount - 1; AllocArray(row, fieldAlloc); *retFieldCount = fieldCount; *retDefinedFieldCount = definedFieldCount; } /* Chop up line and make sure the word count is right. 
*/ - int wordCount = chopByWhite(line, row, fieldAlloc); + int wordCount; + if (tabSep) + wordCount = chopString(line, "\t", row, fieldAlloc); + else + wordCount = chopByWhite(line, row, fieldAlloc); lineFileExpectWords(lf, fieldCount, wordCount); /* Parse out first three fields. */ chrom = row[0]; start = lineFileNeedNum(lf, row, 1); end = lineFileNeedNum(lf, row, 2); /* Check remaining fields are formatted right. */ if (fieldCount > 3) { /* Go through and check that numerical strings really are numerical. */ struct asColumn *asCol = slElementFromIx(as->columnList, 3); int i; for (i=3; i<fieldCount; ++i) { @@ -657,23 +668,24 @@ /* bedToBigBed - Convert bed file to bigBed.. */ { bbFileCreate(inName, chromSizes, blockSize, itemsPerSlot, bedFields, as, doCompress, outName); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); blockSize = optionInt("blockSize", blockSize); itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot); bedFields = optionInt("bedFields", bedFields); as = optionVal("as", as); doCompress = !optionExists("unc"); +tabSep = optionExists("tabs"); if (argc != 4) usage(); bedToBigBed(argv[1], argv[2], argv[3]); optionFree(); if (verboseLevel() > 1) printVmPeak(); return 0; }