433ed3d13a7fd8e09901031c223c54d5abc77178 braney Fri Apr 24 13:11:54 2020 -0700 add a flag to bedToBigBed to allow exons to have 1bp overlapping diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c index de134fd..a278fa9 100644 --- src/utils/bedToBigBed/bedToBigBed.c +++ src/utils/bedToBigBed/bedToBigBed.c @@ -25,30 +25,31 @@ * 2.6 - Made it not crash on empty input. * */ /* Things set directly or indirectly by command lne in main() routine. */ int blockSize = 256; int itemsPerSlot = 512; char *extraIndex = NULL; int bedN = 0; /* number of standard bed fields */ int bedP = 0; /* number of bed plus fields */ char *asFile = NULL; char *asText = NULL; char *udcDir = NULL; static boolean doCompress = FALSE; static boolean tabSep = FALSE; static boolean sizesIs2Bit = FALSE; +static boolean allow1bpOverlap = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "bedToBigBed v. %s - Convert bed file to bigBed. (bbi version: %d)\n" "usage:\n" " bedToBigBed in.bed chrom.sizes out.bb\n" "Where in.bed is in one of the ascii bed formats, but not including track lines\n" "and chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n" "and out.bb is the output indexed big bed file.\n" "If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n" " http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n" "or you may use the script fetchChromSizes to download the chrom.sizes file.\n" "If you have bed annotations on patch sequences from NCBI, a more inclusive\n" @@ -67,44 +68,46 @@ " optional (+) if extra \"bedPlus\" fields, \n" " optional P specifies the number of extra fields. Not required, but preferred.\n" " Examples: -type=bed6 or -type=bed6+ or -type=bed6+3 \n" " (see http://genome.ucsc.edu/FAQ/FAQformat.html#format1)\n" " -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n" " of each field in a row in AutoSql format here.\n" " -blockSize=N - Number of items to bundle in r-tree. Default %d\n" " -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n" " -unc - If set, do not use compression.\n" " -tab - If set, expect fields to be tab separated, normally\n" " expects white space separator.\n" " -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n" " extraIndex=name and extraIndex=name,id are commonly used.\n" " -sizesIs2Bit -- If set, the chrom.sizes file is assumed to be a 2bit file.\n" " -udcDir=/path/to/udcCacheDir -- sets the UDC cache dir for caching of remote files.\n" + " -allow1bpOverlap -- allow exons to overlap by at most one base pair\n" , version, bbiCurrentVersion, blockSize, itemsPerSlot ); } static struct optionSpec options[] = { {"blockSize", OPTION_INT}, {"itemsPerSlot", OPTION_INT}, {"type", OPTION_STRING}, {"as", OPTION_STRING}, {"unc", OPTION_BOOLEAN}, {"tab", OPTION_BOOLEAN}, {"sizesIs2Bit", OPTION_BOOLEAN}, {"extraIndex", OPTION_STRING}, {"udcDir", OPTION_STRING}, + {"allow1bpOverlap", OPTION_BOOLEAN}, {NULL, 0}, }; int bbNamedFileChunkCmpByName(const void *va, const void *vb) /* Compare two named offset object to facilitate qsorting by name. */ { const struct bbNamedFileChunk *a = va, *b = vb; return strcmp(a->name, b->name); } static int maxBedNameSize; void bbNamedFileChunkKey(const void *va, char *keyBuf) /* Copy name to keyBuf for bPlusTree maker */ { @@ -183,31 +186,31 @@ long sectionStartIx = 0, sectionEndIx = 0; for (;;) { /* Get next line of input if any. */ if (lineFileNextReal(lf, &line)) { /* Chop up line and make sure the word count is right. */ int wordCount; if (tabSep) wordCount = chopTabs(line, row); else wordCount = chopLine(line, row); lineFileExpectWords(lf, fieldCount, wordCount); - loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE); + loadAndValidateBedExt(row, bedN, fieldCount, lf, bed, as, FALSE, allow1bpOverlap); chrom = bed->chrom; start = bed->chromStart; end = bed->chromEnd; sameChrom = sameString(chrom, usage->name); } else /* No next line */ { atEnd = TRUE; } /* Check conditions that would end block and save block info and advance to next if need be. */ if (atEnd || !sameChrom || itemIx >= itemsPerSlot) @@ -810,30 +813,31 @@ bbFileCreate(inName, chromSizes, blockSize, itemsPerSlot, asText, as, doCompress, extraIndexList, outName); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); blockSize = optionInt("blockSize", blockSize); itemsPerSlot = optionInt("itemsPerSlot", itemsPerSlot); asFile = optionVal("as", asFile); doCompress = !optionExists("unc"); sizesIs2Bit = optionExists("sizesIs2Bit"); extraIndex = optionVal("extraIndex", NULL); tabSep = optionExists("tab"); +allow1bpOverlap = optionExists("allow1bpOverlap"); udcDir = optionVal("udcDir", udcDefaultDir()); if (argc != 4) usage(); udcSetDefaultDir(udcDir); if (optionExists("type")) { // parse type char *btype = cloneString(optionVal("type", "")); char *plus = strchr(btype, '+'); if (plus) { *plus++ = 0; if (isdigit(*plus)) bedP = sqlUnsigned(plus);