e4f7eda5f5bb015ddc38f592869339263909854a braney Thu Feb 20 10:03:46 2025 -0800 tweak bedToBigBed to come up with its own temp file name diff --git src/utils/bedToBigBed/bedToBigBed.c src/utils/bedToBigBed/bedToBigBed.c index 2b859727451..dee366af81e 100644 --- src/utils/bedToBigBed/bedToBigBed.c +++ src/utils/bedToBigBed/bedToBigBed.c @@ -72,76 +72,74 @@ "a chromAlias bigBed file, the input BED file may have chromosome names\n" "matching any of the sequence name aliases in the chromAlias file.\n" "\n" "For UCSC provided genomes, the chromAlias files can be found under:\n" " https://hgdownload.soe.ucsc.edu/goldenPath//bigZips/.chromAlias.bb\n" "For UCSC GenArk assembly hubs, the chrom aliases are namedd in the form:\n" " https://hgdownload.soe.ucsc.edu/hubs/GCF/006/542/625/GCF_006542625.1/GCF_006542625.1.chromAlias.bb\n" "For a description of generating chromAlias files for your own assembly hub, see:\n" " http://genomewiki.ucsc.edu/index.php/Chrom_Alias\n" "\n" "Without the -sort option the in.bed file must be sorted by chromosome,start,\n" " to sort a bed file, use the unix sort command:\n" " sort -k1,1 -k2,2n unsorted.bed > sorted.bed\n" "Sequences must be sorted by name so all sequences with the same name\n" "are collected together, but they don't need to be in any particular order.\n" - "If the -sort=tempName option is specified then the input file will be sorted using\n" - "using a file called tempName, which will be removed after use.\n" "\n" "options:\n" " -type=bedN[+[P]] : \n" " N is between 3 and 15, \n" " optional (+) if extra \"bedPlus\" fields, \n" " optional P specifies the number of extra fields. Not required, but preferred.\n" " Examples: -type=bed6 or -type=bed6+ or -type=bed6+3 \n" " (see http://genome.ucsc.edu/FAQ/FAQformat.html#format1)\n" " -as=fields.as - If you have non-standard \"bedPlus\" fields, it's great to put a definition\n" " of each field in a row in AutoSql format here.\n" " -blockSize=N - Number of items to bundle in r-tree. Default %d\n" " -itemsPerSlot=N - Number of data points bundled at lowest level. Default %d\n" " -unc - If set, do not use compression.\n" " -tab - If set, expect fields to be tab separated, normally\n" " expects white space separator.\n" " -extraIndex=fieldList - If set, make an index on each field in a comma separated list\n" " extraIndex=name and extraIndex=name,id are commonly used.\n" " -sizesIs2Bit -- If set, the chrom.sizes file is assumed to be a 2bit file.\n" " -sizesIsChromAliasBb -- If set, then chrom.sizes file is assumed to be a chromAlias\n" " bigBed file or a URL to a such a file (see above).\n" " -sizesIsBb -- Obsolete name for -sizesIsChromAliasBb.\n" " -udcDir=/path/to/udcCacheDir -- sets the UDC cache dir for caching of remote files.\n" " -allow1bpOverlap -- allow exons to overlap by at most one base pair\n" " -maxAlloc=N -- Set the maximum memory allocation size to N bytes\n" - " -sort=tempFile -- sort the input file using tempFile. Removes tempFile afterwards. Use something like $TMPDIR/fileName\n" + " -sort -- sort the input file\n" , version, bbiCurrentVersion, blockSize, itemsPerSlot ); } static struct optionSpec options[] = { {"blockSize", OPTION_INT}, {"itemsPerSlot", OPTION_INT}, {"type", OPTION_STRING}, {"as", OPTION_STRING}, {"unc", OPTION_BOOLEAN}, {"tab", OPTION_BOOLEAN}, {"sizesIs2Bit", OPTION_BOOLEAN}, {"sizesIsChromAliasBb", OPTION_BOOLEAN}, {"sizesIsBb", OPTION_BOOLEAN}, {"extraIndex", OPTION_STRING}, {"udcDir", OPTION_STRING}, {"allow1bpOverlap", OPTION_BOOLEAN}, {"maxAlloc", OPTION_LONG_LONG}, - {"sort", OPTION_STRING}, + {"sort", OPTION_BOOLEAN}, {NULL, 0}, }; static struct lineFile *rewindFile(char *inName, struct lineFile *lf) /* set up lineFile to point at the beginning of the file. It we're reading from a decompressing * pipe, we need to close and reopen the pipe. */ { if (lf->pl) { lineFileClose(&lf); lf = lineFileOpen(inName, TRUE); } else lineFileRewind(lf); @@ -946,33 +944,47 @@ * or if more than or maximum defined fields, then for bed15+ */ bedN = fieldCount; if (bedN > bedKnownFields) { bedP = bedN - bedKnownFields; bedN = bedKnownFields; } } /* Make sure that fields are defined, from bed spec if nowhere else. */ if (asFile) readInGulp(asFile, &asText, NULL); else asText = bedAsDef(bedN, bedN + bedP); -char *sortFile = optionVal("sort", NULL); -if (sortFile) +boolean doSort = optionExists("sort"); +char *sortFile = NULL; + +if (doSort) { + char *tmpDir = getTempDir(); // this will (probably) grab $TMPDIR + char tmpName[4096]; + char *template = "bedToBigBedXXXXXX"; + safef(tmpName, sizeof tmpName, "%s/%s", tmpDir, template); + int tempFd = mkstemp(tmpName); + if (tempFd < 0) + errAbort("Cannot make temporary file like %s for sorting", tmpName); + close(tempFd); + + sortFile = cloneString(tmpName); + verbose(2, "made temporary file %s for sorting",sortFile); + char sysBuf[4096]; safef(sysBuf, sizeof sysBuf, "sort -k1,1 -k2,2n %s > %s", bedFileName, sortFile); if (system(sysBuf) != 0) errAbort("Couldn't sort %s into %s. Error code %d\n", bedFileName, sortFile, errno); bedFileName = sortFile; } struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { bedToBigBed(bedFileName, argv[2], argv[3]); } if (errCatch->gotError || errCatch->gotWarning)