2029e9afd93f32973619c48e111bdf1a8f8219b5 braney Wed Apr 29 13:39:56 2026 -0700 trackDbCache: bail out gracefully when /dev/shm is full instead of SIGBUS Use posix_fallocate() instead of ftruncate() so tmpfs commits the pages synchronously; on ENOSPC we log to stderr, drop the temp file, and return without writing the cache. Previously hgTracks would die from SIGBUS on the first write into the mmap'd region (e.g. Docker's default 64 MB /dev/shm), truncating the page mid-render. refs #37475 Co-Authored-By: Claude Opus 4.7 (1M context) diff --git src/hg/lib/trackDbCache.c src/hg/lib/trackDbCache.c index 58ad4b92696..de3aecad99f 100644 --- src/hg/lib/trackDbCache.c +++ src/hg/lib/trackDbCache.c @@ -336,31 +336,42 @@ char tempFileName[4096]; safef(tempFileName, sizeof tempFileName, "%s", rTempName(dirName, "temp", "")); int fd = open(tempFileName, oflags, 0666 ); if (fd < 0) { cacheLog("unable to open shared memory %s errno %d", tempFileName, errno); mustRemove(tempFileName); return; } else { cacheLog("open shared memory %s", tempFileName); } ftruncate(fd, 0); -ftruncate(fd, size); +// Reserve real tmpfs pages now so writes through the mmap below can't SIGBUS +// when /dev/shm is full (ftruncate alone leaves the file sparse). +int err = posix_fallocate(fd, 0, size); +if (err != 0) + { + fprintf(stderr, "trackDbCache: posix_fallocate of %ld bytes in %s failed (%s); skipping cache write\n", + size, trackDbCacheDir, strerror(err)); + cacheLog("unable to allocate %ld bytes for trackDb cache, errno %d", size, err); + close(fd); + mustRemove(tempFileName); + return; + } size_t psize = getpagesize(); unsigned long pageMask = psize - 1; unsigned long paddress = 0; unsigned char *mem; int numTries = 20; // we try numTries times to connect to a random address for(; numTries; numTries--) { unsigned long address = random(); paddress = (address + psize - 1) & ~pageMask; mem = (u_char *) mmap((void *)paddress, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);