c735712bb671de50f0edaab8212aaccf2906c93a galt Wed Jul 10 19:29:10 2013 -0700 Fixing multi-threaded race condition in errAbort.c on pidInUse by adding another mutex. diff --git src/lib/errabort.c src/lib/errabort.c index 3449572..cae6ccc 100644 --- src/lib/errabort.c +++ src/lib/errabort.c @@ -284,57 +284,88 @@ } boolean isErrAbortInProgress() /* Flag to indicate that an error abort is in progress. * Needed so that a warn handler can tell if it's really * being called because of a warning or an error. */ { struct perThreadAbortVars *ptav = getThreadVars(); return ptav->errAbortInProgress; } static struct perThreadAbortVars *getThreadVars() /* Return a pointer to the perThreadAbortVars for the current pthread. */ { -pthread_t pid = pthread_self(); // can be a pointer or a number -// Don't safef, theoretically that could abort. -char pidStr[64]; -snprintf(pidStr, sizeof(pidStr), "%lld", ptrToLL(pid)); -pidStr[ArraySize(pidStr)-1] = '\0'; - -static char pidInUse[64] = ""; // use a string since there is no known unused value for pthread_t variable -if (sameString(pidStr, pidInUse)) // avoid deadlock on self - { // this only happens when it has aborted already due to out of memory +pthread_t pid = pthread_self(); // pthread_t can be a pointer or a number, implementation-dependent. + +// Test for out-of-memory condition causing re-entrancy into this function that would block +// on its own main mutex ptavMutex. Do this by looking for its own pid. +// Some care must be exercised in testing and comparing the threads pid against one in-use. +// We need yet another mutex and a boolean to tell us when the pidInUse value may be safely compared to pid. + +// Use a boolean since there is no known unused value for pthread_t variable. NULL and -1 are not portable. +static boolean pidInUseValid = FALSE; // tells when pidInUse contains a valid pid that can be compared. +static pthread_t pidInUse; // there is no "unused" value to which we can initialize this. +static pthread_mutex_t pidInUseMutex = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_lock( &pidInUseMutex ); +// If this pid equals pidInUse, then this function has been re-entered due to severe out-of-memory error. +// But we only compare them when pidInUseValid is TRUE. +if (pidInUseValid && pthread_equal(pid, pidInUse)) + { + // Avoid deadlock on self by exiting immediately. + // Use pthread_equal because directly comparing two pthread_t vars is not allowed. + // This re-entrancy only happens when it has aborted already due to out of memory // which should be a rare occurrence. char *errMsg = "errAbort re-entered due to out-of-memory condition. Exiting."; write(STDERR_FILENO, errMsg, strlen(errMsg)); - exit(1); + _exit(1); // out of memory is a serious problem, exit immediately without running atexit cleanup. } +pthread_mutex_unlock( &pidInUseMutex ); +// This is the main mutex we really care about. +// It controls access to the hash where thread-specific data is stored. static pthread_mutex_t ptavMutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock( &ptavMutex ); -snprintf(pidInUse, sizeof(pidInUse), "%lld", ptrToLL(pid)); -pidInUse[ArraySize(pidInUse)-1] = '\0'; + +// safely tell threads that pidInUse +// is valid and correctly set and may be compared to pid +pthread_mutex_lock( &pidInUseMutex ); +pidInUse = pthread_self(); // setting it directly to pid is not allowed. +pidInUseValid = TRUE; +pthread_mutex_unlock( &pidInUseMutex ); + +// This means that if we crash due to out-of-memory below, +// it will be able to detect the re-entrancy and handle it above. static struct hash *perThreadVars = NULL; if (perThreadVars == NULL) perThreadVars = hashNew(0); +// convert the pid into a string for the hash key +char pidStr[64]; +safef(pidStr, sizeof(pidStr), "%lld", ptrToLL(pid)); struct hashEl *hel = hashLookup(perThreadVars, pidStr); if (hel == NULL) { // if it is the first time, initialization the perThreadAbortVars struct perThreadAbortVars *ptav; AllocVar(ptav); ptav->debugPushPopErr = FALSE; ptav->errAbortInProgress = FALSE; ptav->warnIx = 0; ptav->warnArray[0] = defaultVaWarn; ptav->abortIx = 0; ptav->abortArray[0] = defaultAbort; hel = hashAdd(perThreadVars, pidStr, ptav); } -pidInUse[0] = '\0'; + +// safely tell other threads that pidInUse +// is no longer valid and may not be compared to pid +pthread_mutex_lock( &pidInUseMutex ); +pidInUseValid = FALSE; +pthread_mutex_unlock( &pidInUseMutex ); + +// unlock our mutex controlling the hash of thread-specific data pthread_mutex_unlock( &ptavMutex ); return (struct perThreadAbortVars *)(hel->val); }