c735712bb671de50f0edaab8212aaccf2906c93a
galt
  Wed Jul 10 19:29:10 2013 -0700
Fixing multi-threaded race condition in errAbort.c on pidInUse by adding another mutex.
diff --git src/lib/errabort.c src/lib/errabort.c
index 3449572..cae6ccc 100644
--- src/lib/errabort.c
+++ src/lib/errabort.c
@@ -284,57 +284,88 @@
 }
 
 boolean isErrAbortInProgress() 
 /* Flag to indicate that an error abort is in progress.
  * Needed so that a warn handler can tell if it's really
  * being called because of a warning or an error. */
 {
 struct perThreadAbortVars *ptav = getThreadVars();
 return ptav->errAbortInProgress;
 }
 
 
 static struct perThreadAbortVars *getThreadVars()
 /* Return a pointer to the perThreadAbortVars for the current pthread. */
 {
-pthread_t pid = pthread_self(); //  can be a pointer or a number
-// Don't safef, theoretically that could abort.
-char pidStr[64];
-snprintf(pidStr, sizeof(pidStr), "%lld",  ptrToLL(pid));
-pidStr[ArraySize(pidStr)-1] = '\0';
-
-static char pidInUse[64] = "";  // use a string since there is no known unused value for pthread_t variable
-if (sameString(pidStr, pidInUse)) // avoid deadlock on self
-    {  // this only happens when it has aborted already due to out of memory
+pthread_t pid = pthread_self(); //  pthread_t can be a pointer or a number, implementation-dependent.
+
+// Test for out-of-memory condition causing re-entrancy into this function that would block
+// on its own main mutex ptavMutex.  Do this by looking for its own pid.
+// Some care must be exercised in testing and comparing the threads pid against one in-use.
+// We need yet another mutex and a boolean to tell us when the pidInUse value may be safely compared to pid.
+
+// Use a boolean since there is no known unused value for pthread_t variable. NULL and -1 are not portable.
+static boolean pidInUseValid = FALSE;  // tells when pidInUse contains a valid pid that can be compared.
+static pthread_t pidInUse; // there is no "unused" value to which we can initialize this.
+static pthread_mutex_t pidInUseMutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_lock( &pidInUseMutex );
+// If this pid equals pidInUse, then this function has been re-entered due to severe out-of-memory error.
+// But we only compare them when pidInUseValid is TRUE.
+if (pidInUseValid && pthread_equal(pid, pidInUse)) 
+    {
+    // Avoid deadlock on self by exiting immediately.
+    // Use pthread_equal because directly comparing two pthread_t vars is not allowed.
+    // This re-entrancy only happens when it has aborted already due to out of memory
        // which should be a rare occurrence.
     char *errMsg = "errAbort re-entered due to out-of-memory condition. Exiting.";
     write(STDERR_FILENO, errMsg, strlen(errMsg)); 
-    exit(1);
+    _exit(1);   // out of memory is a serious problem, exit immediately without running atexit cleanup.
     }
+pthread_mutex_unlock( &pidInUseMutex );
 
+// This is the main mutex we really care about.
+// It controls access to the hash where thread-specific data is stored.
 static pthread_mutex_t ptavMutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_mutex_lock( &ptavMutex );
-snprintf(pidInUse, sizeof(pidInUse), "%lld",  ptrToLL(pid));
-pidInUse[ArraySize(pidInUse)-1] = '\0';
+
+// safely tell threads that pidInUse
+// is valid and correctly set and may be compared to pid
+pthread_mutex_lock( &pidInUseMutex );
+pidInUse = pthread_self();  // setting it directly to pid is not allowed.
+pidInUseValid = TRUE;
+pthread_mutex_unlock( &pidInUseMutex );
+
+// This means that if we crash due to out-of-memory below,
+// it will be able to detect the re-entrancy and handle it above.
 
 static struct hash *perThreadVars = NULL;
 if (perThreadVars == NULL)
     perThreadVars = hashNew(0);
+// convert the pid into a string for the hash key
+char pidStr[64];
+safef(pidStr, sizeof(pidStr), "%lld",  ptrToLL(pid));
 struct hashEl *hel = hashLookup(perThreadVars, pidStr);
 if (hel == NULL)
     {
     // if it is the first time, initialization the perThreadAbortVars
     struct perThreadAbortVars *ptav;
     AllocVar(ptav);
     ptav->debugPushPopErr = FALSE;
     ptav->errAbortInProgress = FALSE;
     ptav->warnIx = 0;
     ptav->warnArray[0] = defaultVaWarn;
     ptav->abortIx = 0;
     ptav->abortArray[0] = defaultAbort;
     hel = hashAdd(perThreadVars, pidStr, ptav);
     }
-pidInUse[0] = '\0';
+
+// safely tell other threads that pidInUse
+// is no longer valid and may not be compared to pid
+pthread_mutex_lock( &pidInUseMutex );
+pidInUseValid = FALSE;
+pthread_mutex_unlock( &pidInUseMutex );
+
+// unlock our mutex controlling the hash of thread-specific data
 pthread_mutex_unlock( &ptavMutex );
 return (struct perThreadAbortVars *)(hel->val);
 }