Hi Rainer,
sorry for the delay.
On Wed, Apr 19, 2017 at 1:36 PM, Rainer Jung <[email protected]> wrote:
>
> The wait_usec loop tying the apr_proc_mutex_timedlock() very often succeeds
> already during the first or second, sometimes the third attempt. But then
> the outer loop "while (i < MAX_ITER)" suddenly starts to fail because the
> inner loop switches behavior and apr_proc_mutex_timedlock() always gets the
> timeout until we stop trying due to wait_usec. Increasing MAX_WAIT_USEC from
> 1 sec to 5 sec didn't help. I have no idea, what the root cause for this
> behavior switching is.
I think what might happen is some pthread_cond_timedwait() spurious
wakeup (which I thought was avoided by counting the number of
waiters), or another possibility is a wakeup with timeout whereas the
cond is simultaneously signaled.
I both cases this could lead to a desynchronization between
proc_mutex_pthread_acquire_ex() and proc_mutex_pthread_release().
The attached patch prevents this by looping on
pthread_cond_[timed]wait() until the condition is satisfied.
Would you please try it?
Regards,
Yann.
Index: locks/unix/proc_mutex.c
===================================================================
--- locks/unix/proc_mutex.c (revision 1791915)
+++ locks/unix/proc_mutex.c (working copy)
@@ -21,6 +21,8 @@
#include "apr_md5.h" /* for apr_md5() */
#include "apr_atomic.h"
+/* #undef HAVE_PTHREAD_MUTEX_TIMEDLOCK */
+
APR_DECLARE(apr_status_t) apr_proc_mutex_destroy(apr_proc_mutex_t *mutex)
{
apr_status_t rv = apr_proc_mutex_cleanup(mutex);
@@ -701,30 +703,32 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
return rv;
}
- if (!proc_pthread_mutex_cond_locked(mutex)) {
- proc_pthread_mutex_cond_locked(mutex) = 1;
+ if (!timeout) {
+ rv = proc_pthread_mutex_cond_locked(mutex) ? APR_TIMEUP
+ : APR_SUCCESS;
}
- else if (!timeout) {
- rv = APR_TIMEUP;
- }
- else {
+ else if (proc_pthread_mutex_cond_locked(mutex)) {
+ struct timespec abstime;
+
+ if (timeout > 0) {
+ timeout += apr_time_now();
+ abstime.tv_sec = apr_time_sec(timeout);
+ abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nsecs */
+ }
+
proc_pthread_mutex_cond_num_waiters(mutex)++;
+ do {
if (timeout < 0) {
rv = pthread_cond_wait(&proc_pthread_mutex_cond(mutex),
&proc_pthread_mutex(mutex));
+ if (rv) {
#ifdef HAVE_ZOS_PTHREADS
- if (rv) {
rv = errno;
+#endif
+ break;
}
-#endif
}
else {
- struct timespec abstime;
-
- timeout += apr_time_now();
- abstime.tv_sec = apr_time_sec(timeout);
- abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nanoseconds */
-
rv = pthread_cond_timedwait(&proc_pthread_mutex_cond(mutex),
&proc_pthread_mutex(mutex),
&abstime);
@@ -735,8 +739,10 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
if (rv == ETIMEDOUT) {
rv = APR_TIMEUP;
}
+ break;
}
}
+ } while (proc_pthread_mutex_cond_locked(mutex));
proc_pthread_mutex_cond_num_waiters(mutex)--;
}
if (rv) {
@@ -743,6 +749,7 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
pthread_mutex_unlock(&proc_pthread_mutex(mutex));
return rv;
}
+ proc_pthread_mutex_cond_locked(mutex) = 1;
rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex));
if (rv) {
@@ -852,25 +859,29 @@ static apr_status_t proc_mutex_pthread_release(apr
return rv;
}
- if (!proc_pthread_mutex_cond_locked(mutex)) {
- rv = APR_EINVAL;
+ if (proc_pthread_mutex_cond_locked(mutex)) {
+ if (proc_pthread_mutex_cond_num_waiters(mutex)) {
+ rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex));
+ if (rv) {
+#ifdef HAVE_ZOS_PTHREADS
+ rv = errno;
+#endif
+ pthread_mutex_unlock(&proc_pthread_mutex(mutex));
+ return rv;
+ }
+ }
+ proc_pthread_mutex_cond_locked(mutex) = 0;
}
- else if (proc_pthread_mutex_cond_num_waiters(mutex)) {
- rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex));
+ else {
+ rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex));
if (rv) {
#ifdef HAVE_ZOS_PTHREADS
rv = errno;
#endif
+ return rv;
}
+ return APR_EINVAL;
}
- else {
- proc_pthread_mutex_cond_locked(mutex) = 0;
- rv = APR_SUCCESS;
- }
- if (rv) {
- pthread_mutex_unlock(&proc_pthread_mutex(mutex));
- return rv;
- }
}
#endif /* APR_USE_PROC_PTHREAD_MUTEX_COND */