Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Mar 14, 2013 at 6:48 PM, Ted Unangst wrote: > On Thu, Mar 14, 2013 at 17:24, Antoine Jacoutot wrote: >> On Thu, Mar 14, 2013 at 11:41:52AM -0400, Ted Unangst wrote: >>> On Thu, Mar 14, 2013 at 14:30, Antoine Jacoutot wrote: >>> >>> > FYI I am seeing a somehow similar crash when using sysutils/bacula (both >>> > 5.2 and 5.3). >>> > It is 100% reproducible on my setup. Obviously painful since it means I >>> > cannot run backups anymore... >>> >>> The following is brought to you without testing or warranty. It did >>> compile at least once though. >> >> Awesome, thanks! I ran several batches of concurrent backups and I cannot >> reproduce the crash anymore :-) >> I'm going to run with that patch for the time being... if I spot any >> regression, I'll let you know. > > Couple fixes. In some error cases, there are early returns I didn't > notice before. Fixed diff below, though I don't think a correct > program should be affected. > > Alexey, sorry, I didn't get to your final diff before. It's very > similar to the diff below, so you were on the right track. One thing > that's different is you created unique special functions. Thanks Ted. Glad to see this diff back. I stopped pushing the diff because of zero feedback. If you don't mind, put some credit to Roman Kravchuk when you will commit, as he did most of work. I just pushed diff.
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Mar 14, 2013 at 12:48:52PM -0400, Ted Unangst wrote: > On Thu, Mar 14, 2013 at 17:24, Antoine Jacoutot wrote: > > On Thu, Mar 14, 2013 at 11:41:52AM -0400, Ted Unangst wrote: > >> On Thu, Mar 14, 2013 at 14:30, Antoine Jacoutot wrote: > >> > >> > FYI I am seeing a somehow similar crash when using sysutils/bacula (both > >> > 5.2 and 5.3). > >> > It is 100% reproducible on my setup. Obviously painful since it means I > >> > cannot run backups anymore... > >> > >> The following is brought to you without testing or warranty. It did > >> compile at least once though. > > > > Awesome, thanks! I ran several batches of concurrent backups and I cannot > > reproduce the crash anymore :-) > > I'm going to run with that patch for the time being... if I spot any > > regression, I'll let you know. > > Couple fixes. In some error cases, there are early returns I didn't > notice before. Fixed diff below, though I don't think a correct > program should be affected. Oki; diff applied so I'm running with this now. > Alexey, sorry, I didn't get to your final diff before. It's very > similar to the diff below, so you were on the right track. One thing > that's different is you created unique special functions. That's only > necessary for malloc because it can't call pthread code (which in turn > calls malloc, creating a cycle). I did lock srandomdev. If a thread > stalls in sysctl, that's just bad luck. > > Index: stdlib/random.c > === > RCS file: /cvs/src/lib/libc/stdlib/random.c,v > retrieving revision 1.17 > diff -u -p -r1.17 random.c > --- stdlib/random.c 1 Jun 2012 01:01:57 - 1.17 > +++ stdlib/random.c 14 Mar 2013 16:40:15 - > @@ -36,6 +36,8 @@ > #include > #include > > +#include "thread_private.h" > + > /* > * random.c: > * > @@ -174,6 +176,12 @@ static int rand_type = TYPE_3; > static int rand_deg = DEG_3; > static int rand_sep = SEP_3; > > +_THREAD_PRIVATE_MUTEX(random); > +static long random_l(void); > + > +#define LOCK() _THREAD_PRIVATE_MUTEX_LOCK(random) > +#define UNLOCK() _THREAD_PRIVATE_MUTEX_UNLOCK(random) > + > /* > * srandom: > * > @@ -186,8 +194,8 @@ static int rand_sep = SEP_3; > * introduced by the L.C.R.N.G. Note that the initialization of randtbl[] > * for default usage relies on values produced by this routine. > */ > -void > -srandom(unsigned int x) > +static void > +srandom_l(unsigned int x) > { > int i; > int32_t test; > @@ -213,10 +221,18 @@ srandom(unsigned int x) > fptr = &state[rand_sep]; > rptr = &state[0]; > for (i = 0; i < 10 * rand_deg; i++) > - (void)random(); > + (void)random_l(); > } > } > > +void > +srandom(unsigned int x) > +{ > + LOCK(); > + srandom_l(x); > + UNLOCK(); > +} > + > /* > * srandomdev: > * > @@ -234,6 +250,7 @@ srandomdev(void) > int mib[2]; > size_t len; > > + LOCK(); > if (rand_type == TYPE_0) > len = sizeof(state[0]); > else > @@ -247,6 +264,7 @@ srandomdev(void) > fptr = &state[rand_sep]; > rptr = &state[0]; > } > + UNLOCK(); > } > > /* > @@ -273,12 +291,15 @@ initstate(u_int seed, char *arg_state, s > { > char *ostate = (char *)(&state[-1]); > > + LOCK(); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > state[-1] = MAX_TYPES * (rptr - state) + rand_type; > - if (n < BREAK_0) > + if (n < BREAK_0) { > + UNLOCK(); > return(NULL); > + } > if (n < BREAK_1) { > rand_type = TYPE_0; > rand_deg = DEG_0; > @@ -302,11 +323,12 @@ initstate(u_int seed, char *arg_state, s > } > state = &(((int32_t *)arg_state)[1]); /* first location */ > end_ptr = &state[rand_deg]; /* must set end_ptr before srandom */ > - srandom(seed); > + srandom_l(seed); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > state[-1] = MAX_TYPES*(rptr - state) + rand_type; > + UNLOCK(); > return(ostate); > } > > @@ -333,6 +355,7 @@ setstate(char *arg_state) > int32_t rear = new_state[0] / MAX_TYPES; > char *ostate = (char *)(&state[-1]); > > + LOCK(); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > @@ -348,6 +371,7 @@ setstate(char *arg_state) > rand_sep = seps[type]; > break; > default: > + UNLOCK(); > return(NULL); > } > state = &new_state[1]; > @@ -356,6 +380,7 @@ setstate(char *arg_state) > fptr = &state[(rear + rand_sep) % rand_deg]; > } > end_ptr = &state[rand_deg]; /* set end_ptr too */ > + UNLOCK(); > return(ostate); > } > > @@ -376,8 +401,8 @@ setstate(char *arg_state) > * >
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Mar 14, 2013 at 17:24, Antoine Jacoutot wrote: > On Thu, Mar 14, 2013 at 11:41:52AM -0400, Ted Unangst wrote: >> On Thu, Mar 14, 2013 at 14:30, Antoine Jacoutot wrote: >> >> > FYI I am seeing a somehow similar crash when using sysutils/bacula (both >> > 5.2 and 5.3). >> > It is 100% reproducible on my setup. Obviously painful since it means I >> > cannot run backups anymore... >> >> The following is brought to you without testing or warranty. It did >> compile at least once though. > > Awesome, thanks! I ran several batches of concurrent backups and I cannot > reproduce the crash anymore :-) > I'm going to run with that patch for the time being... if I spot any > regression, I'll let you know. Couple fixes. In some error cases, there are early returns I didn't notice before. Fixed diff below, though I don't think a correct program should be affected. Alexey, sorry, I didn't get to your final diff before. It's very similar to the diff below, so you were on the right track. One thing that's different is you created unique special functions. That's only necessary for malloc because it can't call pthread code (which in turn calls malloc, creating a cycle). I did lock srandomdev. If a thread stalls in sysctl, that's just bad luck. Index: stdlib/random.c === RCS file: /cvs/src/lib/libc/stdlib/random.c,v retrieving revision 1.17 diff -u -p -r1.17 random.c --- stdlib/random.c 1 Jun 2012 01:01:57 - 1.17 +++ stdlib/random.c 14 Mar 2013 16:40:15 - @@ -36,6 +36,8 @@ #include #include +#include "thread_private.h" + /* * random.c: * @@ -174,6 +176,12 @@ static int rand_type = TYPE_3; static int rand_deg = DEG_3; static int rand_sep = SEP_3; +_THREAD_PRIVATE_MUTEX(random); +static long random_l(void); + +#define LOCK() _THREAD_PRIVATE_MUTEX_LOCK(random) +#define UNLOCK() _THREAD_PRIVATE_MUTEX_UNLOCK(random) + /* * srandom: * @@ -186,8 +194,8 @@ static int rand_sep = SEP_3; * introduced by the L.C.R.N.G. Note that the initialization of randtbl[] * for default usage relies on values produced by this routine. */ -void -srandom(unsigned int x) +static void +srandom_l(unsigned int x) { int i; int32_t test; @@ -213,10 +221,18 @@ srandom(unsigned int x) fptr = &state[rand_sep]; rptr = &state[0]; for (i = 0; i < 10 * rand_deg; i++) - (void)random(); + (void)random_l(); } } +void +srandom(unsigned int x) +{ + LOCK(); + srandom_l(x); + UNLOCK(); +} + /* * srandomdev: * @@ -234,6 +250,7 @@ srandomdev(void) int mib[2]; size_t len; + LOCK(); if (rand_type == TYPE_0) len = sizeof(state[0]); else @@ -247,6 +264,7 @@ srandomdev(void) fptr = &state[rand_sep]; rptr = &state[0]; } + UNLOCK(); } /* @@ -273,12 +291,15 @@ initstate(u_int seed, char *arg_state, s { char *ostate = (char *)(&state[-1]); + LOCK(); if (rand_type == TYPE_0) state[-1] = rand_type; else state[-1] = MAX_TYPES * (rptr - state) + rand_type; - if (n < BREAK_0) + if (n < BREAK_0) { + UNLOCK(); return(NULL); + } if (n < BREAK_1) { rand_type = TYPE_0; rand_deg = DEG_0; @@ -302,11 +323,12 @@ initstate(u_int seed, char *arg_state, s } state = &(((int32_t *)arg_state)[1]); /* first location */ end_ptr = &state[rand_deg]; /* must set end_ptr before srandom */ - srandom(seed); + srandom_l(seed); if (rand_type == TYPE_0) state[-1] = rand_type; else state[-1] = MAX_TYPES*(rptr - state) + rand_type; + UNLOCK(); return(ostate); } @@ -333,6 +355,7 @@ setstate(char *arg_state) int32_t rear = new_state[0] / MAX_TYPES; char *ostate = (char *)(&state[-1]); + LOCK(); if (rand_type == TYPE_0) state[-1] = rand_type; else @@ -348,6 +371,7 @@ setstate(char *arg_state) rand_sep = seps[type]; break; default: + UNLOCK(); return(NULL); } state = &new_state[1]; @@ -356,6 +380,7 @@ setstate(char *arg_state) fptr = &state[(rear + rand_sep) % rand_deg]; } end_ptr = &state[rand_deg]; /* set end_ptr too */ + UNLOCK(); return(ostate); } @@ -376,8 +401,8 @@ setstate(char *arg_state) * * Returns a 31-bit random number. */ -long -random(void) +static long +random_l(void) { int32_t i; @@ -393,4 +418,14 @@ random(void) rptr = state; } return((long)i); +} + +long +random(void) +{ + long r; + LOCK(); + r = random_l
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Mar 14, 2013 at 11:41:52AM -0400, Ted Unangst wrote: > On Thu, Mar 14, 2013 at 14:30, Antoine Jacoutot wrote: > > > FYI I am seeing a somehow similar crash when using sysutils/bacula (both > > 5.2 and 5.3). > > It is 100% reproducible on my setup. Obviously painful since it means I > > cannot run backups anymore... > > The following is brought to you without testing or warranty. It did > compile at least once though. Awesome, thanks! I ran several batches of concurrent backups and I cannot reproduce the crash anymore :-) I'm going to run with that patch for the time being... if I spot any regression, I'll let you know. > Index: stdlib/random.c > === > RCS file: /cvs/src/lib/libc/stdlib/random.c,v > retrieving revision 1.17 > diff -u -p -r1.17 random.c > --- stdlib/random.c 1 Jun 2012 01:01:57 - 1.17 > +++ stdlib/random.c 14 Mar 2013 15:37:14 - > @@ -36,6 +36,8 @@ > #include > #include > > +#include "thread_private.h" > + > /* > * random.c: > * > @@ -174,6 +176,12 @@ static int rand_type = TYPE_3; > static int rand_deg = DEG_3; > static int rand_sep = SEP_3; > > +_THREAD_PRIVATE_MUTEX(random); > +static long random_l(void); > + > +#define LOCK() _THREAD_PRIVATE_MUTEX_LOCK(random) > +#define UNLOCK() _THREAD_PRIVATE_MUTEX_UNLOCK(random) > + > /* > * srandom: > * > @@ -186,8 +194,8 @@ static int rand_sep = SEP_3; > * introduced by the L.C.R.N.G. Note that the initialization of randtbl[] > * for default usage relies on values produced by this routine. > */ > -void > -srandom(unsigned int x) > +static void > +srandom_l(unsigned int x) > { > int i; > int32_t test; > @@ -213,10 +221,18 @@ srandom(unsigned int x) > fptr = &state[rand_sep]; > rptr = &state[0]; > for (i = 0; i < 10 * rand_deg; i++) > - (void)random(); > + (void)random_l(); > } > } > > +void > +srandom(unsigned int x) > +{ > + LOCK(); > + srandom_l(x); > + UNLOCK(); > +} > + > /* > * srandomdev: > * > @@ -234,6 +250,7 @@ srandomdev(void) > int mib[2]; > size_t len; > > + LOCK(); > if (rand_type == TYPE_0) > len = sizeof(state[0]); > else > @@ -247,6 +264,7 @@ srandomdev(void) > fptr = &state[rand_sep]; > rptr = &state[0]; > } > + UNLOCK(); > } > > /* > @@ -273,6 +291,7 @@ initstate(u_int seed, char *arg_state, s > { > char *ostate = (char *)(&state[-1]); > > + LOCK(); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > @@ -302,11 +321,12 @@ initstate(u_int seed, char *arg_state, s > } > state = &(((int32_t *)arg_state)[1]); /* first location */ > end_ptr = &state[rand_deg]; /* must set end_ptr before srandom */ > - srandom(seed); > + srandom_l(seed); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > state[-1] = MAX_TYPES*(rptr - state) + rand_type; > + UNLOCK(); > return(ostate); > } > > @@ -333,6 +353,7 @@ setstate(char *arg_state) > int32_t rear = new_state[0] / MAX_TYPES; > char *ostate = (char *)(&state[-1]); > > + LOCK(); > if (rand_type == TYPE_0) > state[-1] = rand_type; > else > @@ -356,6 +377,7 @@ setstate(char *arg_state) > fptr = &state[(rear + rand_sep) % rand_deg]; > } > end_ptr = &state[rand_deg]; /* set end_ptr too */ > + UNLOCK(); > return(ostate); > } > > @@ -376,8 +398,8 @@ setstate(char *arg_state) > * > * Returns a 31-bit random number. > */ > -long > -random(void) > +static long > +random_l(void) > { > int32_t i; > > @@ -393,4 +415,14 @@ random(void) > rptr = state; > } > return((long)i); > +} > + > +long > +random(void) > +{ > + long r; > + LOCK(); > + r = random_l(); > + UNLOCK(); > + return r; > } -- Antoine
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Mar 14, 2013 at 14:30, Antoine Jacoutot wrote: > FYI I am seeing a somehow similar crash when using sysutils/bacula (both > 5.2 and 5.3). > It is 100% reproducible on my setup. Obviously painful since it means I > cannot run backups anymore... The following is brought to you without testing or warranty. It did compile at least once though. Index: stdlib/random.c === RCS file: /cvs/src/lib/libc/stdlib/random.c,v retrieving revision 1.17 diff -u -p -r1.17 random.c --- stdlib/random.c 1 Jun 2012 01:01:57 - 1.17 +++ stdlib/random.c 14 Mar 2013 15:37:14 - @@ -36,6 +36,8 @@ #include #include +#include "thread_private.h" + /* * random.c: * @@ -174,6 +176,12 @@ static int rand_type = TYPE_3; static int rand_deg = DEG_3; static int rand_sep = SEP_3; +_THREAD_PRIVATE_MUTEX(random); +static long random_l(void); + +#define LOCK() _THREAD_PRIVATE_MUTEX_LOCK(random) +#define UNLOCK() _THREAD_PRIVATE_MUTEX_UNLOCK(random) + /* * srandom: * @@ -186,8 +194,8 @@ static int rand_sep = SEP_3; * introduced by the L.C.R.N.G. Note that the initialization of randtbl[] * for default usage relies on values produced by this routine. */ -void -srandom(unsigned int x) +static void +srandom_l(unsigned int x) { int i; int32_t test; @@ -213,10 +221,18 @@ srandom(unsigned int x) fptr = &state[rand_sep]; rptr = &state[0]; for (i = 0; i < 10 * rand_deg; i++) - (void)random(); + (void)random_l(); } } +void +srandom(unsigned int x) +{ + LOCK(); + srandom_l(x); + UNLOCK(); +} + /* * srandomdev: * @@ -234,6 +250,7 @@ srandomdev(void) int mib[2]; size_t len; + LOCK(); if (rand_type == TYPE_0) len = sizeof(state[0]); else @@ -247,6 +264,7 @@ srandomdev(void) fptr = &state[rand_sep]; rptr = &state[0]; } + UNLOCK(); } /* @@ -273,6 +291,7 @@ initstate(u_int seed, char *arg_state, s { char *ostate = (char *)(&state[-1]); + LOCK(); if (rand_type == TYPE_0) state[-1] = rand_type; else @@ -302,11 +321,12 @@ initstate(u_int seed, char *arg_state, s } state = &(((int32_t *)arg_state)[1]); /* first location */ end_ptr = &state[rand_deg]; /* must set end_ptr before srandom */ - srandom(seed); + srandom_l(seed); if (rand_type == TYPE_0) state[-1] = rand_type; else state[-1] = MAX_TYPES*(rptr - state) + rand_type; + UNLOCK(); return(ostate); } @@ -333,6 +353,7 @@ setstate(char *arg_state) int32_t rear = new_state[0] / MAX_TYPES; char *ostate = (char *)(&state[-1]); + LOCK(); if (rand_type == TYPE_0) state[-1] = rand_type; else @@ -356,6 +377,7 @@ setstate(char *arg_state) fptr = &state[(rear + rand_sep) % rand_deg]; } end_ptr = &state[rand_deg]; /* set end_ptr too */ + UNLOCK(); return(ostate); } @@ -376,8 +398,8 @@ setstate(char *arg_state) * * Returns a 31-bit random number. */ -long -random(void) +static long +random_l(void) { int32_t i; @@ -393,4 +415,14 @@ random(void) rptr = state; } return((long)i); +} + +long +random(void) +{ + long r; + LOCK(); + r = random_l(); + UNLOCK(); + return r; }
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Sep 27, 2012 at 12:43:24PM +0300, Alexey Suslikov wrote: > Program received signal SIGSEGV, Segmentation fault. > [Switching to thread 1006387] > 0x0cb33345cf6e in random () at /usr/src/lib/libc/stdlib/random.c:387 > 387 *fptr += *rptr; > > Back trace: > > Thread 10 (thread 1003160): > #0 0x0cb33344135a in _thread_sys___thrsleep () at :2 > #1 0x0cb3315fac2a in pthread_cond_wait (condp=0xcb32a79c4b0, > mutexp=Variable "mutexp" is not available. > ) at /usr/src/lib/librthread/rthread_sync.c:500 > #2 0x0cb129f836ba in gwlist_consume () from /usr/local/sbin/bearerbox > #3 0x0cb129f121f1 in boxc_sender () from /usr/local/sbin/bearerbox > #4 0x0cb129f828dd in new_thread () from /usr/local/sbin/bearerbox > #5 0x0cb3315f911e in _rthread_start (v=Variable "v" is not available. > ) at /usr/src/lib/librthread/rthread.c:122 > #6 0x0cb333434f9b in __tfork_thread () at > /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:75 > Cannot access memory at address 0xcb32b27c000 > 0x0cb33345cf6e 387 *fptr += *rptr; FYI I am seeing a somehow similar crash when using sysutils/bacula (both 5.2 and 5.3). It is 100% reproducible on my setup. Obviously painful since it means I cannot run backups anymore... Here's an e.g. with a test setup : Program received signal SIGSEGV, Segmentation fault. [Switching to thread 1001179] 0x0e5f89ed in random () at /usr/src/lib/libc/stdlib/random.c:387 387 *fptr += *rptr; (gdb) (gdb) bt full #0 0x0e5f89ed in random () at /usr/src/lib/libc/stdlib/random.c:387 i = Variable "i" is not available. (gdb) bt #0 0x0e5f89ed in random () at /usr/src/lib/libc/stdlib/random.c:387 #1 0x0e5f8bba in srandom (x=2239411) at /usr/src/lib/libc/stdlib/random.c:216 #2 0x09d8890f in cram_md5_challenge (bs=0x844d5418, password=0x8b00e398 "8f1fd8bec27ecfade7d33e0442f110f9", tls_local_need=2, compatible=1) at cram-md5.c:65 #3 0x1c007f2b in authenticate (rcode=1001, bs=0x844d5418, jcr=0x7fcaa018) at authenticate.c:126 #4 0x1c0083c3 in authenticate_director (jcr=0x7fcaa018) at authenticate.c:206 #5 0x1c01989f in hello_cmd (jcr=0x7fcaa018) at job.c:437 #6 0x1c019ddb in handle_client_request (dirp=0x844d5418) at job.c:287 #7 0x09db3182 in workq_server (arg=0x3c00adc0) at workq.c:344 #8 0x09dc110e in _rthread_start (v=0x844d5000) at /usr/src/lib/librthread/rthread.c:122 #9 0x0e603272 in __tfork_thread () at /usr/src/lib/libc/arch/i386/sys/tfork_thread.S:95 -- Antoine
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, 27 Sep 2012, Alexey Suslikov wrote: > On Thursday, September 27, 2012, Alexey Suslikov wrote: > > On Thursday, September 27, 2012, Philip Guenther wrote: > >> On Thu, 27 Sep 2012, Alexey Suslikov wrote: > >> > Removing only local variables part reverts us to previous behavior > >> > (i.e. crashes). > >> > >> My guess is your program is calling srandom(), srandomdev(), > >> initstate() or setstate() as well. Your diff doesn't protect the > >> alteration of state, end_ptr, fptr, and rptr on those paths, so a > >> call to initstate() while another thread is in random() can walk fptr > >> and/or rptr out of the state array. Add the necessary locking in > >> them and run your tests again. Have you done this? You based your patch on changes made to some other BSD (NetBSD, IIRC): have you put in place all the locking calls that they did? If not, why are wasting everyone's time by ignoring their work? > >> > I'm starting to believe that static globals are not good. > >> > >> They are incredibly good at what they do. If you're trying to say > >> that they fundamentally can't be thread-safe, you'll need some > >> extraordinary evidence for such a claim. > >> > > What good they do? Static globals provide low overhead shared storage that doesn't pollute the global namespace. > Philip, can you help us to write threaded test case (spawning a number > of threads each calling random)? Sorry, this isn't near the top of my TODO list, and I've already made my suggestions on what you should be trying. Philip Guenther
Threads related SIGSEGV in random.c (diff, v2)
On Thursday, September 27, 2012, Alexey Suslikov wrote: > On Thursday, September 27, 2012, Philip Guenther wrote: > >> On Thu, 27 Sep 2012, Alexey Suslikov wrote: >> > Removing only local variables part reverts us to previous behavior (i.e. >> > crashes). >> >> My guess is your program is calling srandom(), srandomdev(), initstate() >> or setstate() as well. Your diff doesn't protect the alteration of state, >> end_ptr, fptr, and rptr on those paths, so a call to initstate() while >> another thread is in random() can walk fptr and/or rptr out of the state >> array. Add the necessary locking in them and run your tests again. >> >> If not, well, crank up your debugging skills. What was the line of code >> that actually triggered the crash? Where did the bogus pointer come from? >> >> > Crash: > > Program received signal SIGSEGV, Segmentation fault. > [Switching to thread 1006387] > 0x0cb33345cf6e in random () at /usr/src/lib/libc/stdlib/random.c:387 > 387 *fptr += *rptr; > > Back trace: > > Thread 10 (thread 1003160): > #0 0x0cb33344135a in _thread_sys___thrsleep () at :2 > #1 0x0cb3315fac2a in pthread_cond_wait (condp=0xcb32a79c4b0, > mutexp=Variable "mutexp" is not available. > ) at /usr/src/lib/librthread/rthread_sync.c:500 > #2 0x0cb129f836ba in gwlist_consume () from /usr/local/sbin/bearerbox > #3 0x0cb129f121f1 in boxc_sender () from /usr/local/sbin/bearerbox > #4 0x0cb129f828dd in new_thread () from /usr/local/sbin/bearerbox > #5 0x0cb3315f911e in _rthread_start (v=Variable "v" is not available. > ) at /usr/src/lib/librthread/rthread.c:122 > #6 0x0cb333434f9b in __tfork_thread () at > /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:75 > Cannot access memory at address 0xcb32b27c000 > 0x0cb33345cf6e 387 *fptr += *rptr; > > >> >> > I'm starting to believe that static globals are not good. >> >> They are incredibly good at what they do. If you're trying to say that >> they fundamentally can't be thread-safe, you'll need some extraordinary >> evidence for such a claim. >> >> > What good they do? > Philip, can you help us to write threaded test case (spawning a number of threads each calling random)?
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thursday, September 27, 2012, Philip Guenther wrote: > On Thu, 27 Sep 2012, Alexey Suslikov wrote: > > Removing only local variables part reverts us to previous behavior (i.e. > > crashes). > > My guess is your program is calling srandom(), srandomdev(), initstate() > or setstate() as well. Your diff doesn't protect the alteration of state, > end_ptr, fptr, and rptr on those paths, so a call to initstate() while > another thread is in random() can walk fptr and/or rptr out of the state > array. Add the necessary locking in them and run your tests again. > > If not, well, crank up your debugging skills. What was the line of code > that actually triggered the crash? Where did the bogus pointer come from? > > Crash: Program received signal SIGSEGV, Segmentation fault. [Switching to thread 1006387] 0x0cb33345cf6e in random () at /usr/src/lib/libc/stdlib/random.c:387 387 *fptr += *rptr; Back trace: Thread 10 (thread 1003160): #0 0x0cb33344135a in _thread_sys___thrsleep () at :2 #1 0x0cb3315fac2a in pthread_cond_wait (condp=0xcb32a79c4b0, mutexp=Variable "mutexp" is not available. ) at /usr/src/lib/librthread/rthread_sync.c:500 #2 0x0cb129f836ba in gwlist_consume () from /usr/local/sbin/bearerbox #3 0x0cb129f121f1 in boxc_sender () from /usr/local/sbin/bearerbox #4 0x0cb129f828dd in new_thread () from /usr/local/sbin/bearerbox #5 0x0cb3315f911e in _rthread_start (v=Variable "v" is not available. ) at /usr/src/lib/librthread/rthread.c:122 #6 0x0cb333434f9b in __tfork_thread () at /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:75 Cannot access memory at address 0xcb32b27c000 0x0cb33345cf6e 387 *fptr += *rptr; > > > I'm starting to believe that static globals are not good. > > They are incredibly good at what they do. If you're trying to say that > they fundamentally can't be thread-safe, you'll need some extraordinary > evidence for such a claim. > > What good they do? Cheers, Alexey
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, 27 Sep 2012, Alexey Suslikov wrote: > Removing only local variables part reverts us to previous behavior (i.e. > crashes). My guess is your program is calling srandom(), srandomdev(), initstate() or setstate() as well. Your diff doesn't protect the alteration of state, end_ptr, fptr, and rptr on those paths, so a call to initstate() while another thread is in random() can walk fptr and/or rptr out of the state array. Add the necessary locking in them and run your tests again. If not, well, crank up your debugging skills. What was the line of code that actually triggered the crash? Where did the bogus pointer come from? > I'm starting to believe that static globals are not good. They are incredibly good at what they do. If you're trying to say that they fundamentally can't be thread-safe, you'll need some extraordinary evidence for such a claim. Philip Guenther
Re: Threads related SIGSEGV in random.c (diff, v2)
On Thu, Sep 27, 2012 at 00:18, Alexey Suslikov wrote: > On Wed, Sep 26, 2012 at 9:51 PM, Ted Unangst wrote: >> On Wed, Sep 26, 2012 at 11:18, Alexey Suslikov wrote: >>> Hi. >>> >>> Any news on that? >> >> Can we do it without the local variables for speed part? I am not >> interested in making this function faster. >> > > Removing only local variables part reverts us to previous > behavior (i.e. crashes). > > However, leaving current code as is but adding only local > variables (see below) passes our test with no crashes. If that's the case, then the locking wasn't done correctly. Switching to locals just means you're going to get weird results. > I'm starting to believe that static globals are not good. Well, no, but they're what we're stuck with. As for Kannel, patching it to use rand_r may be a better solution. That's the official "bad random number generator for threads" in posix.
Re: Threads related SIGSEGV in random.c (diff, v2)
On Wed, Sep 26, 2012 at 9:51 PM, Ted Unangst wrote: > On Wed, Sep 26, 2012 at 11:18, Alexey Suslikov wrote: >> Hi. >> >> Any news on that? > > Can we do it without the local variables for speed part? I am not > interested in making this function faster. > Removing only local variables part reverts us to previous behavior (i.e. crashes). However, leaving current code as is but adding only local variables (see below) passes our test with no crashes. I'm starting to believe that static globals are not good. Can somebody help us with writing threaded test case? As I mentioned above, we use Kannel port as a test which is somewhat hard to share. Alexey Index: lib/libc/stdlib/random.c === RCS file: /cvs/src/lib/libc/stdlib/random.c,v retrieving revision 1.17 diff -u -p -r1.17 random.c --- lib/libc/stdlib/random.c1 Jun 2012 01:01:57 - 1.17 +++ lib/libc/stdlib/random.c26 Sep 2012 20:30:46 - @@ -380,17 +380,20 @@ long random(void) { int32_t i; + int32_t *f, *r; if (rand_type == TYPE_0) i = state[0] = (state[0] * 1103515245 + 12345) & 0x7fff; else { - *fptr += *rptr; - i = (*fptr >> 1) & 0x7fff; /* chucking least random bit */ - if (++fptr >= end_ptr) { - fptr = state; - ++rptr; - } else if (++rptr >= end_ptr) - rptr = state; + f = fptr; r = rptr; + *f += *r; + i = (*f >> 1) & 0x7fff; /* chucking least random bit */ + if (++f >= end_ptr) { + f = state; + ++r; + } else if (++r >= end_ptr) + r = state; + fptr = f; rptr = r; } return((long)i); }
Re: Threads related SIGSEGV in random.c (diff, v2)
On Wed, Sep 26, 2012 at 11:18, Alexey Suslikov wrote: > Hi. > > Any news on that? Can we do it without the local variables for speed part? I am not interested in making this function faster.
Re: Threads related SIGSEGV in random.c (diff, v2)
Hi. Any news on that? On Friday, September 21, 2012, Alexey Suslikov wrote: > On Fri, Sep 21, 2012 at 10:36 AM, Alexey Suslikov > > wrote: > > On Wed, Sep 19, 2012 at 10:24 PM, Ted Unangst > > > > wrote: > >> On Wed, Sep 19, 2012 at 18:50, Alexey Suslikov wrote: > >>> On Wednesday, September 19, 2012, Theo de Raadt wrote: > >>> > > arc4random() is also thread-safe (it has interal locking) and very > > desirable for other reasons. But no way to save state. > > The last part of this is intentional. Saving the state of pseudo > random number generators is a stupid concept from the 80's. > > >>> > >>> I see many rng functions behaving very differently. Is it a good idea > >>> to create a common locking layer on top of need-to-be-safe rng > >>> functions? Or we should deal only with original problem (and only > >>> port random.c code from netbsd)? > >> > >> just slap a mutex around it. > > > > With the diff below Kannel no longer crashes. Only protecting random() > > for now. > > > > Make random() thread-safe by surrounding real call with a mutex locking. > > Found by and diff from Roman Kravchuk. Mainly from NetBSD. > > Sorry. Here is correct diff. > > We kinda unsure about the approach. For now, we follow arc4random pattern. > Should we use generic _thread_mutex_lock/_thread_mutex_unlock instead? > > Index: lib/libc/include/thread_private.h > === > RCS file: /cvs/src/lib/libc/include/thread_private.h,v > retrieving revision 1.25 > diff -u -p -r1.25 thread_private.h > --- lib/libc/include/thread_private.h 16 Oct 2011 06:29:56 - > 1.25 > +++ lib/libc/include/thread_private.h 21 Sep 2012 07:59:34 - > @@ -172,4 +172,16 @@ void _thread_arc4_unlock(void); > _thread_arc4_unlock();\ > } while (0) > > +void _thread_random_lock(void); > +void _thread_random_unlock(void); > + > +#define _RANDOM_LOCK() do {\ > + if (__isthreaded) \ > + _thread_random_lock(); \ > + } while (0) > +#define _RANDOM_UNLOCK() do {\ > + if (__isthreaded) \ > + _thread_random_unlock();\ > + } while (0) > + > #endif /* _THREAD_PRIVATE_H_ */ > Index: lib/libc/stdlib/random.c > === > RCS file: /cvs/src/lib/libc/stdlib/random.c,v > retrieving revision 1.17 > diff -u -p -r1.17 random.c > --- lib/libc/stdlib/random.c1 Jun 2012 01:01:57 - 1.17 > +++ lib/libc/stdlib/random.c21 Sep 2012 07:59:35 - > @@ -35,6 +35,7 @@ > #include > #include > #include > +#include "thread_private.h" > > /* > * random.c: > @@ -376,21 +377,38 @@ setstate(char *arg_state) > * > * Returns a 31-bit random number. > */ > -long > -random(void) > +static long > +random_unlocked(void) > { > int32_t i; > + int32_t *f, *r; > > if (rand_type == TYPE_0) > i = state[0] = (state[0] * 1103515245 + 12345) & > 0x7fff; > else { > - *fptr += *rptr; > - i = (*fptr >> 1) & 0x7fff; /* chucking least random > bit */ > - if (++fptr >= end_ptr) { > - fptr = state; > - ++rptr; > - } else if (++rptr >= end_ptr) > - rptr = state; > + /* > +* Use local variables rather than static variables for > speed. > +*/ > + f = fptr; r = rptr; > + *f += *r; > + i = (*f >> 1) & 0x7fff; /* chucking least random > bit */ > + if (++f >= end_ptr) { > + f = state; > + ++r; > + } else if (++r >= end_ptr) > + r = state; > + fptr = f; rptr = r; > } > return((long)i); > +} > + > +long > +random(void) > +{ > + long r; > + > + _RANDOM_LOCK(); > + r = random_unlocked(); > + _RANDOM_UNLOCK(); > + return (r); > } > Index: lib/libc/thread/unithread_malloc_lock.c > === > RCS file: /cvs/src/lib/libc/thread/unithread_malloc_lock.c,v > retrieving revision 1.8 > diff -u -p -r1.8 unithread_malloc_lock.c > --- lib/libc/thread/unithread_malloc_lock.c 13 Jun 2008 21:18:43 - > 1.8 > +++ lib/libc/thread/unithread_malloc_lock.c 21 Sep 2012 07:59:35 - > @@ -21,6 +21,12 @@ WEAK_PROTOTYPE(_thread_arc4_unlock); > WEAK_ALIAS(_thread_arc4_lock); > WEAK_ALIAS(_thread_arc4_unlock); > > +WEAK_PROTOTYPE(_thread
Threads related SIGSEGV in random.c (diff, v2)
On Fri, Sep 21, 2012 at 10:36 AM, Alexey Suslikov wrote: > On Wed, Sep 19, 2012 at 10:24 PM, Ted Unangst wrote: >> On Wed, Sep 19, 2012 at 18:50, Alexey Suslikov wrote: >>> On Wednesday, September 19, 2012, Theo de Raadt wrote: >>> > arc4random() is also thread-safe (it has interal locking) and very > desirable for other reasons. But no way to save state. The last part of this is intentional. Saving the state of pseudo random number generators is a stupid concept from the 80's. >>> >>> I see many rng functions behaving very differently. Is it a good idea >>> to create a common locking layer on top of need-to-be-safe rng >>> functions? Or we should deal only with original problem (and only >>> port random.c code from netbsd)? >> >> just slap a mutex around it. > > With the diff below Kannel no longer crashes. Only protecting random() > for now. > > Make random() thread-safe by surrounding real call with a mutex locking. > Found by and diff from Roman Kravchuk. Mainly from NetBSD. Sorry. Here is correct diff. We kinda unsure about the approach. For now, we follow arc4random pattern. Should we use generic _thread_mutex_lock/_thread_mutex_unlock instead? Index: lib/libc/include/thread_private.h === RCS file: /cvs/src/lib/libc/include/thread_private.h,v retrieving revision 1.25 diff -u -p -r1.25 thread_private.h --- lib/libc/include/thread_private.h 16 Oct 2011 06:29:56 - 1.25 +++ lib/libc/include/thread_private.h 21 Sep 2012 07:59:34 - @@ -172,4 +172,16 @@ void _thread_arc4_unlock(void); _thread_arc4_unlock();\ } while (0) +void _thread_random_lock(void); +void _thread_random_unlock(void); + +#define _RANDOM_LOCK() do {\ + if (__isthreaded) \ + _thread_random_lock(); \ + } while (0) +#define _RANDOM_UNLOCK() do {\ + if (__isthreaded) \ + _thread_random_unlock();\ + } while (0) + #endif /* _THREAD_PRIVATE_H_ */ Index: lib/libc/stdlib/random.c === RCS file: /cvs/src/lib/libc/stdlib/random.c,v retrieving revision 1.17 diff -u -p -r1.17 random.c --- lib/libc/stdlib/random.c1 Jun 2012 01:01:57 - 1.17 +++ lib/libc/stdlib/random.c21 Sep 2012 07:59:35 - @@ -35,6 +35,7 @@ #include #include #include +#include "thread_private.h" /* * random.c: @@ -376,21 +377,38 @@ setstate(char *arg_state) * * Returns a 31-bit random number. */ -long -random(void) +static long +random_unlocked(void) { int32_t i; + int32_t *f, *r; if (rand_type == TYPE_0) i = state[0] = (state[0] * 1103515245 + 12345) & 0x7fff; else { - *fptr += *rptr; - i = (*fptr >> 1) & 0x7fff; /* chucking least random bit */ - if (++fptr >= end_ptr) { - fptr = state; - ++rptr; - } else if (++rptr >= end_ptr) - rptr = state; + /* +* Use local variables rather than static variables for speed. +*/ + f = fptr; r = rptr; + *f += *r; + i = (*f >> 1) & 0x7fff; /* chucking least random bit */ + if (++f >= end_ptr) { + f = state; + ++r; + } else if (++r >= end_ptr) + r = state; + fptr = f; rptr = r; } return((long)i); +} + +long +random(void) +{ + long r; + + _RANDOM_LOCK(); + r = random_unlocked(); + _RANDOM_UNLOCK(); + return (r); } Index: lib/libc/thread/unithread_malloc_lock.c === RCS file: /cvs/src/lib/libc/thread/unithread_malloc_lock.c,v retrieving revision 1.8 diff -u -p -r1.8 unithread_malloc_lock.c --- lib/libc/thread/unithread_malloc_lock.c 13 Jun 2008 21:18:43 - 1.8 +++ lib/libc/thread/unithread_malloc_lock.c 21 Sep 2012 07:59:35 - @@ -21,6 +21,12 @@ WEAK_PROTOTYPE(_thread_arc4_unlock); WEAK_ALIAS(_thread_arc4_lock); WEAK_ALIAS(_thread_arc4_unlock); +WEAK_PROTOTYPE(_thread_random_lock); +WEAK_PROTOTYPE(_thread_random_unlock); + +WEAK_ALIAS(_thread_random_lock); +WEAK_ALIAS(_thread_random_unlock); + void WEAK_NAME(_thread_malloc_lock)(void) { @@ -53,6 +59,18 @@ WEAK_NAME(_thread_arc4_lock)(void) void WEAK_NAME(_thread_arc4_unlock)(void) +{ + return; +} + +void +WEAK_NAME(_thread_random_lock)(void) +{ +