New submission from Ján Stanček: The following crash is sporadically observed in the RHEL7 anaconda installer:
(gdb) f 0 #0 PyThread_ReInitTLS () at /usr/src/debug/Python-2.7.5/Python/thread.c:411 411 if (p->id != id) { (gdb) list 406 keymutex = PyThread_allocate_lock(); 407 408 /* Delete all keys which do not match the current thread id */ 409 q = &keyhead; 410 while ((p = *q) != NULL) { 411 if (p->id != id) { 412 *q = p->next; 413 free((void *)p); 414 /* NB This does *not* free p->value! */ 415 } (gdb) p p $1 = (struct key *) 0x3333333333333333 (gdb) p keyhead $2 = (struct key *) 0x3333333333333333 The key list is protected by keymutex (except in PyThread_ReInitTLS), but there doesn't seem to be any protection against a concurrent fork(). What seems to happen is that fork() is called at a moment when the key list is not consistent. For example, if I initialize each new key to 0xfe: static struct key *find_key(int key, void *value) // find_key with extra memset() ... p = (struct key *)malloc(sizeof(struct key)); memset(p, 0xfe, sizeof(struct key)); if (p != NULL) { p->id = id; p->key = key; p->value = value; p->next = keyhead; keyhead = p; } ... 
Looking at the disassembly, the compiler reordered the last 2 writes: 0x00000000004fcb50 <+272>: callq 0x413d10 <malloc@plt> 0x00000000004fcb55 <+277>: movabs $0xfefefefefefefefe,%rcx 0x00000000004fcb5f <+287>: test %rax,%rax 0x00000000004fcb62 <+290>: mov %rcx,(%rax) 0x00000000004fcb65 <+293>: mov %rcx,0x8(%rax) 0x00000000004fcb69 <+297>: mov %rcx,0x10(%rax) 0x00000000004fcb6d <+301>: mov %rcx,0x18(%rax) 0x00000000004fcb71 <+305>: je 0x4fcaff <PyThread_set_key_value+191> 0x00000000004fcb73 <+307>: mov 0x2f1e26(%rip),%rdx # 0x7ee9a0 <keyhead> 0x00000000004fcb7a <+314>: mov 0x2f1dff(%rip),%rdi # 0x7ee980 <keymutex> 0x00000000004fcb81 <+321>: xor %r14d,%r14d 0x00000000004fcb84 <+324>: mov %rbp,0x8(%rax) 0x00000000004fcb88 <+328>: mov %r12d,0x10(%rax) 0x00000000004fcb8c <+332>: mov %r13,0x18(%rax) 0x00000000004fcb90 <+336>: mov %rax,0x2f1e09(%rip) # 0x7ee9a0 <keyhead> 0x00000000004fcb97 <+343>: mov %rdx,(%rax) Now consider what happens when a different thread calls fork() in between these 2 writes: keyhead has been updated, but keyhead->next has not been updated yet. Now, when anaconda hangs, I get: (gdb) list 407 keymutex = PyThread_allocate_lock(); 408 409 /* Delete all keys which do not match the current thread id */ 410 q = &keyhead; 411 while ((p = *q) != NULL) { 412 if (p->id != id) { 413 *q = p->next; 414 free((void *)p); 415 /* NB This does *not* free p->value! */ 416 } (gdb) p p $1 = (struct key *) 0xfefefefefefefefe (gdb) p keyhead $2 = (struct key *) 0xfefefefefefefefe Here's how I think we get into this state: -------------------------> thread 1 # has GIL Thread.start _start_new_thread(self.__bootstrap, ()) PyThread_start_new_thread(t_bootstrap) # spawns thread 3 -------------------------> thread 2 ... 
# waiting for GIL -------------------------> thread 3 t_bootstrap _PyThreadState_Init # does not have GIL yet at this point _PyGILState_NoteThreadState PyThread_set_key_value(autoTLSkey, (void *)tstate) find_key # key list is temporarily not consistent # due to compiler reordering couple writes in find_key -------------------------> thread 1 continuing Thread.start self.__started.wait() Event.wait() self.__cond.wait Condition.wait() waiter = _allocate_lock() waiter.acquire() lock_PyThread_acquire_lock Py_BEGIN_ALLOW_THREADS PyEval_SaveThread PyThread_release_lock(interpreter_lock); -------------------------> thread 2 ... # acquired GIL os.fork() # forks inconsistent list -------------------------> child PyOS_AfterFork() PyThread_ReInitTLS() SIGSEGV The attached patch for Python makes the problem easier to reproduce by adding delays in a couple of places, which widens the window during which the key list is inconsistent. ---------- components: Interpreter Core files: bz1268226_reproducer2.tar.gz messages: 288510 nosy: Ján Stanček priority: normal severity: normal status: open title: _PyThreadState_Init and fork race leads to inconsistent key list type: crash versions: Python 2.7 Added file: http://bugs.python.org/file46666/bz1268226_reproducer2.tar.gz _______________________________________ Python tracker <rep...@bugs.python.org> <http://bugs.python.org/issue29640> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com