New submission from Ján Stanček:

The following crash is sporadically observed in RHEL7 anaconda:

(gdb) f 0
#0  PyThread_ReInitTLS () at /usr/src/debug/Python-2.7.5/Python/thread.c:411
411             if (p->id != id) {
(gdb) list
406         keymutex = PyThread_allocate_lock();
407
408         /* Delete all keys which do not match the current thread id */
409         q = &keyhead;
410         while ((p = *q) != NULL) {
411             if (p->id != id) {
412                 *q = p->next;
413                 free((void *)p);
414                 /* NB This does *not* free p->value! */
415             }
(gdb) p p
$1 = (struct key *) 0x3333333333333333
(gdb) p keyhead
$2 = (struct key *) 0x3333333333333333

The key list is protected by keymutex (except in PyThread_ReInitTLS), but there
doesn't seem to be any protection against a concurrent fork(). What seems to
happen is that fork() is called at a moment when the key list is not consistent.

For example, if I fill each newly allocated key with 0xfe bytes:

static struct key *find_key(int key, void *value)
    // find_key with extra memset()
    ...
    p = (struct key *)malloc(sizeof(struct key));
    memset(p, 0xfe, sizeof(struct key));
    if (p != NULL) {
        p->id = id;
        p->key = key;
        p->value = value;
        p->next = keyhead;
        keyhead = p;
    }
    ...

Looking at the disassembly, the compiler reordered the last 2 writes:

   0x00000000004fcb50 <+272>:   callq  0x413d10 <malloc@plt>
   0x00000000004fcb55 <+277>:   movabs $0xfefefefefefefefe,%rcx
   0x00000000004fcb5f <+287>:   test   %rax,%rax
   0x00000000004fcb62 <+290>:   mov    %rcx,(%rax)
   0x00000000004fcb65 <+293>:   mov    %rcx,0x8(%rax)
   0x00000000004fcb69 <+297>:   mov    %rcx,0x10(%rax)
   0x00000000004fcb6d <+301>:   mov    %rcx,0x18(%rax)
   0x00000000004fcb71 <+305>:   je     0x4fcaff <PyThread_set_key_value+191>
   0x00000000004fcb73 <+307>:   mov    0x2f1e26(%rip),%rdx        # 0x7ee9a0 <keyhead>
   0x00000000004fcb7a <+314>:   mov    0x2f1dff(%rip),%rdi        # 0x7ee980 <keymutex>
   0x00000000004fcb81 <+321>:   xor    %r14d,%r14d
   0x00000000004fcb84 <+324>:   mov    %rbp,0x8(%rax)
   0x00000000004fcb88 <+328>:   mov    %r12d,0x10(%rax)
   0x00000000004fcb8c <+332>:   mov    %r13,0x18(%rax)
   0x00000000004fcb90 <+336>:   mov    %rax,0x2f1e09(%rip)        # 0x7ee9a0 <keyhead>
   0x00000000004fcb97 <+343>:   mov    %rdx,(%rax)

Now consider what happens when a different thread calls fork() in between these
2 writes: keyhead has been updated, but keyhead->next has not been updated yet.

Now when anaconda hangs, I get:

(gdb) list
407         keymutex = PyThread_allocate_lock();
408
409         /* Delete all keys which do not match the current thread id */
410         q = &keyhead;
411         while ((p = *q) != NULL) {
412             if (p->id != id) {
413                 *q = p->next;
414                 free((void *)p);
415                 /* NB This does *not* free p->value! */
416             }
(gdb) p p
$1 = (struct key *) 0xfefefefefefefefe
(gdb) p keyhead
$2 = (struct key *) 0xfefefefefefefefe


Here's how I think we get into this state:

-------------------------> thread 1
# has GIL
Thread.start
  _start_new_thread(self.__bootstrap, ())
    PyThread_start_new_thread(t_bootstrap)
      # spawns thread 3

-------------------------> thread 2
...
  # waiting for GIL

-------------------------> thread 3
t_bootstrap
  _PyThreadState_Init # does not have GIL yet at this point
    _PyGILState_NoteThreadState
      PyThread_set_key_value(autoTLSkey, (void *)tstate)
        find_key
          # key list is temporarily not consistent
          # due to the compiler reordering a couple of writes in find_key

-------------------------> thread 1
continuing Thread.start
  self.__started.wait()
    Event.wait()
      self.__cond.wait 
        Condition.wait()
          waiter = _allocate_lock()
          waiter.acquire()
            lock_PyThread_acquire_lock
              Py_BEGIN_ALLOW_THREADS
                PyEval_SaveThread
                  PyThread_release_lock(interpreter_lock);

-------------------------> thread 2
...
  # acquired GIL
  os.fork() # forks inconsistent list

-------------------------> child
PyOS_AfterFork()
  PyThread_ReInitTLS()
    SIGSEGV

The attached patch for Python makes the problem easier to reproduce by adding
delays in a couple of places, which widens the window during which the key list
is not consistent.

----------
components: Interpreter Core
files: bz1268226_reproducer2.tar.gz
messages: 288510
nosy: Ján Stanček
priority: normal
severity: normal
status: open
title: _PyThreadState_Init and fork race leads to inconsistent key list
type: crash
versions: Python 2.7
Added file: http://bugs.python.org/file46666/bz1268226_reproducer2.tar.gz

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue29640>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to