Author: Remi Meier <[email protected]>
Branch: stmgc-c8
Changeset: r82394:616abaac8a22
Date: 2016-02-21 16:32 +0100
http://bitbucket.org/pypy/pypy/changeset/616abaac8a22/

Log:    Partial merge with default

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -73,3 +73,7 @@
 Move wrappers for OS functions from `rpython/rtyper` to `rpython/rlib` and 
 turn them into regular RPython functions. Most RPython-compatible `os.*` 
 functions are now directly accessible as `rpython.rposix.*`.
+
+.. branch: always-enable-gil
+
+Slightly simplify the GIL handling in non-jitted code.  Fixes issue #2205.
diff --git a/pypy/module/_cffi_backend/call_python.py 
b/pypy/module/_cffi_backend/call_python.py
--- a/pypy/module/_cffi_backend/call_python.py
+++ b/pypy/module/_cffi_backend/call_python.py
@@ -40,10 +40,9 @@
        at least 8 bytes in size.
     """
     from pypy.module._cffi_backend.ccallback import reveal_callback
+    from rpython.rlib import rgil
 
-    after = rffi.aroundstate.after
-    if after:
-        after()
+    rgil.acquire()
     rffi.stackcounter.stacks_counter += 1
     llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
 
@@ -71,9 +70,7 @@
     cerrno._errno_before(rffi.RFFI_ERR_ALL | rffi.RFFI_ALT_ERRNO)
 
     rffi.stackcounter.stacks_counter -= 1
-    before = rffi.aroundstate.before
-    if before:
-        before()
+    rgil.release()
 
 
 def get_ll_cffi_call_python():
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -602,6 +602,7 @@
 # Make the wrapper for the cases (1) and (2)
 def make_wrapper(space, callable, gil=None):
     "NOT_RPYTHON"
+    from rpython.rlib import rgil
     names = callable.api_func.argnames
     argtypes_enum_ui = 
unrolling_iterable(enumerate(zip(callable.api_func.argtypes,
         [name.startswith("w_") for name in names])))
@@ -617,9 +618,7 @@
         # we hope that malloc removal removes the newtuple() that is
         # inserted exactly here by the varargs specializer
         if gil_acquire:
-            after = rffi.aroundstate.after
-            if after:
-                after()
+            rgil.acquire()
         rffi.stackcounter.stacks_counter += 1
         llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
         retval = fatal_value
@@ -692,9 +691,7 @@
                 pypy_debug_catch_fatal_exception()
         rffi.stackcounter.stacks_counter -= 1
         if gil_release:
-            before = rffi.aroundstate.before
-            if before:
-                before()
+            rgil.release()
         return retval
     callable._always_inline_ = 'try'
     wrapper.__name__ = "wrapper for %r" % (callable, )
diff --git a/pypy/module/signal/__init__.py b/pypy/module/signal/__init__.py
--- a/pypy/module/signal/__init__.py
+++ b/pypy/module/signal/__init__.py
@@ -48,3 +48,6 @@
                                                   use_bytecode_counter=False)
         space.actionflag.__class__ = interp_signal.SignalActionFlag
         # xxx yes I know the previous line is a hack
+
+    def startup(self, space):
+        space.check_signal_action.startup(space)
diff --git a/pypy/module/signal/interp_signal.py 
b/pypy/module/signal/interp_signal.py
--- a/pypy/module/signal/interp_signal.py
+++ b/pypy/module/signal/interp_signal.py
@@ -67,19 +67,25 @@
         AsyncAction.__init__(self, space)
         self.pending_signal = -1
         self.fire_in_another_thread = False
-        if self.space.config.objspace.usemodules.thread:
-            from pypy.module.thread import gil
-            gil.after_thread_switch = self._after_thread_switch
+        #
+        @rgc.no_collect
+        def _after_thread_switch():
+            if self.fire_in_another_thread:
+                if self.space.threadlocals.signals_enabled():
+                    self.fire_in_another_thread = False
+                    self.space.actionflag.rearm_ticker()
+                    # this occurs when we just switched to the main thread
+                    # and there is a signal pending: we force the ticker to
+                    # -1, which should ensure perform() is called quickly.
+        self._after_thread_switch = _after_thread_switch
+        # ^^^ so that 'self._after_thread_switch' can be annotated as a
+        # constant
 
-    @rgc.no_collect
-    def _after_thread_switch(self):
-        if self.fire_in_another_thread:
-            if self.space.threadlocals.signals_enabled():
-                self.fire_in_another_thread = False
-                self.space.actionflag.rearm_ticker()
-                # this occurs when we just switched to the main thread
-                # and there is a signal pending: we force the ticker to
-                # -1, which should ensure perform() is called quickly.
+    def startup(self, space):
+        # this is translated
+        if space.config.objspace.usemodules.thread:
+            from rpython.rlib import rgil
+            rgil.invoke_after_thread_switch(self._after_thread_switch)
 
     def perform(self, executioncontext, frame):
         self._poll_for_signals()
diff --git a/pypy/module/thread/gil.py b/pypy/module/thread/gil.py
--- a/pypy/module/thread/gil.py
+++ b/pypy/module/thread/gil.py
@@ -11,12 +11,10 @@
 from pypy.module.thread.error import wrap_thread_error
 from pypy.interpreter.executioncontext import PeriodicAsyncAction
 from pypy.module.thread.threadlocals import OSThreadLocals
-from rpython.rlib.objectmodel import invoke_around_extcall, has_around_extcall
 
 class GILThreadLocals(OSThreadLocals):
     """A version of OSThreadLocals that enforces a GIL."""
     gil_ready = False
-    is_atomic = 0
     _immutable_fields_ = ['gil_ready?']
 
     def initialize(self, space):
@@ -24,45 +22,21 @@
         space.actionflag.register_periodic_action(GILReleaseAction(space),
                                                   use_bytecode_counter=True)
 
-    def _initialize_gil(self, space):
-        rgil.gil_allocate()
-
     def setup_threads(self, space):
         """Enable threads in the object space, if they haven't already been."""
         if not self.gil_ready:
-            self._initialize_gil(space)
+            # Note: this is a quasi-immutable read by module/pypyjit/interp_jit
+            # It must be changed (to True) only if it was really False before
+            rgil.allocate()
             self.gil_ready = True
             result = True
         else:
             result = False      # already set up
-
-        # add the GIL-releasing callback around external function calls.
-        #
-        # XXX we assume a single space, but this is not quite true during
-        # testing; for example, if you run the whole of test_lock you get
-        # a deadlock caused by the first test's space being reused by
-        # test_lock_again after the global state was cleared by
-        # test_compile_lock.  As a workaround, we repatch these global
-        # fields systematically.
-        self.set_gil_releasing_calls()
         return result
 
-    def set_gil_releasing_calls(self):
-        if self.is_atomic == 0:
-            # not running atomically so far, so we register the
-            # functions that will be called around external calls
-            invoke_around_extcall(before_external_call, after_external_call)
-        else:
-            # running atomically: we have the GIL here, so if we
-            # just un-register the functions, we won't release the GIL
-            # any more.
-            invoke_around_extcall(None, None)
-
-    def reinit_threads(self, space):
-        "Called in the child process after a fork()"
-        OSThreadLocals.reinit_threads(self, space)
-        if self.gil_ready:     # re-initialize the gil if needed
-            self._initialize_gil(space)
+    ## def reinit_threads(self, space):
+    ##     "Called in the child process after a fork()"
+    ##     OSThreadLocals.reinit_threads(self, space)
 
 
 class GILReleaseAction(PeriodicAsyncAction):
@@ -71,44 +45,4 @@
     """
 
     def perform(self, executioncontext, frame):
-        do_yield_thread()
-
-
-after_thread_switch = lambda: None     # hook for signal.py
-
-def before_external_call():
-    # this function must not raise, in such a way that the exception
-    # transformer knows that it cannot raise!
-    rgil.gil_release()
-before_external_call._gctransformer_hint_cannot_collect_ = True
-before_external_call._dont_reach_me_in_del_ = True
-
-def after_external_call():
-    rgil.gil_acquire()
-    rthread.gc_thread_run()
-    after_thread_switch()
-after_external_call._gctransformer_hint_cannot_collect_ = True
-after_external_call._dont_reach_me_in_del_ = True
-
-# The _gctransformer_hint_cannot_collect_ hack is needed for
-# translations in which the *_external_call() functions are not inlined.
-# They tell the gctransformer not to save and restore the local GC
-# pointers in the shadow stack.  This is necessary because the GIL is
-# not held after the call to before_external_call() or before the call
-# to after_external_call().
-
-def do_yield_thread():
-    # explicitly release the gil, in a way that tries to give more
-    # priority to other threads (as opposed to continuing to run in
-    # the same thread).
-    if has_around_extcall():
-        if rgil.gil_yield_thread():
-            rthread.gc_thread_run()
-            after_thread_switch()
-do_yield_thread._gctransformer_hint_close_stack_ = True
-do_yield_thread._dont_reach_me_in_del_ = True
-do_yield_thread._dont_inline_ = True
-
-# do_yield_thread() needs a different hint: _gctransformer_hint_close_stack_.
-# The *_external_call() functions are themselves called only from the rffi
-# module from a helper function that also has this hint.
+        rgil.yield_thread()
diff --git a/pypy/module/thread/test/support.py 
b/pypy/module/thread/test/support.py
--- a/pypy/module/thread/test/support.py
+++ b/pypy/module/thread/test/support.py
@@ -5,19 +5,19 @@
 import errno
 
 from pypy.interpreter.gateway import interp2app, unwrap_spec
+from rpython.rlib import rgil
 
 
 NORMAL_TIMEOUT = 300.0   # 5 minutes
 
 
 def waitfor(space, w_condition, delay=1):
-    from pypy.module.thread import gil
     adaptivedelay = 0.04
     limit = time.time() + delay * NORMAL_TIMEOUT
     while time.time() <= limit:
-        gil.before_external_call()
+        rgil.release()
         time.sleep(adaptivedelay)
-        gil.after_external_call()
+        rgil.acquire()
         gc.collect()
         if space.is_true(space.call_function(w_condition)):
             return
diff --git a/pypy/module/thread/test/test_gil.py 
b/pypy/module/thread/test/test_gil.py
--- a/pypy/module/thread/test/test_gil.py
+++ b/pypy/module/thread/test/test_gil.py
@@ -1,5 +1,6 @@
 import time
 from pypy.module.thread import gil
+from rpython.rlib import rgil
 from rpython.rlib.test import test_rthread
 from rpython.rlib import rthread as thread
 from rpython.rlib.objectmodel import we_are_translated
@@ -55,7 +56,7 @@
                 assert state.datalen3 == len(state.data)
                 assert state.datalen4 == len(state.data)
                 debug_print(main, i, state.datalen4)
-                gil.do_yield_thread()
+                rgil.yield_thread()
                 assert i == j
                 j += 1
         def bootstrap():
@@ -82,9 +83,9 @@
                 if not still_waiting:
                     raise ValueError("time out")
                 still_waiting -= 1
-                if not we_are_translated(): gil.before_external_call()
+                if not we_are_translated(): rgil.release()
                 time.sleep(0.01)
-                if not we_are_translated(): gil.after_external_call()
+                if not we_are_translated(): rgil.acquire()
             debug_print("leaving!")
             i1 = i2 = 0
             for tid, i in state.data:
diff --git a/rpython/jit/backend/llsupport/assembler.py 
b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -390,6 +390,8 @@
         # the call that it is no longer equal to css.  See description
         # in translator/c/src/thread_pthread.c.
 
+        # XXX some duplicated logic here, but note that rgil.acquire()
+        # does more than just RPyGilAcquire()
         if old_rpy_fastgil == 0:
             # this case occurs if some other thread stole the GIL but
             # released it again.  What occurred here is that we changed
@@ -400,9 +402,8 @@
         elif old_rpy_fastgil == 1:
             # 'rpy_fastgil' was (and still is) locked by someone else.
             # We need to wait for the regular mutex.
-            after = rffi.aroundstate.after
-            if after:
-                after()
+            from rpython.rlib import rgil
+            rgil.acquire()
         else:
             # stole the GIL from a different thread that is also
             # currently in an external call from the jit.  Attach
@@ -431,9 +432,8 @@
         # 'rpy_fastgil' contains only zero or non-zero, and this is only
         # called when the old value stored in 'rpy_fastgil' was non-zero
         # (i.e. still locked, must wait with the regular mutex)
-        after = rffi.aroundstate.after
-        if after:
-            after()
+        from rpython.rlib import rgil
+        rgil.acquire()
 
     _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
     _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py 
b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -17,7 +17,6 @@
 from rpython.jit.backend.llsupport.test.test_regalloc_integration import 
BaseTestRegalloc
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.codewriter import longlong
-from rpython.rlib.objectmodel import invoke_around_extcall
 
 CPU = getcpuclass()
 
@@ -625,9 +624,6 @@
         self.S = S
         self.cpu = cpu
 
-    def teardown_method(self, meth):
-        rffi.aroundstate._cleanup_()
-
     def test_shadowstack_call(self):
         cpu = self.cpu
         cpu.gc_ll_descr.init_nursery(100)
diff --git a/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py 
b/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py
--- a/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py
+++ b/rpython/jit/backend/llsupport/test/zrpy_releasegil_test.py
@@ -1,6 +1,5 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rlib.jit import dont_look_inside
-from rpython.rlib.objectmodel import invoke_around_extcall
 from rpython.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rlib import rposix
@@ -16,20 +15,10 @@
     compile_kwds = dict(enable_opts=ALL_OPTS_NAMES, thread=True)
 
     def define_simple(self):
-        class Glob:
-            def __init__(self):
-                self.event = 0
-        glob = Glob()
-        #
-
         c_strchr = rffi.llexternal('strchr', [rffi.CCHARP, lltype.Signed],
                                    rffi.CCHARP)
 
-        def func():
-            glob.event += 1
-
         def before(n, x):
-            invoke_around_extcall(func, func)
             return (n, None, None, None, None, None,
                     None, None, None, None, None, None)
         #
@@ -73,7 +62,8 @@
         def f42(n):
             length = len(glob.lst)
             raw = alloc1()
-            fn = llhelper(CALLBACK, rffi._make_wrapper_for(CALLBACK, callback))
+            wrapper = rffi._make_wrapper_for(CALLBACK, callback, None, True)
+            fn = llhelper(CALLBACK, wrapper)
             if n & 1:    # to create a loop and a bridge, and also
                 pass     # to run the qsort() call in the blackhole interp
             c_qsort(rffi.cast(rffi.VOIDP, raw), rffi.cast(rffi.SIZE_T, 2),
diff --git a/rpython/jit/metainterp/test/test_ajit.py 
b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -4044,7 +4044,7 @@
         self.interp_operations(f, [])
 
     def test_external_call(self):
-        from rpython.rlib.objectmodel import invoke_around_extcall
+        from rpython.rlib import rgil
 
         TIME_T = lltype.Signed
         # ^^^ some 32-bit platforms have a 64-bit rffi.TIME_T, but we
@@ -4058,11 +4058,6 @@
             pass
         state = State()
 
-        def before():
-            if we_are_jitted():
-                raise Oups
-            state.l.append("before")
-
         def after():
             if we_are_jitted():
                 raise Oups
@@ -4070,14 +4065,14 @@
 
         def f():
             state.l = []
-            invoke_around_extcall(before, after)
+            rgil.invoke_after_thread_switch(after)
             external(lltype.nullptr(T.TO))
             return len(state.l)
 
         res = self.interp_operations(f, [])
-        assert res == 2
+        assert res == 1
         res = self.interp_operations(f, [])
-        assert res == 2
+        assert res == 1
         self.check_operations_history(call_release_gil_i=1, call_may_force_i=0)
 
     def test_unescaped_write_zero(self):
diff --git a/rpython/memory/gctransform/framework.py 
b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -1323,8 +1323,8 @@
                   resultvar=op.result)
 
     def gct_gc_thread_run(self, hop):
-        assert self.translator.config.translation.thread
-        if hasattr(self.root_walker, 'thread_run_ptr'):
+        if (self.translator.config.translation.thread and
+                hasattr(self.root_walker, 'thread_run_ptr')):
             livevars = self.push_roots(hop)
             assert not livevars, "live GC var around %s!" % (hop.spaceop,)
             hop.genop("direct_call", [self.root_walker.thread_run_ptr])
diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py
--- a/rpython/memory/gcwrapper.py
+++ b/rpython/memory/gcwrapper.py
@@ -184,6 +184,9 @@
                 hdr.tid |= self.gc.gcflag_extra
         return (hdr.tid & self.gc.gcflag_extra) != 0
 
+    def thread_run(self):
+        pass
+
 # ____________________________________________________________
 
 class LLInterpRootWalker:
diff --git a/rpython/rlib/entrypoint.py b/rpython/rlib/entrypoint.py
--- a/rpython/rlib/entrypoint.py
+++ b/rpython/rlib/entrypoint.py
@@ -56,10 +56,11 @@
     """
     def deco(func):
         source = py.code.Source("""
+        from rpython.rlib import rgil
+
         def wrapper(%(args)s):
             # acquire the GIL
-            after = rffi.aroundstate.after
-            if after: after()
+            rgil.acquire()
             #
             rffi.stackcounter.stacks_counter += 1
             llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
@@ -78,8 +79,7 @@
                     assert 0 # dead code
             rffi.stackcounter.stacks_counter -= 1
             # release the GIL
-            before = rffi.aroundstate.before
-            if before: before()
+            rgil.release()
             #
             return res
         """ % {'args': ', '.join(['arg%d' % i for i in range(len(argtypes))])})
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -594,7 +594,7 @@
         from rpython.rtyper.lltypesystem import lltype
         if isinstance(vobj.concretetype, lltype.Ptr):
             return hop.genop('cast_ptr_to_int', [vobj],
-                             resulttype = lltype.Signed)
+                                resulttype = lltype.Signed)
         from rpython.rtyper.error import TyperError
         raise TyperError("current_object_addr_as_int() cannot be applied to"
                          " %r" % (vobj.concretetype,))
@@ -604,30 +604,10 @@
 def hlinvoke(repr, llcallable, *args):
     raise TypeError("hlinvoke is meant to be rtyped and not called direclty")
 
-def invoke_around_extcall(before, after,
-                          enter_callback=None, leave_callback=None):
-    """Call before() before any external function call, and after() after.
-    At the moment only one pair before()/after() can be registered at a time.
+def is_in_callback():
+    """Returns True if we're currently in a callback *or* if there are
+    multiple threads around.
     """
-    # NOTE: the hooks are cleared during translation!  To be effective
-    # in a compiled program they must be set at run-time.
-    from rpython.rtyper.lltypesystem import rffi
-    rffi.aroundstate.before = before
-    rffi.aroundstate.after = after
-    # the 'aroundstate' contains regular function and not ll pointers to them,
-    # but let's call llhelper() anyway to force their annotation
-    from rpython.rtyper.annlowlevel import llhelper
-    if before is not None: llhelper(rffi.AroundFnPtr, before)
-    if after  is not None: llhelper(rffi.AroundFnPtr, after)
-    # do the same thing about enter/leave_callback
-    if enter_callback is not None:
-        rffi.aroundstate.enter_callback = enter_callback
-        llhelper(rffi.EnterCallbackFnPtr, enter_callback)
-    if leave_callback is not None:
-        rffi.aroundstate.leave_callback = leave_callback
-        llhelper(rffi.LeaveCallbackFnPtr, leave_callback)
-
-def is_in_callback():
     from rpython.rtyper.lltypesystem import rffi
     return rffi.stackcounter.stacks_counter > 1
 
diff --git a/rpython/rlib/rgil.py b/rpython/rlib/rgil.py
--- a/rpython/rlib/rgil.py
+++ b/rpython/rlib/rgil.py
@@ -2,6 +2,7 @@
 from rpython.translator import cdir
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.extregistry import ExtRegistryEntry
 
 # these functions manipulate directly the GIL, whose definition does not
 # escape the C code itself
@@ -10,27 +11,135 @@
 eci = ExternalCompilationInfo(
     includes = ['src/thread.h'],
     separate_module_files = [translator_c_dir / 'src' / 'thread.c'],
-    include_dirs = [translator_c_dir])
+    include_dirs = [translator_c_dir],
+    post_include_bits = ['#define RPY_WITH_GIL'])
 
 llexternal = rffi.llexternal
 
 
-gil_allocate      = llexternal('RPyGilAllocate', [], lltype.Void,
+_gil_allocate = llexternal('RPyGilAllocate', [], lltype.Void,
+                           _nowrapper=True, sandboxsafe=True,
+                           compilation_info=eci)
+
+_gil_yield_thread = llexternal('RPyGilYieldThread', [], lltype.Signed,
                                _nowrapper=True, sandboxsafe=True,
                                compilation_info=eci)
 
-gil_yield_thread  = llexternal('RPyGilYieldThread', [], lltype.Signed,
+_gil_release      = llexternal('RPyGilRelease', [], lltype.Void,
                                _nowrapper=True, sandboxsafe=True,
                                compilation_info=eci)
 
-gil_release       = llexternal('RPyGilRelease', [], lltype.Void,
-                               _nowrapper=True, sandboxsafe=True,
-                               compilation_info=eci)
-
-gil_acquire       = llexternal('RPyGilAcquire', [], lltype.Void,
+_gil_acquire      = llexternal('RPyGilAcquire', [], lltype.Void,
                               _nowrapper=True, sandboxsafe=True,
                               compilation_info=eci)
 
 gil_fetch_fastgil = llexternal('RPyFetchFastGil', [], llmemory.Address,
                                _nowrapper=True, sandboxsafe=True,
                                compilation_info=eci)
+
+# ____________________________________________________________
+
+
+def invoke_after_thread_switch(callback):
+    """Invoke callback() after a thread switch.
+
+    This is a hook used by pypy.module.signal.  Several callbacks should
+    be easy to support (but not right now).
+
+    This function should be called from the translated RPython program
+    (i.e. *not* at module level!), but registers the callback
+    statically.  The exact point at which invoke_after_thread_switch()
+    is called has no importance: the callback() will be called anyway.
+    """
+    print "NOTE: invoke_after_thread_switch() is meant to be translated "
+    print "and not called directly.  Using some emulation."
+    global _emulated_after_thread_switch
+    _emulated_after_thread_switch = callback
+
+_emulated_after_thread_switch = None
+
+def _after_thread_switch():
+    """NOT_RPYTHON"""
+    if _emulated_after_thread_switch is not None:
+        _emulated_after_thread_switch()
+
+
+class Entry(ExtRegistryEntry):
+    _about_ = invoke_after_thread_switch
+
+    def compute_result_annotation(self, s_callback):
+        assert s_callback.is_constant()
+        callback = s_callback.const
+        bk = self.bookkeeper
+        translator = bk.annotator.translator
+        if hasattr(translator, '_rgil_invoke_after_thread_switch'):
+            assert translator._rgil_invoke_after_thread_switch == callback, (
+                "not implemented yet: several invoke_after_thread_switch()")
+        else:
+            translator._rgil_invoke_after_thread_switch = callback
+        bk.emulate_pbc_call("rgil.invoke_after_thread_switch", s_callback, [])
+
+    def specialize_call(self, hop):
+        # the actual call is not done here
+        hop.exception_cannot_occur()
+
+class Entry(ExtRegistryEntry):
+    _about_ = _after_thread_switch
+
+    def compute_result_annotation(self):
+        # the call has been emulated already in invoke_after_thread_switch()
+        pass
+
+    def specialize_call(self, hop):
+        translator = hop.rtyper.annotator.translator
+        if hasattr(translator, '_rgil_invoke_after_thread_switch'):
+            func = translator._rgil_invoke_after_thread_switch
+            graph = translator._graphof(func)
+            llfn = hop.rtyper.getcallable(graph)
+            c_callback = hop.inputconst(lltype.typeOf(llfn), llfn)
+            hop.exception_is_here()
+            hop.genop("direct_call", [c_callback])
+        else:
+            hop.exception_cannot_occur()
+
+
+def allocate():
+    _gil_allocate()
+
+def release():
+    # this function must not raise, in such a way that the exception
+    # transformer knows that it cannot raise!
+    _gil_release()
+release._gctransformer_hint_cannot_collect_ = True
+release._dont_reach_me_in_del_ = True
+
+def acquire():
+    from rpython.rlib import rthread
+    _gil_acquire()
+    rthread.gc_thread_run()
+    _after_thread_switch()
+acquire._gctransformer_hint_cannot_collect_ = True
+acquire._dont_reach_me_in_del_ = True
+
+# The _gctransformer_hint_cannot_collect_ hack is needed for
+# translations in which the release()/acquire() functions are not inlined.
+# These hints tell the gctransformer not to save and restore the local GC
+# pointers in the shadow stack.  This is necessary because the GIL is
+# not held after the call to release() or before the call
+# to acquire().
+
+def yield_thread():
+    # explicitly release the gil, in a way that tries to give more
+    # priority to other threads (as opposed to continuing to run in
+    # the same thread).
+    if _gil_yield_thread():
+        from rpython.rlib import rthread
+        rthread.gc_thread_run()
+        _after_thread_switch()
+yield_thread._gctransformer_hint_close_stack_ = True
+yield_thread._dont_reach_me_in_del_ = True
+yield_thread._dont_inline_ = True
+
+# yield_thread() needs a different hint: _gctransformer_hint_close_stack_.
+# The *_external_call() functions are themselves called only from the rffi
+# module from a helper function that also has this hint.
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -237,7 +237,7 @@
 else:
     includes = ['unistd.h',  'sys/types.h', 'sys/wait.h',
                 'utime.h', 'sys/time.h', 'sys/times.h',
-                'grp.h', 'dirent.h']
+                'grp.h', 'dirent.h', 'sys/stat.h', 'fcntl.h']
     libraries = ['util']
 eci = ExternalCompilationInfo(
     includes=includes,
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -79,12 +79,18 @@
 
 @specialize.arg(0)
 def ll_start_new_thread(func):
+    from rpython.rlib import rgil
     _check_thread_enabled()
     if rgc.stm_is_enabled():
         from rpython.rlib.rstm import (register_invoke_around_extcall,
                                        set_transaction_length)
         register_invoke_around_extcall()
         set_transaction_length(1.0)
+    else:
+        rgil.allocate()
+        # ^^^ convenience: any RPython program which uses explicitly
+        # rthread.start_new_thread() will initialize the GIL at that
+        # point.
     ident = c_thread_start(func)
     if ident == -1:
         raise error("can't start new thread")
diff --git a/rpython/rlib/test/test_rgil.py b/rpython/rlib/test/test_rgil.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_rgil.py
@@ -0,0 +1,47 @@
+from rpython.rlib import rgil
+from rpython.translator.c.test.test_standalone import StandaloneTests
+
+
+class BaseTestGIL(StandaloneTests):
+
+    def test_simple(self):
+        def main(argv):
+            rgil.release()
+            # don't have the GIL here
+            rgil.acquire()
+            rgil.yield_thread()
+            print "OK"   # there is also a release/acquire pair here
+            return 0
+
+        main([])
+
+        t, cbuilder = self.compile(main)
+        data = cbuilder.cmdexec('')
+        assert data == "OK\n"
+
+    def test_after_thread_switch(self):
+        class Foo:
+            pass
+        foo = Foo()
+        foo.counter = 0
+        def seeme():
+            foo.counter += 1
+        def main(argv):
+            rgil.invoke_after_thread_switch(seeme)
+            print "Test"     # one release/acquire pair here
+            print foo.counter
+            print foo.counter
+            return 0
+
+        t, cbuilder = self.compile(main)
+        data = cbuilder.cmdexec('')
+        assert data == "Test\n1\n2\n"
+
+
+class TestGILAsmGcc(BaseTestGIL):
+    gc = 'minimark'
+    gcrootfinder = 'asmgcc'
+
+class TestGILShadowStack(BaseTestGIL):
+    gc = 'minimark'
+    gcrootfinder = 'shadowstack'
diff --git a/rpython/rlib/test/test_rsocket.py 
b/rpython/rlib/test/test_rsocket.py
--- a/rpython/rlib/test/test_rsocket.py
+++ b/rpython/rlib/test/test_rsocket.py
@@ -143,7 +143,7 @@
 
 
 def test_simple_tcp():
-    import thread
+    from rpython.rlib import rthread
     sock = RSocket()
     try_ports = [1023] + range(20000, 30000, 437)
     for port in try_ports:
@@ -169,14 +169,14 @@
             connected[0] = True
         finally:
             lock.release()
-    lock = thread.allocate_lock()
-    lock.acquire()
-    thread.start_new_thread(connecting, ())
+    lock = rthread.allocate_lock()
+    lock.acquire(True)
+    rthread.start_new_thread(connecting, ())
     print 'waiting for connection'
     fd1, addr2 = sock.accept()
     s1 = RSocket(fd=fd1)
     print 'connection accepted'
-    lock.acquire()
+    lock.acquire(True)
     assert connected[0]
     print 'connecting side knows that the connection was accepted too'
     assert addr.eq(s2.getpeername())
@@ -188,7 +188,9 @@
     buf = s2.recv(100)
     assert buf == '?'
     print 'received ok'
-    thread.start_new_thread(s2.sendall, ('x'*50000,))
+    def sendstuff():
+        s2.sendall('x'*50000)
+    rthread.start_new_thread(sendstuff, ())
     buf = ''
     while len(buf) < 50000:
         data = s1.recv(50100)
diff --git a/rpython/rlib/test/test_rthread.py 
b/rpython/rlib/test/test_rthread.py
--- a/rpython/rlib/test/test_rthread.py
+++ b/rpython/rlib/test/test_rthread.py
@@ -5,13 +5,6 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 import py
 
-def setup_module(mod):
-    # Hack to avoid a deadlock if the module is run after other test files :-(
-    # In this module, we assume that rthread.start_new_thread() is not
-    # providing us with a GIL equivalent, except in test_gc_locking
-    # which installs its own aroundstate.
-    rffi.aroundstate._cleanup_()
-
 def test_lock():
     l = allocate_lock()
     ok1 = l.acquire(True)
@@ -31,6 +24,7 @@
         py.test.fail("Did not raise")
 
 def test_tlref_untranslated():
+    import thread
     class FooBar(object):
         pass
     t = ThreadLocalReference(FooBar)
@@ -43,7 +37,7 @@
         time.sleep(0.2)
         results.append(t.get() is x)
     for i in range(5):
-        start_new_thread(subthread, ())
+        thread.start_new_thread(subthread, ())
     time.sleep(0.5)
     assert results == [True] * 15
 
@@ -99,7 +93,6 @@
 
     def test_gc_locking(self):
         import time
-        from rpython.rlib.objectmodel import invoke_around_extcall
         from rpython.rlib.debug import ll_assert
 
         class State:
@@ -123,17 +116,6 @@
                 ll_assert(j == self.j, "2: bad j")
             run._dont_inline_ = True
 
-        def before_extcall():
-            release_NOAUTO(state.gil)
-        before_extcall._gctransformer_hint_cannot_collect_ = True
-        # ^^^ see comments in gil.py about this hint
-
-        def after_extcall():
-            acquire_NOAUTO(state.gil, True)
-            gc_thread_run()
-        after_extcall._gctransformer_hint_cannot_collect_ = True
-        # ^^^ see comments in gil.py about this hint
-
         def bootstrap():
             # after_extcall() is called before we arrive here.
             # We can't just acquire and release the GIL manually here,
@@ -154,14 +136,9 @@
             start_new_thread(bootstrap, ())
 
         def f():
-            state.gil = allocate_ll_lock()
-            acquire_NOAUTO(state.gil, True)
             state.bootstrapping = allocate_lock()
             state.answers = []
             state.finished = 0
-            # the next line installs before_extcall() and after_extcall()
-            # to be called automatically around external function calls.
-            invoke_around_extcall(before_extcall, after_extcall)
 
             g(10, 1)
             done = False
@@ -179,10 +156,7 @@
             return len(state.answers)
 
         expected = 89
-        try:
-            fn = self.getcompiled(f, [])
-        finally:
-            rffi.aroundstate._cleanup_()
+        fn = self.getcompiled(f, [])
         answers = fn()
         assert answers == expected
 
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -178,9 +178,9 @@
 
         argnames = ', '.join(['a%d' % i for i in range(len(args))])
         source = py.code.Source("""
+            from rpython.rlib import rgil
             def call_external_function(%(argnames)s):
-                before = aroundstate.before
-                if before: before()
+                rgil.release()
                 # NB. it is essential that no exception checking occurs here!
                 if %(save_err)d:
                     from rpython.rlib import rposix
@@ -189,12 +189,10 @@
                 if %(save_err)d:
                     from rpython.rlib import rposix
                     rposix._errno_after(%(save_err)d)
-                after = aroundstate.after
-                if after: after()
+                rgil.acquire()
                 return res
         """ % locals())
-        miniglobals = {'aroundstate': aroundstate,
-                       'funcptr':     funcptr,
+        miniglobals = {'funcptr':     funcptr,
                        '__name__':    __name__, # for module name propagation
                        }
         exec source.compile() in miniglobals
@@ -214,7 +212,7 @@
         # don't inline, as a hack to guarantee that no GC pointer is alive
         # anywhere in call_external_function
     else:
-        # if we don't have to invoke the aroundstate, we can just call
+        # if we don't have to invoke the GIL handling, we can just call
         # the low-level function pointer carelessly
         if macro is None and save_err == RFFI_ERR_NONE:
             call_external_function = funcptr
@@ -279,13 +277,10 @@
                     freeme = arg
             elif _isfunctype(TARGET) and not _isllptr(arg):
                 # XXX pass additional arguments
-                if invoke_around_handlers:
-                    arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg,
-                                                             callbackholder,
-                                                             aroundstate))
-                else:
-                    arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg,
-                                                             callbackholder))
+                use_gil = invoke_around_handlers
+                arg = llhelper(TARGET, _make_wrapper_for(TARGET, arg,
+                                                         callbackholder,
+                                                         use_gil))
             else:
                 SOURCE = lltype.typeOf(arg)
                 if SOURCE != TARGET:
@@ -324,7 +319,7 @@
     def __init__(self):
         self.callbacks = {}
 
-def _make_wrapper_for(TP, callable, callbackholder=None, aroundstate=None):
+def _make_wrapper_for(TP, callable, callbackholder, use_gil):
     """ Function creating wrappers for callbacks. Note that this is
     cheating as we assume constant callbacks and we just memoize wrappers
     """
@@ -339,22 +334,21 @@
         callbackholder.callbacks[callable] = True
     args = ', '.join(['a%d' % i for i in range(len(TP.TO.ARGS))])
     source = py.code.Source(r"""
+        rgil = None
+        if use_gil:
+            from rpython.rlib import rgil
+
         def wrapper(%(args)s):    # no *args - no GIL for mallocing the tuple
             token = 0
             if rgc.stm_is_enabled():
                 rjbuf = llop.stm_rewind_jmp_frame(llmemory.Address, 1)
             else:
                 rjbuf = llmemory.NULL
-            if aroundstate is not None:
-                if aroundstate.enter_callback is not None:
-                    if rgc.stm_is_enabled():
-                        token = aroundstate.enter_callback(rjbuf)
-                    else:
-                        aroundstate.enter_callback()
+            if rgil is not None:
+                if rgc.stm_is_enabled():
+                    token = aroundstate.enter_callback(rjbuf)
                 else:
-                    after = aroundstate.after
-                    if after is not None:
-                        after()
+                    rgil.acquire()
             # from now on we hold the GIL
             stackcounter.stacks_counter += 1
             llop.gc_stack_bottom(lltype.Void)   # marker for trackgcroot.py
@@ -370,18 +364,13 @@
                 result = errorcode
             stackcounter.stacks_counter -= 1
             if aroundstate is not None:
-                if aroundstate.leave_callback is not None:
-                    if rgc.stm_is_enabled():
-                        aroundstate.leave_callback(rjbuf, token)
-                    else:
-                        aroundstate.leave_callback()
+                if rgc.stm_is_enabled():
+                    aroundstate.leave_callback(rjbuf, token)
                 else:
-                    before = aroundstate.before
-                    if before is not None:
-                        before()
+                    rgil.release()
             # here we don't hold the GIL any more. As in the wrapper() produced
             # by llexternal, it is essential that no exception checking occurs
-            # after the call to before().
+            # after the call to rgil.release().
             return result
     """ % locals())
     miniglobals = locals().copy()
@@ -401,18 +390,6 @@
 LeaveCallbackFnPtr = lltype.Ptr(lltype.FuncType([llmemory.Address,
                                                  lltype.Signed], lltype.Void))
 
-class AroundState:
-    _alloc_flavor_ = "raw"
-    _stm_dont_track_raw_accesses_ = True
-
-    def _cleanup_(self):
-        self.before = None        # or a regular RPython function
-        self.after = None         # or a regular RPython function
-        self.enter_callback = None
-        self.leave_callback = None
-aroundstate = AroundState()
-aroundstate._cleanup_()
-
 class StackCounter:
     _alloc_flavor_ = "raw"
     def _cleanup_(self):
diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py
--- a/rpython/rtyper/lltypesystem/test/test_rffi.py
+++ b/rpython/rtyper/lltypesystem/test/test_rffi.py
@@ -688,42 +688,6 @@
 
         assert interpret(f, []) == 4
 
-    def test_around_extcall(self):
-        if sys.platform == "win32":
-            py.test.skip('No pipes on windows')
-        import os
-        from rpython.annotator import model as annmodel
-        from rpython.rlib.objectmodel import invoke_around_extcall
-        from rpython.rtyper.extfuncregistry import register_external
-        read_fd, write_fd = os.pipe()
-        try:
-            # we need an external function that is not going to get wrapped around
-            # before()/after() calls, in order to call it from before()/after()...
-            def mywrite(s):
-                os.write(write_fd, s)
-            def llimpl(s):
-                s = ''.join(s.chars)
-                os.write(write_fd, s)
-            register_external(mywrite, [str], annmodel.s_None, 'll_mywrite',
-                              llfakeimpl=llimpl, sandboxsafe=True)
-
-            def before():
-                mywrite("B")
-            def after():
-                mywrite("A")
-            def f():
-                os.write(write_fd, "-")
-                invoke_around_extcall(before, after)
-                os.write(write_fd, "E")
-
-            interpret(f, [])
-            data = os.read(read_fd, 99)
-            assert data == "-BEA"
-
-        finally:
-            os.close(write_fd)
-            os.close(read_fd)
-
     def test_external_callable(self):
         """ Try to call some llexternal function with llinterp
         """
diff --git a/rpython/translator/backendopt/test/test_malloc.py b/rpython/translator/backendopt/test/test_malloc.py
--- a/rpython/translator/backendopt/test/test_malloc.py
+++ b/rpython/translator/backendopt/test/test_malloc.py
@@ -159,7 +159,7 @@
 
             def __del__(self):
                 delcalls[0] += 1
-                os.write(1, "__del__\n")
+                #os.write(1, "__del__\n")
 
         def f(x=int):
             a = A()
diff --git a/rpython/translator/c/src/entrypoint.c b/rpython/translator/c/src/entrypoint.c
--- a/rpython/translator/c/src/entrypoint.c
+++ b/rpython/translator/c/src/entrypoint.c
@@ -33,6 +33,10 @@
 #  include <io.h>
 #endif
 
+#ifdef RPY_WITH_GIL
+# include <src/thread.h>
+#endif
+
 
 RPY_EXTERN
 int pypy_main_function(int argc, char *argv[])
@@ -45,6 +49,14 @@
     _setmode(1, _O_BINARY);
 #endif
 
+#ifdef RPY_WITH_GIL
+    /* Note that the GIL's mutexes are not automatically made; if the
+       program starts threads, it needs to call rgil.gil_allocate().
+       RPyGilAcquire() still works without that, but crash if it finds
+       that it really needs to wait on a mutex. */
+    RPyGilAcquire();
+#endif
+
 #ifdef PYPY_USE_ASMGCC
     pypy_g_rpython_rtyper_lltypesystem_rffi_StackCounter.sc_inst_stacks_counter++;
 #endif
@@ -78,6 +90,10 @@
 
     pypy_malloc_counters_results();
 
+#ifdef RPY_WITH_GIL
+    RPyGilRelease();
+#endif
+
 #ifdef RPY_STM
     stm_rewind_jmp_leaveframe(&stm_thread_local, &rjbuf);
 #endif
diff --git a/rpython/translator/c/src/mem.c b/rpython/translator/c/src/mem.c
--- a/rpython/translator/c/src/mem.c
+++ b/rpython/translator/c/src/mem.c
@@ -187,11 +187,8 @@
         got += 1;
         fd = ((void* *) (((char *)fd) + sizeof(void*)))[0];
     }
-    if (rpy_fastgil != 1) {
-        RPyAssert(rpy_fastgil != 0,
-                          "pypy_check_stack_count doesn't have the GIL");
-        got++;  /* <= the extra one currently stored in rpy_fastgil */
-    }
+    RPyAssert(rpy_fastgil == 1,
+              "pypy_check_stack_count doesn't have the GIL");
     RPyAssert(got == stacks_counter - 1,
               "bad stacks_counter or non-closed stacks around");
 # endif
diff --git a/rpython/translator/c/src/thread.h b/rpython/translator/c/src/thread.h
--- a/rpython/translator/c/src/thread.h
+++ b/rpython/translator/c/src/thread.h
@@ -28,7 +28,8 @@
 
 RPY_EXTERN void RPyGilAllocate(void);
 RPY_EXTERN long RPyGilYieldThread(void);
-RPY_EXTERN void RPyGilAcquire(void);
+RPY_EXTERN void RPyGilAcquireSlowPath(long);
+#define RPyGilAcquire _RPyGilAcquire
 #define RPyGilRelease _RPyGilRelease
 #define RPyFetchFastGil _RPyFetchFastGil
 
@@ -40,6 +41,11 @@
 
 RPY_EXTERN long rpy_fastgil;
 
+static inline void _RPyGilAcquire(void) {
+    long old_fastgil = lock_test_and_set(&rpy_fastgil, 1);
+    if (old_fastgil != 0)
+        RPyGilAcquireSlowPath(old_fastgil);
+}
 static inline void _RPyGilRelease(void) {
     assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
     rpy_fastgil = 0;
diff --git a/rpython/translator/c/src/thread_gil.c b/rpython/translator/c/src/thread_gil.c
--- a/rpython/translator/c/src/thread_gil.c
+++ b/rpython/translator/c/src/thread_gil.c
@@ -36,25 +36,42 @@
      value of 'rpy_fastgil' to 1.
 */
 
-long rpy_fastgil = 1;
-long rpy_waiting_threads = -42;    /* GIL not initialized */
+
+/* The GIL is initially released; see pypy_main_function(), which calls
+   RPyGilAcquire/RPyGilRelease.  The point is that when building
+   RPython libraries, they can be a collection of regular functions that
+   also call RPyGilAcquire/RPyGilRelease; see test_standalone.TestShared.
+*/
+long rpy_fastgil = 0;
+static long rpy_waiting_threads = -42;    /* GIL not initialized */
 static mutex1_t mutex_gil_stealer;
 static mutex2_t mutex_gil;
 
-void RPyGilAllocate(void)
+
+static void rpy_init_mutexes(void)
 {
-    assert(RPY_FASTGIL_LOCKED(rpy_fastgil));
     mutex1_init(&mutex_gil_stealer);
     mutex2_init_locked(&mutex_gil);
     rpy_waiting_threads = 0;
 }
 
-void RPyGilAcquire(void)
+void RPyGilAllocate(void)
 {
-    /* Acquires the GIL.
+    if (rpy_waiting_threads < 0) {
+        assert(rpy_waiting_threads == -42);
+        rpy_init_mutexes();
+#ifdef HAVE_PTHREAD_ATFORK
+        pthread_atfork(NULL, NULL, rpy_init_mutexes);
+#endif
+    }
+}
+
+void RPyGilAcquireSlowPath(long old_fastgil)
+{
+    /* Acquires the GIL.  This assumes that we already did:
+
+          old_fastgil = lock_test_and_set(&rpy_fastgil, 1);
      */
-    long old_fastgil = lock_test_and_set(&rpy_fastgil, 1);
-
     if (!RPY_FASTGIL_LOCKED(old_fastgil)) {
         /* The fastgil was not previously locked: success.
            'mutex_gil' should still be locked at this point.
@@ -63,10 +80,22 @@
     else {
         /* Otherwise, another thread is busy with the GIL. */
 
+        if (rpy_waiting_threads < 0) {
+            /* <arigo> I tried to have RPyGilAllocate() called from
+             * here, but it fails occasionally on an example
+             * (2.7/test/test_threading.py).  I think what occurs is
+             * that if one thread runs RPyGilAllocate(), it still
+             * doesn't have the GIL; then the other thread might fork()
+             * at precisely this moment, killing the first thread.
+             */
+            fprintf(stderr, "Fatal RPython error: a thread is trying to wait "
+                            "for the GIL, but the GIL was not initialized\n");
+            abort();
+        }
+
         /* Register me as one of the threads that is actively waiting
            for the GIL.  The number of such threads is found in
            rpy_waiting_threads. */
-        assert(rpy_waiting_threads >= 0);
         atomic_increment(&rpy_waiting_threads);
 
         /* Enter the waiting queue from the end.  Assuming a roughly
@@ -164,6 +193,13 @@
     _RPyGilRelease();
 }
 
+#undef RPyGilAcquire
+RPY_EXTERN
+void RPyGilAcquire(void)
+{
+    _RPyGilAcquire();
+}
+
 #undef RPyFetchFastGil
 RPY_EXTERN
 long *RPyFetchFastGil(void)
diff --git a/rpython/translator/c/src/thread_nt.c b/rpython/translator/c/src/thread_nt.c
--- a/rpython/translator/c/src/thread_nt.c
+++ b/rpython/translator/c/src/thread_nt.c
@@ -236,12 +236,7 @@
 #define mutex1_lock   mutex2_lock
 #define mutex1_unlock mutex2_unlock
 
-#ifdef _M_IA64
-/* On Itanium, use 'acquire' memory ordering semantics */
-#define lock_test_and_set(ptr, value)  InterlockedExchangeAcquire(ptr, value)
-#else
-#define lock_test_and_set(ptr, value)  InterlockedExchange(ptr, value)
-#endif
+//#define lock_test_and_set(ptr, value)  see thread_nt.h
 #define atomic_increment(ptr)          InterlockedIncrement(ptr)
 #define atomic_decrement(ptr)          InterlockedDecrement(ptr)
 
diff --git a/rpython/translator/c/src/thread_nt.h b/rpython/translator/c/src/thread_nt.h
--- a/rpython/translator/c/src/thread_nt.h
+++ b/rpython/translator/c/src/thread_nt.h
@@ -30,3 +30,11 @@
 RPY_EXTERN
 long RPyThreadSetStackSize(long);
 #endif
+
+
+#ifdef _M_IA64
+/* On Itanium, use 'acquire' memory ordering semantics */
+#define lock_test_and_set(ptr, value)  InterlockedExchangeAcquire(ptr, value)
+#else
+#define lock_test_and_set(ptr, value)  InterlockedExchange(ptr, value)
+#endif
diff --git a/rpython/translator/c/src/thread_pthread.c b/rpython/translator/c/src/thread_pthread.c
--- a/rpython/translator/c/src/thread_pthread.c
+++ b/rpython/translator/c/src/thread_pthread.c
@@ -546,8 +546,9 @@
     return result;
 }
 
-#define lock_test_and_set(ptr, value)  __sync_lock_test_and_set(ptr, value)
+//#define lock_test_and_set(ptr, value)  see thread_pthread.h
 #define atomic_increment(ptr)          __sync_fetch_and_add(ptr, 1)
 #define atomic_decrement(ptr)          __sync_fetch_and_sub(ptr, 1)
+#define HAVE_PTHREAD_ATFORK            1
 
 #include "src/thread_gil.c"
diff --git a/rpython/translator/c/src/thread_pthread.h b/rpython/translator/c/src/thread_pthread.h
--- a/rpython/translator/c/src/thread_pthread.h
+++ b/rpython/translator/c/src/thread_pthread.h
@@ -78,3 +78,6 @@
 long RPyThreadSetStackSize(long);
 RPY_EXTERN
 void RPyThreadAfterFork(void);
+
+
+#define lock_test_and_set(ptr, value)  __sync_lock_test_and_set(ptr, value)
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -406,7 +406,7 @@
                 try:
                     g()
                 except:
-                    os.write(1, "hallo")
+                    pass #os.write(1, "hallo")
         def f1(i):
             if i:
                 raise TypeError
diff --git a/rpython/translator/c/test/test_refcount.py b/rpython/translator/c/test/test_refcount.py
--- a/rpython/translator/c/test/test_refcount.py
+++ b/rpython/translator/c/test/test_refcount.py
@@ -146,7 +146,7 @@
                 try:
                     g()
                 except:
-                    os.write(1, "hallo")
+                    pass  #os.write(1, "hallo")
         def f1(i):
             if i:
                 raise TypeError
diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py
--- a/rpython/translator/c/test/test_standalone.py
+++ b/rpython/translator/c/test/test_standalone.py
@@ -1116,23 +1116,11 @@
         import time
         from rpython.rlib import rthread
         from rpython.rtyper.lltypesystem import lltype
-        from rpython.rlib.objectmodel import invoke_around_extcall
 
         class State:
             pass
         state = State()
 
-        def before():
-            debug_print("releasing...")
-            ll_assert(not rthread.acquire_NOAUTO(state.ll_lock, False),
-                      "lock not held!")
-            rthread.release_NOAUTO(state.ll_lock)
-            debug_print("released")
-        def after():
-            debug_print("waiting...")
-            rthread.acquire_NOAUTO(state.ll_lock, True)
-            debug_print("acquired")
-
         def recurse(n):
             if n > 0:
                 return recurse(n-1)+1
@@ -1168,10 +1156,7 @@
             s1 = State(); s2 = State(); s3 = State()
             s1.x = 0x11111111; s2.x = 0x22222222; s3.x = 0x33333333
             # start 3 new threads
-            state.ll_lock = rthread.allocate_ll_lock()
-            after()
             state.count = 0
-            invoke_around_extcall(before, after)
             ident1 = rthread.start_new_thread(bootstrap, ())
             ident2 = rthread.start_new_thread(bootstrap, ())
             ident3 = rthread.start_new_thread(bootstrap, ())
@@ -1215,20 +1200,11 @@
         import time, gc
         from rpython.rlib import rthread, rposix
         from rpython.rtyper.lltypesystem import lltype
-        from rpython.rlib.objectmodel import invoke_around_extcall
 
         class State:
             pass
         state = State()
 
-        def before():
-            ll_assert(not rthread.acquire_NOAUTO(state.ll_lock, False),
-                      "lock not held!")
-            rthread.release_NOAUTO(state.ll_lock)
-        def after():
-            rthread.acquire_NOAUTO(state.ll_lock, True)
-            rthread.gc_thread_run()
-
         class Cons:
             def __init__(self, head, tail):
                 self.head = head
@@ -1258,9 +1234,6 @@
             state.xlist = []
             x2 = Cons(51, Cons(62, Cons(74, None)))
             # start 5 new threads
-            state.ll_lock = rthread.allocate_ll_lock()
-            after()
-            invoke_around_extcall(before, after)
             ident1 = new_thread()
             ident2 = new_thread()
             #
@@ -1304,7 +1277,6 @@
 
 
     def test_gc_with_fork_without_threads(self):
-        from rpython.rlib.objectmodel import invoke_around_extcall
         if not hasattr(os, 'fork'):
             py.test.skip("requires fork()")
 
@@ -1331,22 +1303,18 @@
         # alive are really freed.
         import time, gc, os
         from rpython.rlib import rthread
-        from rpython.rlib.objectmodel import invoke_around_extcall
         if not hasattr(os, 'fork'):
             py.test.skip("requires fork()")
 
+        from rpython.rtyper.lltypesystem import rffi, lltype
+        direct_write = rffi.llexternal(
+            "write", [rffi.INT, rffi.CCHARP, rffi.SIZE_T], lltype.Void,
+            _nowrapper=True)
+
         class State:
             pass
         state = State()
 
-        def before():
-            ll_assert(not rthread.acquire_NOAUTO(state.ll_lock, False),
-                      "lock not held!")
-            rthread.release_NOAUTO(state.ll_lock)
-        def after():
-            rthread.acquire_NOAUTO(state.ll_lock, True)
-            rthread.gc_thread_run()
-
         class Cons:
             def __init__(self, head, tail):
                 self.head = head
@@ -1354,7 +1322,10 @@
 
         class Stuff:
             def __del__(self):
-                os.write(state.write_end, 'd')
+                p = rffi.str2charp('d')
+                one = rffi.cast(rffi.SIZE_T, 1)
+                direct_write(rffi.cast(rffi.INT, state.write_end), p, one)
+                rffi.free_charp(p)
 
         def allocate_stuff():
             s = Stuff()
@@ -1403,9 +1374,6 @@
             state.read_end, state.write_end = os.pipe()
             x2 = Cons(51, Cons(62, Cons(74, None)))
             # start 5 new threads
-            state.ll_lock = rthread.allocate_ll_lock()
-            after()
-            invoke_around_extcall(before, after)
             start_arthreads()
             # force freeing
             gc.collect()
diff --git a/rpython/translator/test/test_simplify.py b/rpython/translator/test/test_simplify.py
--- a/rpython/translator/test/test_simplify.py
+++ b/rpython/translator/test/test_simplify.py
@@ -1,7 +1,7 @@
 import py
 from rpython.translator.translator import TranslationContext, graphof
 from rpython.translator.backendopt.all import backend_optimizations
-from rpython.translator.simplify import (get_graph, transform_dead_op_vars)
+from rpython.translator.simplify import get_graph, transform_dead_op_vars
 from rpython.flowspace.model import Block, Constant, summary
 from rpython.conftest import option
 
@@ -183,8 +183,10 @@
                     print op
                     subgraph = get_graph(op.args[0], t)
                     if subgraph is None:
-                        # ignore 'get_errno' and 'set_errno'
-                        if 'et_errno' not in repr(op.args[0]):
+                        # ignore 'get_errno' and 'set_errno', and
+                        # 'RPyGilRelease' and 'RPyGilAcquire'
+                        if ('et_errno' not in repr(op.args[0]) and
+                            'RPyGil' not in repr(op.args[0])):
                             found.append(op)
                     else:
                         walkgraph(subgraph)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to