Author: Brian Kearns <bdkea...@gmail.com> Branch: Changeset: r71420:e6b55fa0713c Date: 2014-05-08 17:07 -0400 http://bitbucket.org/pypy/pypy/changeset/e6b55fa0713c/
Log: merge heads diff --git a/lib_pypy/_pypy_interact.py b/lib_pypy/_pypy_interact.py --- a/lib_pypy/_pypy_interact.py +++ b/lib_pypy/_pypy_interact.py @@ -3,6 +3,8 @@ import sys import os +irc_header = "And now for something completely different" + def interactive_console(mainmodule=None, quiet=False): # set sys.{ps1,ps2} just before invoking the interactive interpreter. This @@ -15,8 +17,7 @@ if not quiet: try: from _pypy_irc_topic import some_topic - text = "And now for something completely different: ``%s''" % ( - some_topic(),) + text = "%s: ``%s''" % ( irc_header, some_topic()) while len(text) >= 80: i = text[:80].rfind(' ') print(text[:i]) diff --git a/lib_pypy/_tkinter/tklib.py b/lib_pypy/_tkinter/tklib.py --- a/lib_pypy/_tkinter/tklib.py +++ b/lib_pypy/_tkinter/tklib.py @@ -121,6 +121,10 @@ incdirs = [] linklibs = ['tcl85', 'tk85'] libdirs = [] +elif sys.platform == 'darwin': + incdirs = ['/System/Library/Frameworks/Tk.framework/Versions/Current/Headers/'] + linklibs = ['tcl', 'tk'] + libdirs = [] else: incdirs=['/usr/include/tcl'] linklibs=['tcl', 'tk'] diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -348,4 +348,9 @@ type and vice versa. For builtin types, a dictionary will be returned that cannot be changed (but still looks and behaves like a normal dictionary). +* PyPy prints a random line from past #pypy IRC topics at startup in + interactive mode. In a released version, this behaviour is suppressed, but + setting the environment variable PYPY_IRC_TOPIC will bring it back. Note that + downstream package providers have been known to totally disable this feature. + .. 
include:: _ref.txt diff --git a/pypy/doc/release-2.3.0.rst b/pypy/doc/release-2.3.0.rst --- a/pypy/doc/release-2.3.0.rst +++ b/pypy/doc/release-2.3.0.rst @@ -1,5 +1,5 @@ ======================================= -PyPy 2.3 - Easier Than Ever +PyPy 2.3 - Terrestrial Arthropod Trap ======================================= We're pleased to announce PyPy 2.3, which targets version 2.7.6 of the Python diff --git a/pypy/doc/stm.rst b/pypy/doc/stm.rst --- a/pypy/doc/stm.rst +++ b/pypy/doc/stm.rst @@ -1,70 +1,77 @@ -====================== -Transactional Memory -====================== + +============================= +Software Transactional Memory +============================= .. contents:: This page is about ``pypy-stm``, a special in-development version of PyPy which can run multiple independent CPU-hungry threads in the same -process in parallel. It is side-stepping what is known in the Python -world as the "global interpreter lock (GIL)" problem. +process in parallel. It is a solution to what is known in the Python +world as the "global interpreter lock (GIL)" problem --- it is an +implementation of Python without the GIL. -"STM" stands for Software Transactional Memory, the technique used +"STM" stands for Software `Transactional Memory`_, the technique used internally. This page describes ``pypy-stm`` from the perspective of a user, describes work in progress, and finally gives references to more implementation details. -This work was done mostly by Remi Meier and Armin Rigo. Thanks to all -donors for crowd-funding the work so far! Please have a look at the -`2nd call for donation`_. +This work was done by Remi Meier and Armin Rigo. Thanks to all donors +for crowd-funding the work so far! Please have a look at the `2nd call +for donation`_. +.. _`Transactional Memory`: http://en.wikipedia.org/wiki/Transactional_memory .. _`2nd call for donation`: http://pypy.org/tmdonate2.html Introduction ============ -``pypy-stm`` is a variant of the regular PyPy interpreter. 
With caveats -listed below, it should be in theory within 25%-50% slower than a +``pypy-stm`` is a variant of the regular PyPy interpreter. With caveats_ +listed below, it should be in theory within 20%-50% slower than a regular PyPy, comparing the JIT version in both cases. It is called STM for Software Transactional Memory, which is the internal technique used (see `Reference to implementation details`_). -What you get in exchange for this slow-down is that ``pypy-stm`` runs -any multithreaded Python program on multiple CPUs at once. Programs -running two threads or more in parallel should ideally run faster than -in a regular PyPy, either now or soon as issues are fixed. In one way, -that's all there is to it: this is a GIL-less Python, feel free to -`download and try it`__. However, the deeper idea behind the -``pypy-stm`` project is to improve what is so far the state-of-the-art -for using multiple CPUs, which for cases where separate processes don't -work is done by writing explicitly multi-threaded programs. Instead, -``pypy-stm`` is pushing forward an approach to *hide* the threads, as -described below in `atomic sections`_. +The benefit is that the resulting ``pypy-stm`` can execute multiple +threads of Python code in parallel. Programs running two threads or +more in parallel should ideally run faster than in a regular PyPy +(either now, or soon as bugs are fixed). +* ``pypy-stm`` is fully compatible with a GIL-based PyPy; you can use + it as a drop-in replacement and multithreaded programs will run on + multiple cores. -.. __: +* ``pypy-stm`` does not impose any special API to the user, but it + provides a new pure Python module called `transactional_memory`_ with + features to inspect the state or debug conflicts_ that prevent + parallelization. This module can also be imported on top of a non-STM + PyPy or CPython. 
-Current status -============== +* Building on top of the way the GIL is removed, we will talk + about `Atomic sections, Transactions, etc.: a better way to write + parallel programs`_. + + +Getting Started +=============== **pypy-stm requires 64-bit Linux for now.** Development is done in the branch `stmgc-c7`_. If you are only -interested in trying it out, you can download a Ubuntu 12.04 binary -here__ (``pypy-2.2.x-stm*.tar.bz2``; this version is a release mode, -but not stripped of debug symbols). The current version supports four -"segments", which means that it will run up to four threads in parallel, -in other words it is running a thread pool up to 4 threads emulating normal -threads. +interested in trying it out, you can download a Ubuntu binary here__ +(``pypy-2.3.x-stm*.tar.bz2``, Ubuntu 12.04-14.04; these versions are +release mode, but not stripped of debug symbols). The current version +supports four "segments", which means that it will run up to four +threads in parallel. To build a version from sources, you first need to compile a custom -version of clang; we recommend downloading `llvm and clang like -described here`__, but at revision 201645 (use ``svn co -r 201645 ...`` +version of clang(!); we recommend downloading `llvm and clang like +described here`__, but at revision 201645 (use ``svn co -r 201645 <path>`` for all checkouts). Then apply all the patches in `this directory`__: -they are fixes for the very extensive usage that pypy-stm does of a -clang-only feature (without them, you get crashes of clang). Then get +they are fixes for a clang-only feature that hasn't been used so heavily +in the past (without the patches, you get crashes of clang). Then get the branch `stmgc-c7`_ of PyPy and run:: rpython/bin/rpython -Ojit --stm pypy/goal/targetpypystandalone.py @@ -75,23 +82,26 @@ .. __: https://bitbucket.org/pypy/stmgc/src/default/c7/llvmfix/ -Caveats: +.. _caveats: -* So far, small examples work fine, but there are still a number of - bugs. 
We're busy fixing them. +Current status +-------------- + +* So far, small examples work fine, but there are still a few bugs. + We're busy fixing them as we find them; feel free to `report bugs`_. * Currently limited to 1.5 GB of RAM (this is just a parameter in - `core.h`__). Memory overflows are not detected correctly, so may - cause segmentation faults. + `core.h`__). Memory overflows are not correctly handled; they cause + segfaults. -* The JIT warm-up time is abysmal (as opposed to the regular PyPy's, - which is "only" bad). Moreover, you should run it with a command like - ``pypy-stm --jit trace_limit=60000 args...``; the default value of - 6000 for ``trace_limit`` is currently too low (6000 should become - reasonable again as we improve). Also, in order to produce machine - code, the JIT needs to enter a special single-threaded mode for now. - This all means that you *will* get very bad performance results if - your program doesn't run for *many* seconds for now. +* The JIT warm-up time improved recently but is still bad. In order to + produce machine code, the JIT needs to enter a special single-threaded + mode for now. This means that you will get bad performance results if + your program doesn't run for several seconds, where *several* can mean + *many.* When trying benchmarks, be sure to check that you have + reached the warmed state, i.e. the performance is not improving any + more. This should be clear from the fact that as long as it's + producing more machine code, ``pypy-stm`` will run on a single core. * The GC is new; although clearly inspired by PyPy's regular GC, it misses a number of optimizations for now. Programs allocating large @@ -108,111 +118,197 @@ * The STM system is based on very efficient read/write barriers, which are mostly done (their placement could be improved a bit in JIT-generated machine code). But the overall bookkeeping logic could - see more improvements (see Statistics_ below). 
- -* You can use `atomic sections`_, but the most visible missing thing is - that you don't get reports about the "conflicts" you get. This would - be the first thing that you need in order to start using atomic - sections more extensively. Also, for now: for better results, try to - explicitly force a transaction break just before (and possibly after) - each large atomic section, with ``time.sleep(0)``. + see more improvements (see `Low-level statistics`_ below). * Forking the process is slow because the complete memory needs to be - copied manually right now. + copied manually. A warning is printed to this effect. -* Very long-running processes should eventually crash on an assertion - error because of a non-implemented overflow of an internal 29-bit - number, but this requires at the very least ten hours --- more - probably, several days or more. +* Very long-running processes (on the order of days) will eventually + crash on an assertion error because of a non-implemented overflow of + an internal 29-bit number. .. _`report bugs`: https://bugs.pypy.org/ .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/translator/stm/src_stm/stm/core.h -Statistics +User Guide ========== + -When a non-main thread finishes, you get statistics printed to stderr, -looking like that:: +Drop-in replacement +------------------- - thread 0x7f73377fe600: - outside transaction 42182 0.506 s - run current 85466 0.000 s - run committed 34262 3.178 s - run aborted write write 6982 0.083 s - run aborted write read 550 0.005 s - run aborted inevitable 388 0.010 s - run aborted other 0 0.000 s - wait free segment 0 0.000 s - wait write read 78 0.027 s - wait inevitable 887 0.490 s - wait other 0 0.000 s - bookkeeping 51418 0.606 s - minor gc 162970 1.135 s - major gc 1 0.019 s - sync pause 59173 1.738 s - spin loop 129512 0.094 s +Multithreaded, CPU-intensive Python programs should work unchanged on +``pypy-stm``. They will run using multiple CPU cores in parallel. 
-The first number is a counter; the second number gives the associated -time (the amount of real time that the thread was in this state; the sum -of all the times should be equal to the total time between the thread's -start and the thread's end). The most important points are "run -committed", which gives the amount of useful work, and "outside -transaction", which should give the time spent e.g. in library calls -(right now it seems to be a bit larger than that; to investigate). -Everything else is overhead of various forms. (Short-, medium- and -long-term future work involves reducing this overhead :-) +The existing semantics of the GIL (Global Interpreter Lock) are +unchanged: although running on multiple cores in parallel, ``pypy-stm`` +gives the illusion that threads are run serially, with switches only +occurring between bytecodes, not in the middle of them. Programs can +rely on this: using ``shared_list.append()/pop()`` or +``shared_dict.setdefault()`` as synchronization mechanisms continues to +work as expected. -These statistics are not printed out for the main thread, for now. +This works by internally considering the points where a standard PyPy or +CPython would release the GIL, and replacing them with the boundaries of +"transactions". Like their database equivalent, multiple transactions +can execute in parallel, but will commit in some serial order. They +appear to behave as if they were completely run in this serialization +order. Atomic sections -=============== +--------------- -While one of the goal of pypy-stm is to give a GIL-free but otherwise -unmodified Python, the other goal is to push for a better way to use -multithreading. For this, you (as the Python programmer) get an API -in the ``__pypy__.thread`` submodule: +PyPy supports *atomic sections,* which are blocks of code which you want +to execute without "releasing the GIL". 
*This is experimental and may +be removed in the future.* In STM terms, this means blocks of code that +are executed while guaranteeing that the transaction is not interrupted +in the middle. -* ``__pypy__.thread.atomic``: a context manager (i.e. you use it in - a ``with __pypy__.thread.atomic:`` statement). It runs the whole - block of code without breaking the current transaction --- from - the point of view of a regular CPython/PyPy, this is equivalent to - saying that the GIL will not be released at all between the start and - the end of this block of code. +Here is a usage example:: -The obvious usage is to use atomic blocks in the same way as one would -use locks: to protect changes to some shared data, you do them in a -``with atomic`` block, just like you would otherwise do them in a ``with -mylock`` block after ``mylock = thread.allocate_lock()``. This allows -you not to care about acquiring the correct locks in the correct order; -it is equivalent to having only one global lock. This is how -transactional memory is `generally described`__: as a way to efficiently -execute such atomic blocks, running them in parallel while giving the -illusion that they run in some serial order. + with __pypy__.thread.atomic: + assert len(lst1) == 10 + x = lst1.pop(0) + lst1.append(x) -.. __: http://en.wikipedia.org/wiki/Transactional_memory +In this (bad) example, we are sure that the item popped off one end of +the list is appended again at the other end atomically. It means that +another thread can run ``len(lst1)`` or ``x in lst1`` without any +particular synchronization, and always see the same results, +respectively ``10`` and ``True``. It will never see the intermediate +state where ``lst1`` only contains 9 elements. Atomic sections are +similar to re-entrant locks (they can be nested), but additionally they +protect against the concurrent execution of *any* code instead of just +code that happens to be protected by the same lock in other threads. 
-However, the less obvious intended usage of atomic sections is as a -wide-ranging replacement of explicit threads. You can turn a program -that is not multi-threaded at all into a program that uses threads -internally, together with large atomic sections to keep the behavior -unchanged. This capability can be hidden in a library or in the -framework you use; the end user's code does not need to be explicitly -aware of using threads. For a simple example of this, see -`transaction.py`_ in ``lib_pypy``. The idea is that if you have a -program where the function ``f(key, value)`` runs on every item of some -big dictionary, you can replace the loop with:: +Note that the notion of atomic sections is very strong. If you write +code like this:: + + with __pypy__.thread.atomic: + time.sleep(10) + +then, if you think about it as if we had a GIL, you are executing a +10-seconds-long atomic transaction without releasing the GIL at all. +This prevents all other threads from progressing at all. While it is +not strictly true in ``pypy-stm``, the exact rules for when other +threads can progress or not are rather complicated; you have to consider +it likely that such a piece of code will eventually block all other +threads anyway. + +Note that if you want to experiment with ``atomic``, you may have to add +manually a transaction break just before the atomic block. This is +because the boundaries of the block are not guaranteed to be the +boundaries of the transaction: the latter is at least as big as the +block, but maybe bigger. Therefore, if you run a big atomic block, it +is a good idea to break the transaction just before. This can be done +e.g. by the hack of calling ``time.sleep(0)``. (This may be fixed at +some point.) + +There are also issues with the interaction of locks and atomic blocks. +This can be seen if you write to files (which have locks), including +with a ``print`` to standard output. 
If one thread tries to acquire a +lock while running in an atomic block, and another thread has got the +same lock, then the former may fail with a ``thread.error``. The reason +is that "waiting" for some condition to become true --while running in +an atomic block-- does not really make sense. For now you can work +around it by making sure that, say, all your prints are either in an +``atomic`` block or none of them are. (This kind of issue is +theoretically hard to solve.) + + +Locks +----- + +**Not Implemented Yet** + +The thread module's locks have their basic semantic unchanged. However, +using them (e.g. in ``with my_lock:`` blocks) starts an alternative +running mode, called `Software lock elision`_. This means that PyPy +will try to make sure that the transaction extends until the point where +the lock is released, and if it succeeds, then the acquiring and +releasing of the lock will be "elided". This means that in this case, +the whole transaction will technically not cause any write into the lock +object --- it was unacquired before, and is still unacquired after the +transaction. + +This is specially useful if two threads run ``with my_lock:`` blocks +with the same lock. If they each run a transaction that is long enough +to contain the whole block, then all writes into the lock will be elided +and the two transactions will not conflict with each other. As usual, +they will be serialized in some order: one of the two will appear to run +before the other. Simply, each of them executes an "acquire" followed +by a "release" in the same transaction. As explained above, the lock +state goes from "unacquired" to "unacquired" and can thus be left +unchanged. + +This approach can gracefully fail: unlike atomic sections, there is no +guarantee that the transaction runs until the end of the block. If you +perform any input/output while you hold the lock, the transaction will +end as usual just before the input/output operation. 
If this occurs, +then the lock elision mode is cancelled and the lock's "acquired" state +is really written. + +Even if the lock is really acquired already, a transaction doesn't have +to wait for it to become free again. It can enter the elision-mode anyway +and tentatively execute the content of the block. It is only at the end, +when trying to commit, that the thread will pause. As soon as the real +value stored in the lock is switched back to "unacquired", it can then +proceed and attempt to commit its already-executed transaction (which +can fail and abort and restart from scratch, as usual). + +Note that this is all *not implemented yet,* but we expect it to work +even if you acquire and release several locks. The elision-mode +transaction will extend until the first lock you acquired is released, +or until the code performs an input/output or a wait operation (for +example, waiting for another lock that is currently not free). In the +common case of acquiring several locks in nested order, they will all be +elided by the same transaction. + +.. _`software lock elision`: https://www.repository.cam.ac.uk/handle/1810/239410 + + +Atomic sections, Transactions, etc.: a better way to write parallel programs +---------------------------------------------------------------------------- + +(This section is based on locks as we plan to implement them, but also +works with the existing atomic sections.) + +In the cases where elision works, the block of code can run in parallel +with other blocks of code *even if they are protected by the same lock.* +You still get the illusion that the blocks are run sequentially. This +works even for multiple threads that each run a series of such blocks +and nothing else, protected by one single global lock. 
This is +basically the Python application-level equivalent of what was done with +the interpreter in ``pypy-stm``: while you think you are writing +thread-unfriendly code because of this global lock, actually the +underlying system is able to make it run on multiple cores anyway. + +This capability can be hidden in a library or in the framework you use; +the end user's code does not need to be explicitly aware of using +threads. For a simple example of this, there is `transaction.py`_ in +``lib_pypy``. The idea is that you write, or already have, some program +where the function ``f(key, value)`` runs on every item of some big +dictionary, say:: + + for key, value in bigdict.items(): + f(key, value) + +Then you simply replace the loop with:: for key, value in bigdict.items(): transaction.add(f, key, value) transaction.run() This code runs the various calls to ``f(key, value)`` using a thread -pool, but every single call is done in an atomic section. The end -result is that the behavior should be exactly equivalent: you don't get -any extra multithreading issue. +pool, but every single call is executed under the protection of a unique +lock. The end result is that the behavior is exactly equivalent --- in +fact it makes little sense to do it in this way on a non-STM PyPy or on +CPython. But on ``pypy-stm``, the various locked calls to ``f(key, +value)`` can tentatively be executed in parallel, even if the observable +result is as if they were executed in some serial order. This approach hides the notion of threads from the end programmer, including all the hard multithreading-related issues. This is not the @@ -223,41 +319,176 @@ only requires that the end programmer identifies where this parallelism is likely to be found, and communicates it to the system, using for example the ``transaction.add()`` scheme. - + .. _`transaction.py`: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/lib_pypy/transaction.py .. 
_OpenMP: http://en.wikipedia.org/wiki/OpenMP -================== -Other APIs in pypy-stm: +.. _`transactional_memory`: -* ``__pypy__.thread.getsegmentlimit()``: return the number of "segments" - in this pypy-stm. This is the limit above which more threads will not - be able to execute on more cores. (Right now it is limited to 4 due - to inter-segment overhead, but should be increased in the future. It +API of transactional_memory +--------------------------- + +The new pure Python module ``transactional_memory`` runs on both CPython +and PyPy, both with and without STM. It contains: + +* ``getsegmentlimit()``: return the number of "segments" in + this pypy-stm. This is the limit above which more threads will not be + able to execute on more cores. (Right now it is limited to 4 due to + inter-segment overhead, but should be increased in the future. It should also be settable, and the default value should depend on the - number of actual CPUs.) + number of actual CPUs.) If STM is not available, this returns 1. -* ``__pypy__.thread.exclusive_atomic``: same as ``atomic``, but - raises an exception if you attempt to nest it inside another - ``atomic``. +* ``print_abort_info(minimum_time=0.0)``: debugging help. Each thread + remembers the longest abort or pause it did because of cross-thread + contention_. This function prints it to ``stderr`` if the time lost + is greater than ``minimum_time`` seconds. The record is then + cleared, to make it ready for new events. This function returns + ``True`` if it printed a report, and ``False`` otherwise. -* ``__pypy__.thread.signals_enabled``: a context manager that runs - its block with signals enabled. By default, signals are only - enabled in the main thread; a non-main thread will not receive - signals (this is like CPython). Enabling signals in non-main threads - is useful for libraries where threads are hidden and the end user is - not expecting his code to run elsewhere than in the main thread. 
-Note that all of this API is (or will be) implemented in a regular PyPy -too: for example, ``with atomic`` will simply mean "don't release the -GIL" and ``getsegmentlimit()`` will return 1. +API of __pypy__.thread +---------------------- -================== +The ``__pypy__.thread`` submodule is a built-in module of PyPy that +contains a few internal built-in functions used by the +``transactional_memory`` module, plus the following: + +* ``__pypy__.thread.atomic``: a context manager to run a block in + fully atomic mode, without "releasing the GIL". (May be eventually + removed?) + +* ``__pypy__.thread.signals_enabled``: a context manager that runs its + block with signals enabled. By default, signals are only enabled in + the main thread; a non-main thread will not receive signals (this is + like CPython). Enabling signals in non-main threads is useful for + libraries where threads are hidden and the end user is not expecting + his code to run elsewhere than in the main thread. + + +.. _contention: + +Conflicts +--------- + +Based on Software Transactional Memory, the ``pypy-stm`` solution is +prone to "conflicts". To repeat the basic idea, threads execute their code +speculatively, and at known points (e.g. between bytecodes) they +coordinate with each other to agree on which order their respective +actions should be "committed", i.e. become globally visible. Each +duration of time between two commit-points is called a transaction. + +A conflict occurs when there is no consistent ordering. The classical +example is if two threads both tried to change the value of the same +global variable. In that case, only one of them can be allowed to +proceed, and the other one must be either paused or aborted (restarting +the transaction). If this occurs too often, parallelization fails. + +How much actual parallelization a multithreaded program can see is a bit +subtle. 
Basically, a program not using ``__pypy__.thread.atomic`` or +eliding locks, or doing so for very short amounts of time, will +parallelize almost freely (as long as it's not some artificial example +where, say, all threads try to increase the same global counter and do +nothing else). + +However, if the program requires longer transactions, it comes +with less obvious rules. The exact details may vary from version to +version, too, until they are a bit more stabilized. Here is an +overview. + +Parallelization works as long as two principles are respected. The +first one is that the transactions must not *conflict* with each other. +The most obvious sources of conflicts are threads that all increment a +global shared counter, or that all store the result of their +computations into the same list --- or, more subtly, that all ``pop()`` +the work to do from the same list, because that is also a mutation of +the list. (It is expected that some STM-aware library will eventually +be designed to help with conflict problems, like a STM-aware queue.) + +A conflict occurs as follows: when a transaction commits (i.e. finishes +successfully) it may cause other transactions that are still in progress +to abort and retry. This is a waste of CPU time, but even in the worst +case scenario it is not worse than a GIL, because at least one +transaction succeeds (so we get at worst N-1 CPUs doing useless jobs and +1 CPU doing a job that commits successfully). + +Conflicts do occur, of course, and it is pointless to try to avoid them +all. For example they can be abundant during some warm-up phase. What +is important is to keep them rare enough in total. + +Another issue is that of avoiding long-running so-called "inevitable" +transactions ("inevitable" is taken in the sense of "which cannot be +avoided", i.e. transactions which cannot abort any more). Transactions +like that should only occur if you use ``__pypy__.thread.atomic``, +generally because of I/O in atomic blocks. 
They work, but the +transaction is turned inevitable before the I/O is performed. For all +the remaining execution time of the atomic block, they will impede +parallel work. The best is to organize the code so that such operations +are done completely outside ``__pypy__.thread.atomic``. + +(This is related to the fact that blocking I/O operations are +discouraged with Twisted, and if you really need them, you should do +them on their own separate thread.) + +In case of lock elision, we don't get long-running inevitable +transactions, but a different problem can occur: doing I/O cancels lock +elision, and the lock turns into a real lock, preventing other threads +from committing if they also need this lock. (More about it when lock +elision is implemented and tested.) + + + +Implementation +============== + +XXX this section mostly empty for now + + +Low-level statistics +-------------------- + +When a non-main thread finishes, you get low-level statistics printed to +stderr, looking like that:: + + thread 0x7f73377fe600: + outside transaction 42182 0.506 s + run current 85466 0.000 s + run committed 34262 3.178 s + run aborted write write 6982 0.083 s + run aborted write read 550 0.005 s + run aborted inevitable 388 0.010 s + run aborted other 0 0.000 s + wait free segment 0 0.000 s + wait write read 78 0.027 s + wait inevitable 887 0.490 s + wait other 0 0.000 s + sync commit soon 1 0.000 s + bookkeeping 51418 0.606 s + minor gc 162970 1.135 s + major gc 1 0.019 s + sync pause 59173 1.738 s + longest recordered marker 0.000826 s + "File "x.py", line 5, in f" + +On each line, the first number is a counter, and the second number gives +the associated time --- the amount of real time that the thread was in +this state. The sum of all the times should be equal to the total time +between the thread's start and the thread's end. 
The most important +points are "run committed", which gives the amount of useful work, and +"outside transaction", which should give the time spent e.g. in library +calls (right now it seems to be larger than that; to investigate). The +various "run aborted" and "wait" entries are time lost due to +conflicts_. Everything else is overhead of various forms. (Short-, +medium- and long-term future work involves reducing this overhead :-) + +The last two lines are special; they are an internal marker read by +``transactional_memory.print_abort_info()``. + +These statistics are not printed out for the main thread, for now. Reference to implementation details -=================================== +----------------------------------- The core of the implementation is in a separate C library called stmgc_, in the c7_ subdirectory. Please see the `README.txt`_ for more @@ -282,3 +513,15 @@ .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/translator/stm/src_stm/stmgcintf.c .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/jit/backend/llsupport/stmrewrite.py .. __: https://bitbucket.org/pypy/pypy/raw/stmgc-c7/rpython/jit/backend/x86/assembler.py + + + +See also +======== + +See also +https://bitbucket.org/pypy/pypy/raw/default/pypy/doc/project-ideas.rst +(section about STM). + + +.. include:: _ref.txt diff --git a/pypy/doc/whatsnew-2.3.0.rst b/pypy/doc/whatsnew-2.3.0.rst --- a/pypy/doc/whatsnew-2.3.0.rst +++ b/pypy/doc/whatsnew-2.3.0.rst @@ -167,3 +167,6 @@ .. branch: fix-tpname Changes hacks surrounding W_TypeObject.name to match CPython's tp_name + +.. branch: tkinter_osx_packaging +OS/X specific header path diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,4 +3,4 @@ ======================= .. this is a revision shortly after release-2.3.x -.. startrev: ec864bd08d50 +.. 
startrev: b2cc67adbaad diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py --- a/pypy/interpreter/test/test_app_main.py +++ b/pypy/interpreter/test/test_app_main.py @@ -7,6 +7,11 @@ from rpython.tool.udir import udir from contextlib import contextmanager from pypy.conftest import pypydir +from pypy.module.sys.version import PYPY_VERSION +from lib_pypy._pypy_interact import irc_header + +is_release = PYPY_VERSION[3] == "final" + banner = sys.version.splitlines()[0] @@ -241,6 +246,10 @@ child = self.spawn([]) child.expect('Python ') # banner child.expect('>>> ') # prompt + if is_release: + assert irc_header not in child.before + else: + assert irc_header in child.before child.sendline('[6*7]') child.expect(re.escape('[42]')) child.sendline('def f(x):') diff --git a/pypy/module/fcntl/interp_fcntl.py b/pypy/module/fcntl/interp_fcntl.py --- a/pypy/module/fcntl/interp_fcntl.py +++ b/pypy/module/fcntl/interp_fcntl.py @@ -62,8 +62,8 @@ fcntl_int = external('fcntl', [rffi.INT, rffi.INT, rffi.INT], rffi.INT) fcntl_str = external('fcntl', [rffi.INT, rffi.INT, rffi.CCHARP], rffi.INT) fcntl_flock = external('fcntl', [rffi.INT, rffi.INT, _flock], rffi.INT) -ioctl_int = external('ioctl', [rffi.INT, rffi.INT, rffi.INT], rffi.INT) -ioctl_str = external('ioctl', [rffi.INT, rffi.INT, rffi.CCHARP], rffi.INT) +ioctl_int = external('ioctl', [rffi.INT, rffi.UINT, rffi.INT], rffi.INT) +ioctl_str = external('ioctl', [rffi.INT, rffi.UINT, rffi.CCHARP], rffi.INT) has_flock = cConfig.has_flock if has_flock: diff --git a/pypy/module/fcntl/test/test_fcntl.py b/pypy/module/fcntl/test/test_fcntl.py --- a/pypy/module/fcntl/test/test_fcntl.py +++ b/pypy/module/fcntl/test/test_fcntl.py @@ -11,7 +11,9 @@ os.unlink(i) class AppTestFcntl: - spaceconfig = dict(usemodules=('fcntl', 'array', 'struct', 'termios', 'select', 'rctime')) + spaceconfig = dict(usemodules=('fcntl', 'array', 'struct', 'termios', + 'select', 'rctime')) + def setup_class(cls): tmpprefix = 
str(udir.ensure('test_fcntl', dir=1).join('tmp_')) cls.w_tmp = cls.space.wrap(tmpprefix) @@ -267,6 +269,31 @@ os.close(mfd) os.close(sfd) + def test_ioctl_signed_unsigned_code_param(self): + import fcntl + import os + import pty + import struct + import termios + + mfd, sfd = pty.openpty() + try: + if termios.TIOCSWINSZ < 0: + set_winsz_opcode_maybe_neg = termios.TIOCSWINSZ + set_winsz_opcode_pos = termios.TIOCSWINSZ & 0xffffffffL + else: + set_winsz_opcode_pos = termios.TIOCSWINSZ + set_winsz_opcode_maybe_neg, = struct.unpack("i", + struct.pack("I", termios.TIOCSWINSZ)) + + our_winsz = struct.pack("HHHH",80,25,0,0) + # test both with a positive and potentially negative ioctl code + new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_pos, our_winsz) + new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, our_winsz) + finally: + os.close(mfd) + os.close(sfd) + def test_large_flag(self): import sys if any(plat in sys.platform diff --git a/pypy/module/operator/test/test_operator.py b/pypy/module/operator/test/test_operator.py --- a/pypy/module/operator/test/test_operator.py +++ b/pypy/module/operator/test/test_operator.py @@ -195,4 +195,5 @@ import operator assert operator.index(42) == 42 assert operator.__index__(42) == 42 - raises(TypeError, operator.index, "abc") + exc = raises(TypeError, operator.index, "abc") + assert str(exc.value) == "'str' object cannot be interpreted as an index" diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py --- a/pypy/objspace/descroperation.py +++ b/pypy/objspace/descroperation.py @@ -794,13 +794,18 @@ l = ["space.isinstance_w(w_result, %s)" % x for x in checkerspec] checker = " or ".join(l) + if targetname == 'index': + msg = "'%%T' object cannot be interpreted as an index" + else: + msg = "unsupported operand type for %(targetname)s(): '%%T'" + msg = msg % locals() source = """if 1: def %(targetname)s(space, w_obj): w_impl = space.lookup(w_obj, %(specialname)r) if w_impl is None: raise 
oefmt(space.w_TypeError, - "unsupported operand type for %(targetname)s(): " - "'%%T'", w_obj) + %(msg)r, + w_obj) w_result = space.get_and_call_function(w_impl, w_obj) if %(checker)s: diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py --- a/pypy/tool/release/force-builds.py +++ b/pypy/tool/release/force-builds.py @@ -20,11 +20,12 @@ 'own-linux-x86-32', 'own-linux-x86-64', 'own-linux-armhf', + 'own-win-x86-32', # 'own-macosx-x86-32', # 'pypy-c-app-level-linux-x86-32', # 'pypy-c-app-level-linux-x86-64', # 'pypy-c-stackless-app-level-linux-x86-32', - 'pypy-c-app-level-win-x86-32', +# 'pypy-c-app-level-win-x86-32', 'pypy-c-jit-linux-x86-32', 'pypy-c-jit-linux-x86-64', 'pypy-c-jit-macosx-x86-64', diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -17,13 +17,8 @@ if sys.platform.startswith("linux"): DEFL_ROOTFINDER_WITHJIT = "asmgcc" - ROOTFINDERS = ["n/a", "shadowstack", "asmgcc"] -elif compiler.name == 'msvc': - DEFL_ROOTFINDER_WITHJIT = "shadowstack" - ROOTFINDERS = ["n/a", "shadowstack"] else: DEFL_ROOTFINDER_WITHJIT = "shadowstack" - ROOTFINDERS = ["n/a", "shadowstack", "asmgcc"] IS_64_BITS = sys.maxint > 2147483647 @@ -91,7 +86,7 @@ default=IS_64_BITS, cmdline="--gcremovetypeptr"), ChoiceOption("gcrootfinder", "Strategy for finding GC Roots (framework GCs only)", - ROOTFINDERS, + ["n/a", "shadowstack", "asmgcc"], "shadowstack", cmdline="--gcrootfinder", requires={ @@ -372,9 +367,10 @@ # if we have specified strange inconsistent settings. 
config.translation.gc = config.translation.gc - # disallow asmgcc on OS/X + # disallow asmgcc on OS/X and on Win32 if config.translation.gcrootfinder == "asmgcc": - assert sys.platform != "darwin" + assert sys.platform != "darwin", "'asmgcc' not supported on OS/X" + assert sys.platform != "win32", "'asmgcc' not supported on Win32" # ---------------------------------------------------------------- diff --git a/rpython/translator/c/gcc/trackgcroot.py b/rpython/translator/c/gcc/trackgcroot.py --- a/rpython/translator/c/gcc/trackgcroot.py +++ b/rpython/translator/c/gcc/trackgcroot.py @@ -296,10 +296,11 @@ # trim: instructions with no framesize are removed from self.insns, # and from the 'previous_insns' lists - assert hasattr(self.insns[0], 'framesize') - old = self.insns[1:] - del self.insns[1:] - for insn in old: + if 0: # <- XXX disabled because it seems bogus, investigate more + assert hasattr(self.insns[0], 'framesize') + old = self.insns[1:] + del self.insns[1:] + for insn in old: if hasattr(insn, 'framesize'): self.insns.append(insn) insn.previous_insns = [previnsn for previnsn in insn.previous_insns diff --git a/rpython/translator/c/src/asm.c b/rpython/translator/c/src/asm.c --- a/rpython/translator/c/src/asm.c +++ b/rpython/translator/c/src/asm.c @@ -12,6 +12,6 @@ # include "src/asm_ppc.c" #endif -#if defined(MS_WINDOWS) && defined(_MSC_VER) +#if defined(_MSC_VER) # include "src/asm_msvc.c" #endif diff --git a/rpython/translator/c/src/asm_msvc.c b/rpython/translator/c/src/asm_msvc.c --- a/rpython/translator/c/src/asm_msvc.c +++ b/rpython/translator/c/src/asm_msvc.c @@ -1,5 +1,6 @@ #ifdef PYPY_X86_CHECK_SSE2 #include <intrin.h> +#include <stdio.h> void pypy_x86_check_sse2(void) { int features; _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit