Author: Ronan Lamy <[email protected]>
Branch: testing-cleanup
Changeset: r85081:80829afb3cac
Date: 2016-06-10 15:18 +0100
http://bitbucket.org/pypy/pypy/changeset/80829afb3cac/
Log: hg merge default
diff too long, truncating to 2000 out of 2410 lines
diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -25,3 +25,4 @@
80ef432a32d9baa4b3c5a54c215e8ebe499f6374 release-5.1.2
40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
+c09c19272c990a0611b17569a0085ad1ab00c8ff release-pypy2.7-v5.3
diff --git a/pypy/doc/release-pypy2.7-v5.3.0.rst
b/pypy/doc/release-pypy2.7-v5.3.0.rst
--- a/pypy/doc/release-pypy2.7-v5.3.0.rst
+++ b/pypy/doc/release-pypy2.7-v5.3.0.rst
@@ -176,8 +176,8 @@
* Reduce the size of generated code by using the same function objects in
all generated subclasses
- * Share cpyext Py* function wrappers according to the signature, shrining the
- translated libpypy.so by about
+ * Share cpyext Py* function wrappers according to the signature, shrinking
the
+ translated libpypy.so by about 10% (measured without the JIT)
* Compile c snippets with -Werror, and fix warnings it exposed
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,20 @@
.. this is a revision shortly after release-pypy2.7-v5.3
.. startrev: 873218a739f1
+.. branch: fix-gen-dfa
+
+Resolves an issue with the generator script to build the dfa for Python syntax.
+
+.. branch: z196-support
+
+Fixes a critical issue in the register allocator and extends support on s390x.
+PyPy runs and translates on the s390x revisions z10 (released February 2008,
experimental)
+and z196 (released August 2010) in addition to zEC12 and z13.
+To target e.g. z196 on a zEC12 machine supply CFLAGS="-march=z196" to your
shell environment.
+
+.. branch: s390x-5.3-catchup
+
+Implement the backend related changes for s390x.
+
+.. branch: incminimark-ll_assert
+.. branch: vmprof-openbsd
diff --git a/pypy/interpreter/pyparser/genpytokenize.py
b/pypy/interpreter/pyparser/genpytokenize.py
--- a/pypy/interpreter/pyparser/genpytokenize.py
+++ b/pypy/interpreter/pyparser/genpytokenize.py
@@ -191,7 +191,7 @@
newArcPair(states, EMPTY),
pseudoExtras, number, funny, contStr, name))
dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken)
- return DFA(dfaStates, dfaAccepts)
+ return DFA(dfaStates, dfaAccepts), dfaStates
# ______________________________________________________________________
@@ -205,7 +205,9 @@
newArcPair(states, DEFAULT),
any(states, notGroupStr(states, "'\\")))),
newArcPair(states, "'"))
- singleDFA = DFA(*nfaToDfa(states, *single))
+ states, accepts = nfaToDfa(states, *single)
+ singleDFA = DFA(states, accepts)
+ states_singleDFA = states
states = []
double = chain(states,
any(states, notGroupStr(states, '"\\')),
@@ -215,7 +217,9 @@
newArcPair(states, DEFAULT),
any(states, notGroupStr(states, '"\\')))),
newArcPair(states, '"'))
- doubleDFA = DFA(*nfaToDfa(states, *double))
+ states, accepts = nfaToDfa(states, *double)
+ doubleDFA = DFA(states, accepts)
+ states_doubleDFA = states
states = []
single3 = chain(states,
any(states, notGroupStr(states, "'\\")),
@@ -230,7 +234,9 @@
notChainStr(states, "''"))),
any(states, notGroupStr(states, "'\\")))),
chainStr(states, "'''"))
- single3DFA = NonGreedyDFA(*nfaToDfa(states, *single3))
+ states, accepts = nfaToDfa(states, *single3)
+ single3DFA = NonGreedyDFA(states, accepts)
+ states_single3DFA = states
states = []
double3 = chain(states,
any(states, notGroupStr(states, '"\\')),
@@ -245,9 +251,11 @@
notChainStr(states, '""'))),
any(states, notGroupStr(states, '"\\')))),
chainStr(states, '"""'))
- double3DFA = NonGreedyDFA(*nfaToDfa(states, *double3))
- map = {"'" : singleDFA,
- '"' : doubleDFA,
+ states, accepts = nfaToDfa(states, *double3)
+ double3DFA = NonGreedyDFA(states, accepts)
+ states_double3DFA = states
+ map = {"'" : (singleDFA, states_singleDFA),
+ '"' : (doubleDFA, states_doubleDFA),
"r" : None,
"R" : None,
"u" : None,
@@ -257,25 +265,30 @@
for uniPrefix in ("", "u", "U", "b", "B", ):
for rawPrefix in ("", "r", "R"):
prefix = uniPrefix + rawPrefix
- map[prefix + "'''"] = single3DFA
- map[prefix + '"""'] = double3DFA
+ map[prefix + "'''"] = (single3DFA, states_single3DFA)
+ map[prefix + '"""'] = (double3DFA, states_double3DFA)
return map
# ______________________________________________________________________
-def output(name, dfa_class, dfa):
+def output(name, dfa_class, dfa, states):
import textwrap
+ lines = []
i = 0
for line in textwrap.wrap(repr(dfa.accepts), width = 50):
if i == 0:
- print "accepts =", line
+ lines.append("accepts = ")
else:
- print " ", line
+ lines.append(" ")
+ lines.append(line)
+ lines.append("\n")
i += 1
import StringIO
- print "states = ["
- for numstate, state in enumerate(dfa.states):
- print " #", numstate
+ lines.append("states = [\n")
+ for numstate, state in enumerate(states):
+ lines.append(" # ")
+ lines.append(str(numstate))
+ lines.append('\n')
s = StringIO.StringIO()
i = 0
for k, v in sorted(state.items()):
@@ -298,22 +311,28 @@
for line in text:
line = line.replace('::', ': ')
if i == 0:
- print ' {' + line
+ lines.append(' {')
else:
- print ' ' + line
+ lines.append(' ')
+ lines.append(line)
+ lines.append('\n')
i += 1
- print " ]"
- print "%s = automata.%s(states, accepts)" % (name, dfa_class)
- print
+ lines.append(" ]\n")
+ lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class))
+ return ''.join(lines)
def main ():
- pseudoDFA = makePyPseudoDFA()
- output("pseudoDFA", "DFA", pseudoDFA)
+ pseudoDFA, states_pseudoDFA = makePyPseudoDFA()
+ print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA)
endDFAMap = makePyEndDFAMap()
- output("double3DFA", "NonGreedyDFA", endDFAMap['"""'])
- output("single3DFA", "NonGreedyDFA", endDFAMap["'''"])
- output("singleDFA", "DFA", endDFAMap["'"])
- output("doubleDFA", "DFA", endDFAMap['"'])
+ dfa, states = endDFAMap['"""']
+ print output("double3DFA", "NonGreedyDFA", dfa, states)
+ dfa, states = endDFAMap["'''"]
+ print output("single3DFA", "NonGreedyDFA", dfa, states)
+ dfa, states = endDFAMap["'"]
+ print output("singleDFA", "DFA", dfa, states)
+ dfa, states = endDFAMap["\""]
+ print output("doubleDFA", "DFA", dfa, states)
# ______________________________________________________________________
diff --git a/pypy/interpreter/pyparser/test/test_gendfa.py
b/pypy/interpreter/pyparser/test/test_gendfa.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/test_gendfa.py
@@ -0,0 +1,16 @@
+from pypy.interpreter.pyparser.automata import DFA, DEFAULT
+from pypy.interpreter.pyparser.genpytokenize import output
+
+def test_states():
+ states = [{"\x00": 1}, {"\x01": 0}]
+ d = DFA(states[:], [False, True])
+ assert output('test', DFA, d, states) == """\
+accepts = [False, True]
+states = [
+ # 0
+ {'\\x00': 1},
+ # 1
+ {'\\x01': 0},
+ ]
+test = automata.pypy.interpreter.pyparser.automata.DFA(states, accepts)
+"""
diff --git a/pypy/module/__pypy__/interp_intop.py
b/pypy/module/__pypy__/interp_intop.py
--- a/pypy/module/__pypy__/interp_intop.py
+++ b/pypy/module/__pypy__/interp_intop.py
@@ -2,21 +2,10 @@
from rpython.rtyper.lltypesystem import lltype
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rlib.rarithmetic import r_uint, intmask
+from rpython.rlib.rarithmetic import int_c_div, int_c_mod
from rpython.rlib import jit
-# XXX maybe temporary: hide llop.int_{floordiv,mod} from the JIT,
-# because now it expects only Python-style divisions, not the
-# C-style divisions of these two ll operations
[email protected]_look_inside
-def _int_floordiv(n, m):
- return llop.int_floordiv(lltype.Signed, n, m)
-
[email protected]_look_inside
-def _int_mod(n, m):
- return llop.int_mod(lltype.Signed, n, m)
-
-
@unwrap_spec(n=int, m=int)
def int_add(space, n, m):
return space.wrap(llop.int_add(lltype.Signed, n, m))
@@ -31,11 +20,11 @@
@unwrap_spec(n=int, m=int)
def int_floordiv(space, n, m):
- return space.wrap(_int_floordiv(n, m))
+ return space.wrap(int_c_div(n, m))
@unwrap_spec(n=int, m=int)
def int_mod(space, n, m):
- return space.wrap(_int_mod(n, m))
+ return space.wrap(int_c_mod(n, m))
@unwrap_spec(n=int, m=int)
def int_lshift(space, n, m):
diff --git a/pypy/module/_cffi_backend/ccallback.py
b/pypy/module/_cffi_backend/ccallback.py
--- a/pypy/module/_cffi_backend/ccallback.py
+++ b/pypy/module/_cffi_backend/ccallback.py
@@ -220,6 +220,11 @@
if rffi.cast(lltype.Signed, res) != clibffi.FFI_OK:
raise oefmt(space.w_SystemError,
"libffi failed to build this callback")
+ if closure_ptr.c_user_data != unique_id:
+ raise oefmt(space.w_SystemError,
+ "ffi_prep_closure(): bad user_data (it seems that the "
+ "version of the libffi library seen at runtime is "
+ "different from the 'ffi.h' file seen at compile-time)")
def py_invoke(self, ll_res, ll_args):
jitdriver1.jit_merge_point(callback=self,
diff --git a/pypy/module/_cffi_backend/func.py
b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -201,6 +201,9 @@
else:
copy_string_to_raw(llstr(src_string), dest_data, 0, n)
else:
+ # nowadays this case should be rare or impossible: as far as
+ # I know, all common types implementing the *writable* buffer
+ # interface now support get_raw_address()
if src_is_ptr:
for i in range(n):
dest_buf.setitem(i, src_data[i])
diff --git a/pypy/module/cpyext/include/pymem.h
b/pypy/module/cpyext/include/pymem.h
--- a/pypy/module/cpyext/include/pymem.h
+++ b/pypy/module/cpyext/include/pymem.h
@@ -1,5 +1,11 @@
#include <stdlib.h>
+#ifndef Py_PYMEM_H
+#define Py_PYMEM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
#define PyMem_MALLOC(n) malloc((n) ? (n) : 1)
#define PyMem_REALLOC(p, n) realloc((p), (n) ? (n) : 1)
@@ -44,3 +50,9 @@
*/
#define PyMem_Del PyMem_Free
#define PyMem_DEL PyMem_FREE
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_PYMEM_H */
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py
b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -23,7 +23,7 @@
guard_true(i14, descr=...)
guard_not_invalidated(descr=...)
i16 = int_eq(i6, %d)
- i19 = call_i(ConstClass(ll_int_mod__Signed_Signed), i6, i10,
descr=<Calli . ii EF=0 OS=14>)
+ i19 = call_i(ConstClass(ll_int_py_mod__Signed_Signed), i6, i10,
descr=<Calli . ii EF=0 OS=14>)
i21 = int_lt(i19, 0)
guard_false(i21, descr=...)
i22 = int_ge(i19, i10)
diff --git a/pypy/module/select/test/test_epoll.py
b/pypy/module/select/test/test_epoll.py
--- a/pypy/module/select/test/test_epoll.py
+++ b/pypy/module/select/test/test_epoll.py
@@ -20,6 +20,10 @@
self.w_sockets = self.space.wrap([])
if platform.machine().startswith('arm'):
self.w_timeout = self.space.wrap(0.06)
+ if platform.machine().startswith('s390x'):
+ # s390x is not slow, but it seems there is one case when epoll
+ # modify method is called that takes longer on s390x
+ self.w_timeout = self.space.wrap(0.06)
else:
self.w_timeout = self.space.wrap(0.02)
diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -3,10 +3,12 @@
It uses 'pypy/goal/pypy-c' and parts of the rest of the working
copy. Usage:
- package.py [--options] pypy-VER-PLATFORM
+ package.py [--options] --archive-name=pypy-VER-PLATFORM
The output is found in the directory from --builddir,
by default /tmp/usession-YOURNAME/build/.
+
+For a list of all options, see 'package.py --help'.
"""
import shutil
@@ -61,6 +63,7 @@
name = options.name
if not name:
name = 'pypy-nightly'
+ assert '/' not in name
rename_pypy_c = options.pypy_c
override_pypy_c = options.override_pypy_c
@@ -288,26 +291,12 @@
help='destination dir for archive')
parser.add_argument('--override_pypy_c', type=str, default='',
help='use as pypy exe instead of pypy/goal/pypy-c')
- # Positional arguments, for backward compatability with buldbots
- parser.add_argument('extra_args', help='optional interface to positional
arguments', nargs=argparse.REMAINDER,
- metavar='[archive-name] [rename_pypy_c] [targetdir] [override_pypy_c]',
- )
options = parser.parse_args(args)
- # Handle positional arguments, choke if both methods are used
- for i,target, default in ([1, 'name', ''], [2, 'pypy_c', pypy_exe],
- [3, 'targetdir', ''], [4,'override_pypy_c', '']):
- if len(options.extra_args)>i:
- if getattr(options, target) != default:
- print 'positional argument',i,target,'already has
value',getattr(options, target)
- parser.print_help()
- return
- setattr(options, target, options.extra_args[i])
if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"):
options.nostrip = True
-
if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"):
- options.tk = True
+ options.no_tk = True
if not options.builddir:
# The import actually creates the udir directory
from rpython.tool.udir import udir
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -3,7 +3,7 @@
min=3
rev=0
branchname=release-$maj.x # ==OR== release-$maj.$min.x
-tagname=release-$maj.$min.$rev # ==OR== release-$maj.$min
+tagname=release-pypy2.7-v$maj.$min # ==OR== release-$maj.$min
echo checking hg log -r $branchname
hg log -r $branchname || exit 1
@@ -34,17 +34,19 @@
plat=win32
wget http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.zip
unzip pypy-c-jit-latest-$plat.zip
+rm pypy-c-jit-latest-$plat.zip
mv pypy-c-jit-*-$plat $rel-$plat
-zip -r $rel-$plat.zip $rel-$plat
+zip -rq $rel-$plat.zip $rel-$plat
rm -rf $rel-$plat
# Do this after creating a tag, note the untarred directory is pypy-pypy-<hash>
# so make sure there is not another one
wget https://bitbucket.org/pypy/pypy/get/$tagname.tar.bz2
tar -xf $tagname.tar.bz2
+rm $tagname.tar.bz2
mv pypy-pypy-* $rel-src
tar --owner=root --group=root --numeric-owner -cjf $rel-src.tar.bz2 $rel-src
-zip -r $rel-src.zip $rel-src
+zip -rq $rel-src.zip $rel-src
rm -rf $rel-src
# Print out the md5, sha1, sha256
diff --git a/pypy/tool/release/test/test_package.py
b/pypy/tool/release/test/test_package.py
--- a/pypy/tool/release/test/test_package.py
+++ b/pypy/tool/release/test/test_package.py
@@ -21,8 +21,10 @@
def test_dir_structure(self, test='test'):
retval, builddir = package.package(
- '--without-cffi', str(py.path.local(pypydir).dirpath()),
- test, self.rename_pypy_c, _fake=True)
+ '--without-cffi',
+ '--archive-name', test,
+ '--rename_pypy_c', self.rename_pypy_c,
+ _fake=True)
assert retval == 0
prefix = builddir.join(test)
cpyver = '%d.%d' % CPYTHON_VERSION[:2]
@@ -71,8 +73,9 @@
builddir = udir.ensure("build", dir=True)
retval, builddir = package.package(
'--without-cffi', '--builddir', str(builddir),
- str(py.path.local(pypydir).dirpath()),
- test, self.rename_pypy_c, _fake=True)
+ '--archive-name', test,
+ '--rename_pypy_c', self.rename_pypy_c,
+ _fake=True)
def test_with_zipfile_module(self):
prev = package.USE_ZIPFILE_MODULE
diff --git a/rpython/annotator/test/test_annrpython.py
b/rpython/annotator/test/test_annrpython.py
--- a/rpython/annotator/test/test_annrpython.py
+++ b/rpython/annotator/test/test_annrpython.py
@@ -4610,6 +4610,19 @@
a.build_types(fd, [])
py.test.raises(AnnotatorError, a.build_types, fb, [])
+ def test_annotate_generator_with_unreachable_yields(self):
+ def f(n):
+ if n < 0:
+ yield 42
+ yield n
+ yield n
+ def main(n):
+ for x in f(abs(n)):
+ pass
+ #
+ a = self.RPythonAnnotator()
+ a.build_types(main, [int])
+
def g(n):
return [0, 1, 2, n]
diff --git a/rpython/doc/arch/index.rst b/rpython/doc/arch/index.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/arch/index.rst
@@ -0,0 +1,11 @@
+.. _arch_index:
+
+Architecture specific notes
+===========================
+
+Here you can find some architecture specific notes.
+
+.. toctree::
+ :maxdepth: 1
+
+ s390x
diff --git a/rpython/doc/arch/s390x.rst b/rpython/doc/arch/s390x.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/arch/s390x.rst
@@ -0,0 +1,34 @@
+.. _s390x:
+
+IBM Mainframe S390X
+===================
+
+Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
+Note that this architecture is big endian.
+
+Currently supported ISAs:
+
+* z13 (released January 2015)
+* zEC12 (released September 2012)
+* z196 (released August 2010)
+* z10 (released February 2008)
+
+To check if all the necessary CPU facilities are installed
+on the subject machine, please run the test using a copy of the pypy
+source code::
+
+ $ ./pytest.py rpython/jit/backend/zarch/test/test_assembler -v -k
'test_facility'
+
+In addition you can run the auto encoding test to check if your Linux GCC tool
chain
+is able to compile all instructions used in the JIT backend::
+
+ $ ./pytest.py rpython/jit/backend/zarch/test/test_auto_encoding.py -v
+
+Translating
+-----------
+
+Specifically check for these two dependencies, as some older
+Linux distributions ship outdated versions.
+
+* libffi (version 3.0 or later).
+* CPython (version 2.7 or later).
diff --git a/rpython/doc/index.rst b/rpython/doc/index.rst
--- a/rpython/doc/index.rst
+++ b/rpython/doc/index.rst
@@ -37,7 +37,6 @@
arm
logging
- s390x
Writing your own interpreter in RPython
@@ -61,6 +60,7 @@
getting-started
dir-reference
jit/index
+ arch/index
translation
rtyper
garbage_collection
diff --git a/rpython/doc/s390x.rst b/rpython/doc/s390x.rst
deleted file mode 100644
--- a/rpython/doc/s390x.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. _s390x:
-
-S390X JIT Backend
-=================
-
-Our JIT implements the 64 bit version of the IBM Mainframe called s390x.
-Note that this architecture is big endian.
-
-The following facilities need to be installed to operate
-correctly (all of the machines used for development these where installed):
-
-* General-Instructions-Extension
-* Long-Displacement
-* Binary Floating Point (IEEE)
-
-Translating
------------
-
-Ensure that libffi is installed (version should do > 3.0.+).
-CPython should be version 2.7.+.
diff --git a/rpython/flowspace/generator.py b/rpython/flowspace/generator.py
--- a/rpython/flowspace/generator.py
+++ b/rpython/flowspace/generator.py
@@ -132,13 +132,14 @@
del block.operations[index]
newlink = split_block(block, index)
newblock = newlink.target
+ varnames = get_variable_names(newlink.args)
#
class Resume(AbstractPosition):
_immutable_ = True
+ _attrs_ = varnames
block = newblock
Resume.__name__ = 'Resume%d' % len(mappings)
mappings.append(Resume)
- varnames = get_variable_names(newlink.args)
#
_insert_reads(newblock, varnames)
#
diff --git a/rpython/jit/backend/arm/regalloc.py
b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -901,6 +901,8 @@
size_box = op.getarg(0)
assert isinstance(size_box, ConstInt)
size = size_box.getint()
+ # hint: try to move unrelated registers away from r0 and r1 now
+ self.rm.spill_or_move_registers_before_call([r.r0, r.r1])
self.rm.force_allocate_reg(op, selected_reg=r.r0)
t = TempInt()
@@ -924,6 +926,7 @@
# sizeloc must be in a register, but we can free it now
# (we take care explicitly of conflicts with r0 or r1)
sizeloc = self.rm.make_sure_var_in_reg(size_box)
+ self.rm.spill_or_move_registers_before_call([r.r0, r.r1]) # sizeloc
safe
self.rm.possibly_free_var(size_box)
#
self.rm.force_allocate_reg(op, selected_reg=r.r0)
@@ -951,6 +954,11 @@
arraydescr = op.getdescr()
length_box = op.getarg(2)
assert not isinstance(length_box, Const) # we cannot have a const here!
+ # can only use spill_or_move_registers_before_call() as a hint if
+ # we are sure that length_box stays alive and won't be freed now
+ # (it should always be the case, see below, but better safe than sorry)
+ if self.rm.stays_alive(length_box):
+ self.rm.spill_or_move_registers_before_call([r.r0, r.r1])
# the result will be in r0
self.rm.force_allocate_reg(op, selected_reg=r.r0)
# we need r1 as a temporary
diff --git a/rpython/jit/backend/llsupport/regalloc.py
b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -579,11 +579,26 @@
new_free_regs.append(self.reg_bindings.pop(v))
def before_call(self, force_store=[], save_all_regs=0):
- """Spill or move some registers before a call. By default,
- this means: for every register in 'self.save_around_call_regs',
+ self.spill_or_move_registers_before_call(self.save_around_call_regs,
+ force_store, save_all_regs)
+
+ def spill_or_move_registers_before_call(self, save_sublist,
+ force_store=[], save_all_regs=0):
+ """Spill or move some registers before a call.
+
+ By default, this means: for every register in 'save_sublist',
if there is a variable there and it survives longer than
the current operation, then it is spilled/moved somewhere else.
+ WARNING: this might do the equivalent of possibly_free_vars()
+ on variables dying in the current operation. It won't
+ immediately overwrite registers that used to be occupied by
+ these variables, though. Use this function *after* you finished
+ calling self.loc() or self.make_sure_var_in_reg(), i.e. when you
+ know the location of all input arguments. These locations stay
+ valid, but only *if they are in self.save_around_call_regs,*
+ not if they are callee-saved registers!
+
'save_all_regs' can be 0 (default set of registers), 1 (do that
for all registers), or 2 (default + gc ptrs).
@@ -612,6 +627,16 @@
anyway, as a local hack in this function, because on x86 CPUs
such register-register moves are almost free.
"""
+ if not we_are_translated():
+ # 'save_sublist' is either the whole
+ # 'self.save_around_call_regs', or a sublist thereof, and
+ # then only those registers are spilled/moved. But when
+ # we move them, we never move them to other registers in
+ # 'self.save_around_call_regs', to avoid ping-pong effects
+ # where the same value is constantly moved around.
+ for reg in save_sublist:
+ assert reg in self.save_around_call_regs
+
new_free_regs = []
move_or_spill = []
@@ -631,7 +656,7 @@
# we need to spill all GC ptrs in this mode
self._bc_spill(v, new_free_regs)
#
- elif reg not in self.save_around_call_regs:
+ elif reg not in save_sublist:
continue # in a register like ebx/rbx: it is fine where it is
#
else:
@@ -663,6 +688,7 @@
if not we_are_translated():
if move_or_spill:
assert max_age <= min([_a for _, _a in move_or_spill])
+ assert reg in save_sublist
assert reg in self.save_around_call_regs
assert new_reg not in self.save_around_call_regs
self.assembler.regalloc_mov(reg, new_reg)
diff --git a/rpython/jit/backend/llsupport/test/test_gc_integration.py
b/rpython/jit/backend/llsupport/test/test_gc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_gc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_gc_integration.py
@@ -324,17 +324,19 @@
def check(frame):
expected_size = 1
idx = 0
+ fixed_size = self.cpu.JITFRAME_FIXED_SIZE
if self.cpu.backend_name.startswith('arm'):
# jitframe fixed part is larger here
expected_size = 2
idx = 1
+ fixed_size -= 32
assert len(frame.jf_gcmap) == expected_size
- if self.cpu.IS_64_BIT:
- exp_idx = self.cpu.JITFRAME_FIXED_SIZE + 1 # +1 from i0
- else:
- assert frame.jf_gcmap[idx]
- exp_idx = self.cpu.JITFRAME_FIXED_SIZE - 32 * idx + 1 # +1
from i0
- assert frame.jf_gcmap[idx] == (1 << (exp_idx + 1)) | (1 << exp_idx)
+ # check that we have two bits set, and that they are in two
+ # registers (p0 and p1 are moved away when doing p2, but not
+ # spilled, just moved to different registers)
+ bits = [n for n in range(fixed_size)
+ if frame.jf_gcmap[idx] & (1<<n)]
+ assert len(bits) == 2
self.cpu = self.getcpu(check)
ops = '''
diff --git a/rpython/jit/backend/test/runner_test.py
b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -2825,6 +2825,7 @@
from rpython.rlib.rarithmetic import r_singlefloat
from rpython.translator.c import primitive
+
def same_as_for_box(b):
if b.type == 'i':
return rop.SAME_AS_I
@@ -2835,6 +2836,8 @@
cpu = self.cpu
rnd = random.Random(525)
+ seed = py.test.config.option.randomseed
+ print("random seed %d" % seed)
ALL_TYPES = [
(types.ulong, lltype.Unsigned),
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -204,20 +204,20 @@
def _build_malloc_slowpath(self, kind):
""" While arriving on slowpath, we have a gcpattern on stack 0.
- The arguments are passed in eax and edi, as follows:
+ The arguments are passed in ecx and edx, as follows:
- kind == 'fixed': nursery_head in eax and the size in edi - eax.
+ kind == 'fixed': nursery_head in ecx and the size in (edx - ecx).
- kind == 'str/unicode': length of the string to allocate in edi.
+ kind == 'str/unicode': length of the string to allocate in edx.
- kind == 'var': length to allocate in edi, tid in eax,
+ kind == 'var': length to allocate in edx, tid in ecx,
and itemsize in the stack 1 (position esp+WORD).
- This function must preserve all registers apart from eax and edi.
+ This function must preserve all registers apart from ecx and edx.
"""
assert kind in ['fixed', 'str', 'unicode', 'var']
mc = codebuf.MachineCodeBlockWrapper()
- self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
+ self._push_all_regs_to_frame(mc, [ecx, edx], self.cpu.supports_floats)
# the caller already did push_gcmap(store=True)
#
if kind == 'fixed':
@@ -231,32 +231,32 @@
mc.SUB_ri(esp.value, 16 - WORD) # restore 16-byte alignment
# magically, the above is enough on X86_32 to reserve 3 stack places
if kind == 'fixed':
- mc.SUB_rr(edi.value, eax.value) # compute the size we want
- # the arg is already in edi
+ mc.SUB_rr(edx.value, ecx.value) # compute the size we want
if IS_X86_32:
- mc.MOV_sr(0, edi.value)
+ mc.MOV_sr(0, edx.value) # store the length
if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
- mc.MOV_sr(WORD, ebp.value)
- elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
- # for tests only
- mc.MOV_rr(esi.value, ebp.value)
+ mc.MOV_sr(WORD, ebp.value) # for tests only
+ else:
+ mc.MOV_rr(edi.value, edx.value) # length argument
+ if hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
+ mc.MOV_rr(esi.value, ebp.value) # for tests only
elif kind == 'str' or kind == 'unicode':
if IS_X86_32:
# stack layout: [---][---][---][ret].. with 3 free stack places
- mc.MOV_sr(0, edi.value) # store the length
- else:
- pass # length already in edi
+ mc.MOV_sr(0, edx.value) # store the length
+ elif IS_X86_64:
+ mc.MOV_rr(edi.value, edx.value) # length argument
else:
if IS_X86_32:
# stack layout: [---][---][---][ret][gcmap][itemsize]...
- mc.MOV_sr(WORD * 2, edi.value) # store the length
- mc.MOV_sr(WORD * 1, eax.value) # store the tid
- mc.MOV_rs(edi.value, WORD * 5) # load the itemsize
- mc.MOV_sr(WORD * 0, edi.value) # store the itemsize
+ mc.MOV_sr(WORD * 2, edx.value) # store the length
+ mc.MOV_sr(WORD * 1, ecx.value) # store the tid
+ mc.MOV_rs(edx.value, WORD * 5) # load the itemsize
+ mc.MOV_sr(WORD * 0, edx.value) # store the itemsize
else:
# stack layout: [---][ret][gcmap][itemsize]...
- mc.MOV_rr(edx.value, edi.value) # length
- mc.MOV_rr(esi.value, eax.value) # tid
+ # (already in edx) # length
+ mc.MOV_rr(esi.value, ecx.value) # tid
mc.MOV_rs(edi.value, WORD * 3) # load the itemsize
self.set_extra_stack_depth(mc, 16)
mc.CALL(imm(follow_jump(addr)))
@@ -267,10 +267,11 @@
mc.TEST_rr(eax.value, eax.value)
mc.J_il(rx86.Conditions['Z'], 0xfffff) # patched later
jz_location = mc.get_relative_pos()
+ mc.MOV_rr(ecx.value, eax.value)
#
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
- self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
- mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
+ self._pop_all_regs_from_frame(mc, [ecx, edx], self.cpu.supports_floats)
+ mc.MOV(edx, heap(nursery_free_adr)) # load this in EDX
self.pop_gcmap(mc) # push_gcmap(store=True) done by the caller
mc.RET()
#
@@ -2441,9 +2442,9 @@
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
assert size & (WORD-1) == 0 # must be correctly aligned
- self.mc.MOV(eax, heap(nursery_free_adr))
- self.mc.LEA_rm(edi.value, (eax.value, size))
- self.mc.CMP(edi, heap(nursery_top_adr))
+ self.mc.MOV(ecx, heap(nursery_free_adr))
+ self.mc.LEA_rm(edx.value, (ecx.value, size))
+ self.mc.CMP(edx, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr = self.mc.get_relative_pos()
# save the gcmap
@@ -2452,19 +2453,19 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
- self.mc.MOV(heap(nursery_free_adr), edi)
+ self.mc.MOV(heap(nursery_free_adr), edx)
def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
sizeloc, gcmap):
- if sizeloc is eax:
- self.mc.MOV(edi, sizeloc)
- sizeloc = edi
- self.mc.MOV(eax, heap(nursery_free_adr))
- if sizeloc is edi:
- self.mc.ADD_rr(edi.value, eax.value)
+ if sizeloc is ecx:
+ self.mc.MOV(edx, sizeloc)
+ sizeloc = edx
+ self.mc.MOV(ecx, heap(nursery_free_adr))
+ if sizeloc is edx:
+ self.mc.ADD_rr(edx.value, ecx.value)
else:
- self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
- self.mc.CMP(edi, heap(nursery_top_adr))
+ self.mc.LEA_ra(edx.value, (ecx.value, sizeloc.value, 0, 0))
+ self.mc.CMP(edx, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr = self.mc.get_relative_pos()
# save the gcmap
@@ -2473,7 +2474,7 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
- self.mc.MOV(heap(nursery_free_adr), edi)
+ self.mc.MOV(heap(nursery_free_adr), edx)
def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
lengthloc, itemsize, maxlength, gcmap,
@@ -2482,39 +2483,39 @@
assert isinstance(arraydescr, ArrayDescr)
# lengthloc is the length of the array, which we must not modify!
- assert lengthloc is not eax and lengthloc is not edi
+ assert lengthloc is not ecx and lengthloc is not edx
if isinstance(lengthloc, RegLoc):
varsizeloc = lengthloc
else:
- self.mc.MOV(edi, lengthloc)
- varsizeloc = edi
+ self.mc.MOV(edx, lengthloc)
+ varsizeloc = edx
self.mc.CMP(varsizeloc, imm(maxlength))
self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
jmp_adr0 = self.mc.get_relative_pos()
- self.mc.MOV(eax, heap(nursery_free_adr))
+ self.mc.MOV(ecx, heap(nursery_free_adr))
if valid_addressing_size(itemsize):
shift = get_scale(itemsize)
else:
- shift = self._imul_const_scaled(self.mc, edi.value,
+ shift = self._imul_const_scaled(self.mc, edx.value,
varsizeloc.value, itemsize)
- varsizeloc = edi
+ varsizeloc = edx
- # now varsizeloc is a register != eax. The size of
+ # now varsizeloc is a register != ecx. The size of
# the variable part of the array is (varsizeloc << shift)
assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
constsize = arraydescr.basesize + self.gc_size_of_header
force_realignment = (itemsize % WORD) != 0
if force_realignment:
constsize += WORD - 1
- self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+ self.mc.LEA_ra(edx.value, (ecx.value, varsizeloc.value, shift,
constsize))
if force_realignment:
- self.mc.AND_ri(edi.value, ~(WORD - 1))
- # now edi contains the total size in bytes, rounded up to a multiple
+ self.mc.AND_ri(edx.value, ~(WORD - 1))
+ # now edx contains the total size in bytes, rounded up to a multiple
# of WORD, plus nursery_free_adr
- self.mc.CMP(edi, heap(nursery_top_adr))
+ self.mc.CMP(edx, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr1 = self.mc.get_relative_pos()
#
@@ -2525,8 +2526,8 @@
self.push_gcmap(self.mc, gcmap, store=True)
if kind == rewrite.FLAG_ARRAY:
self.mc.MOV_si(WORD, itemsize)
- self.mc.MOV(edi, lengthloc)
- self.mc.MOV_ri(eax.value, arraydescr.tid)
+ self.mc.MOV(edx, lengthloc)
+ self.mc.MOV_ri(ecx.value, arraydescr.tid)
addr = self.malloc_slowpath_varsize
else:
if kind == rewrite.FLAG_STR:
@@ -2534,7 +2535,7 @@
else:
assert kind == rewrite.FLAG_UNICODE
addr = self.malloc_slowpath_unicode
- self.mc.MOV(edi, lengthloc)
+ self.mc.MOV(edx, lengthloc)
self.mc.CALL(imm(follow_jump(addr)))
self.mc.JMP_l8(0) # jump to done, patched later
jmp_location = self.mc.get_relative_pos()
@@ -2544,9 +2545,9 @@
self.mc.overwrite(jmp_adr1-1, chr(offset))
self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
# write down the tid, but not if it's the result of the CALL
- self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+ self.mc.MOV(mem(ecx, 0), imm(arraydescr.tid))
# while we're at it, this line is not needed if we've done the CALL
- self.mc.MOV(heap(nursery_free_adr), edi)
+ self.mc.MOV(heap(nursery_free_adr), edx)
#
offset = self.mc.get_relative_pos() - jmp_location
assert 0 < offset <= 127
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -952,14 +952,16 @@
size_box = op.getarg(0)
assert isinstance(size_box, ConstInt)
size = size_box.getint()
- # looking at the result
- self.rm.force_allocate_reg(op, selected_reg=eax)
+ # hint: try to move unrelated registers away from eax and edx now
+ self.rm.spill_or_move_registers_before_call([ecx, edx])
+ # the result will be in ecx
+ self.rm.force_allocate_reg(op, selected_reg=ecx)
#
- # We need edi as a temporary, but otherwise don't save any more
+ # We need edx as a temporary, but otherwise don't save any more
# register. See comments in _build_malloc_slowpath().
tmp_box = TempVar()
- self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
- gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+ self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+ gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
#
gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -972,15 +974,16 @@
size_box = op.getarg(0)
assert not isinstance(size_box, Const) # we cannot have a const here!
# sizeloc must be in a register, but we can free it now
- # (we take care explicitly of conflicts with eax or edi)
+ # (we take care explicitly of conflicts with ecx or edx)
sizeloc = self.rm.make_sure_var_in_reg(size_box)
+ self.rm.spill_or_move_registers_before_call([ecx, edx]) # sizeloc safe
self.rm.possibly_free_var(size_box)
- # the result will be in eax
- self.rm.force_allocate_reg(op, selected_reg=eax)
- # we need edi as a temporary
+ # the result will be in ecx
+ self.rm.force_allocate_reg(op, selected_reg=ecx)
+ # we need edx as a temporary
tmp_box = TempVar()
- self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
- gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+ self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+ gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
#
gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -997,16 +1000,21 @@
arraydescr = op.getdescr()
length_box = op.getarg(2)
assert not isinstance(length_box, Const) # we cannot have a const here!
- # the result will be in eax
- self.rm.force_allocate_reg(op, selected_reg=eax)
- # we need edi as a temporary
+ # can only use spill_or_move_registers_before_call() as a hint if
+ # we are sure that length_box stays alive and won't be freed now
+ # (it should always be the case, see below, but better safe than sorry)
+ if self.rm.stays_alive(length_box):
+ self.rm.spill_or_move_registers_before_call([ecx, edx])
+ # the result will be in ecx
+ self.rm.force_allocate_reg(op, selected_reg=ecx)
+ # we need edx as a temporary
tmp_box = TempVar()
- self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
- gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
+ self.rm.force_allocate_reg(tmp_box, selected_reg=edx)
+ gcmap = self.get_gcmap([ecx, edx]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
# length_box always survives: it's typically also present in the
# next operation that will copy it inside the new array. It's
- # fine to load it from the stack too, as long as it's != eax, edi.
+ # fine to load it from the stack too, as long as it is != ecx, edx.
lengthloc = self.rm.loc(length_box)
self.rm.possibly_free_var(length_box)
#
@@ -1225,6 +1233,8 @@
raise AssertionError("bad unicode item size")
def _consider_math_read_timestamp(self, op):
+ # hint: try to move unrelated registers away from eax and edx now
+ self.rm.spill_or_move_registers_before_call([eax, edx])
tmpbox_high = TempVar()
self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax)
if longlong.is_64_bit:
diff --git a/rpython/jit/backend/x86/test/test_zvmprof.py
b/rpython/jit/backend/x86/test/test_zvmprof.py
deleted file mode 100644
--- a/rpython/jit/backend/x86/test/test_zvmprof.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-from rpython.jit.backend.llsupport.test.zrpy_vmprof_test import
CompiledVmprofTest
-
-class TestZVMprof(CompiledVmprofTest):
-
- gcrootfinder = "shadowstack"
- gc = "incminimark"
\ No newline at end of file
diff --git a/rpython/jit/backend/zarch/callbuilder.py
b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -12,6 +12,8 @@
from rpython.rtyper.lltypesystem import rffi
from rpython.jit.backend.llsupport.descr import CallDescr
+CALL_RELEASE_GIL_STACK_OFF = 6*WORD
+
class CallBuilder(AbstractCallBuilder):
GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6]
FPR_ARGS = [r.f0, r.f2, r.f4, r.f6]
@@ -85,8 +87,8 @@
self.subtracted_to_sp += len(stack_params) * WORD
base = len(stack_params) * WORD
if self.is_call_release_gil:
- self.subtracted_to_sp += 8*WORD
- base += 8*WORD
+ self.subtracted_to_sp += CALL_RELEASE_GIL_STACK_OFF
+ base += CALL_RELEASE_GIL_STACK_OFF
for idx,i in enumerate(stack_params):
loc = arglocs[i]
offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx
@@ -187,7 +189,7 @@
RSHADOWPTR = self.RSHADOWPTR
RFASTGILPTR = self.RFASTGILPTR
#
- pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+ pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
#
# Save this thread's shadowstack pointer into r8, for later comparison
@@ -286,7 +288,7 @@
if gcrootmap:
if gcrootmap.is_shadow_stack and self.is_call_release_gil:
self.mc.LGR(r.SCRATCH, RSHADOWOLD)
- pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD
+ pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP))
def write_real_errno(self, save_err):
diff --git a/rpython/jit/backend/zarch/instructions.py
b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -29,6 +29,7 @@
'MGHI': ('ri', ['\xA7','\x0D']),
'MSGFI': ('ril', ['\xC2','\x00']),
'MLGR': ('rre', ['\xB9','\x86'], 'eo,r'),
+ 'MLG': ('rxy', ['\xE3','\x86'], 'eo,bid'),
# div/mod
'DSGR': ('rre', ['\xB9','\x0D'], 'eo,r'),
'DSG': ('rxy', ['\xE3','\x0D'], 'eo,bidl'),
@@ -44,7 +45,6 @@
# rotating
'RISBG': ('rie_f', ['\xEC','\x55']),
- 'RISBGN': ('rie_f', ['\xEC','\x59']),
# invert & negative & absolute
'LPGR': ('rre', ['\xB9','\x00']),
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -160,11 +160,15 @@
omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
omc.overwrite()
- emit_int_floordiv = gen_emit_div_mod('DSGR', 'DSG')
- emit_uint_floordiv = gen_emit_div_mod('DLGR', 'DLG')
- # NOTE division sets one register with the modulo value, thus
- # the regalloc ensures the right register survives.
- emit_int_mod = gen_emit_div_mod('DSGR', 'DSG')
+ def emit_uint_mul_high(self, op, arglocs, regalloc):
+ r0, _, a1 = arglocs
+ # _ carries the value, contents of r0 are ignored
+ assert not r0.is_imm()
+ assert not a1.is_imm()
+ if a1.is_core_reg():
+ self.mc.MLGR(r0, a1)
+ else:
+ self.mc.MLG(r0, a1)
def emit_int_invert(self, op, arglocs, regalloc):
l0, = arglocs
diff --git a/rpython/jit/backend/zarch/regalloc.py
b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -733,9 +733,6 @@
prepare_int_sub_ovf = helper.prepare_int_sub
prepare_int_mul = helper.prepare_int_mul
prepare_int_mul_ovf = helper.prepare_int_mul_ovf
- prepare_int_floordiv = helper.prepare_int_div
- prepare_uint_floordiv = helper.prepare_int_div
- prepare_int_mod = helper.prepare_int_mod
prepare_nursery_ptr_increment = prepare_int_add
prepare_int_and = helper.prepare_int_logic
@@ -746,6 +743,18 @@
prepare_int_lshift = helper.prepare_int_shift
prepare_uint_rshift = helper.prepare_int_shift
+ def prepare_uint_mul_high(self, op):
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ if a0.is_constant():
+ a0, a1 = a1, a0
+ if helper.check_imm32(a1):
+ l1 = self.ensure_reg(a1)
+ else:
+ l1 = self.ensure_reg_or_pool(a1)
+ lr,lq = self.rm.ensure_even_odd_pair(a0, op, bind_first=True)
+ return [lr, lq, l1]
+
prepare_int_le = helper.generate_cmp_op()
prepare_int_lt = helper.generate_cmp_op()
prepare_int_ge = helper.generate_cmp_op()
diff --git a/rpython/jit/backend/zarch/test/test_assembler.py
b/rpython/jit/backend/zarch/test/test_assembler.py
--- a/rpython/jit/backend/zarch/test/test_assembler.py
+++ b/rpython/jit/backend/zarch/test/test_assembler.py
@@ -155,7 +155,15 @@
s64 = bin(fac_data[1])[2:]
print(f64)
print(s64)
+ for i,c in enumerate(f64):
+ print('index: %d is set? %s' % (i,c))
+
+ assert f64[1] == '1' # The z/Architecture architectural mode is
installed.
+ assert f64[2] == '1' # The z/Architecture architectural mode is active.
assert f64[18] == '1' # long displacement facility
+ assert f64[21] == '1' # extended immediate facility
+ assert f64[34] == '1' # general instruction facility
+ assert f64[41] == '1' # floating-point-support-enhancement
def test_load_byte_zero_extend(self):
adr = self.a.datablockwrapper.malloc_aligned(16, 16)
@@ -189,7 +197,7 @@
@py.test.mark.parametrize('p',
[2**32,2**32+1,2**63-1,2**63-2,0,1,2,3,4,5,6,7,8,10001])
def test_align_withroll(self, p):
self.a.mc.load_imm(r.r2, p & 0xffffFFFFffffFFFF)
- self.a.mc.RISBGN(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60),
loc.imm(0))
+ self.a.mc.RISBG(r.r2, r.r2, loc.imm(0), loc.imm(0x80 | 60), loc.imm(0))
self.a.mc.BCR(con.ANY, r.r14)
assert run_asm(self.a) == rffi.cast(rffi.ULONG,p) & ~(7)
@@ -214,7 +222,7 @@
n = 13
l = loc
self.a.mc.load_imm(r.r2, 7<<n)
- self.a.mc.RISBGN(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
+ self.a.mc.RISBG(r.r2, r.r2, l.imm(61), l.imm(0x80 | 63), l.imm(64-n))
self.a.mc.BCR(con.ANY, r.r14)
assert run_asm(self.a) == 7
@@ -222,7 +230,7 @@
n = 16
l = loc
self.a.mc.load_imm(r.r2, 0xffFFffFF)
- self.a.mc.RISBGN(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n))
+ self.a.mc.RISBG(r.r2, r.r2, l.imm(60), l.imm(0x80 | 63), l.imm(64-n))
self.a.mc.BCR(con.ANY, r.r14)
assert run_asm(self.a) == 15
diff --git a/rpython/jit/backend/zarch/test/test_auto_encoding.py
b/rpython/jit/backend/zarch/test/test_auto_encoding.py
--- a/rpython/jit/backend/zarch/test/test_auto_encoding.py
+++ b/rpython/jit/backend/zarch/test/test_auto_encoding.py
@@ -204,7 +204,7 @@
g.write('%s\n' % op)
oplist.append(op)
g.write('\t.string "%s"\n' % END_TAG)
- proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=zEC12',
+ proc = subprocess.Popen(['as', '-m64', '-mzarch', '-march=z196',
inputname, '-o', filename],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
diff --git a/rpython/jit/backend/zarch/test/test_int.py
b/rpython/jit/backend/zarch/test/test_int.py
--- a/rpython/jit/backend/zarch/test/test_int.py
+++ b/rpython/jit/backend/zarch/test/test_int.py
@@ -35,41 +35,13 @@
fail = self.cpu.get_latest_descr(deadframe)
assert fail == finishdescr # ensures that guard is not taken!
- def test_double_evenodd_pair(self):
- code = """
- [i0]
- i1 = int_floordiv(i0, 2)
- i2 = int_floordiv(i0, 3)
- i3 = int_floordiv(i0, 4)
- i4 = int_floordiv(i0, 5)
- i5 = int_floordiv(i0, 6)
- i6 = int_floordiv(i0, 7)
- i7 = int_floordiv(i0, 8)
- i8 = int_le(i1, 0)
- guard_true(i8) [i1,i2,i3,i4,i5,i6,i7]
- finish(i0, descr=faildescr)
- """
- # the guard forces 3 spills because after 4 divisions
- # all even slots of the managed registers are full
- loop = parse(code, namespace={'faildescr': BasicFinalDescr(1)})
- looptoken = JitCellToken()
- self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
- deadframe = self.cpu.execute_token(looptoken, 100)
- fail = self.cpu.get_latest_descr(deadframe)
- for i in range(2,9):
- assert self.cpu.get_int_value(deadframe, i-2) == 100//i
-
-
-
@py.test.mark.parametrize('value', [2,3,15,2**16])
def test_evenodd_pair_extensive(self, value):
instrs = []
failargs = []
values = []
j = 0
- mapping = (('int_floordiv',lambda x,y: x // y),
- ('int_mod', lambda x,y: x % y),
- ('int_mul_ovf', lambda x,y: x * y))
+ mapping = (('int_mul_ovf', lambda x,y: x * y),)
for i in range(20):
name, func = mapping[j]
instrs.append("i{d} = {i}(i0, {d})".format(d=i+1, i=name))
diff --git a/rpython/jit/backend/zarch/test/test_regalloc.py
b/rpython/jit/backend/zarch/test/test_regalloc.py
--- a/rpython/jit/backend/zarch/test/test_regalloc.py
+++ b/rpython/jit/backend/zarch/test/test_regalloc.py
@@ -146,128 +146,3 @@
assert cpu.get_int_value(deadframe, 0) == 0
assert cpu.get_int_value(deadframe, 1) == -1000
-def test_bug_0():
- cpu, deadframe = run([-13, 10, 10, 8, -8, -16, -18, 46, -12, 26], '''
- [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
- i11 = uint_gt(i3, -48)
- i12 = int_xor(i8, i1)
- i13 = int_gt(i6, -9)
- i14 = int_le(i13, i2)
- i15 = int_le(i11, i5)
- i16 = uint_ge(i13, i13)
- i17 = int_or(i9, -23)
- i18 = int_lt(i10, i13)
- i19 = int_or(i15, i5)
- i20 = int_xor(i17, 54)
- i21 = int_mul(i8, i10)
- i22 = int_or(i3, i9)
- i41 = int_and(i11, -4)
- i42 = int_or(i41, 1)
- i23 = int_mod(i12, i42)
- i24 = int_is_true(i6)
- i25 = uint_rshift(i15, 6)
- i26 = int_or(-4, i25)
- i27 = int_invert(i8)
- i28 = int_sub(-113, i11)
- i29 = int_neg(i7)
- i30 = int_neg(i24)
- i31 = int_floordiv(i3, 53)
- i32 = int_mul(i28, i27)
- i43 = int_and(i18, -4)
- i44 = int_or(i43, 1)
- i33 = int_mod(i26, i44)
- i34 = int_or(i27, i19)
- i35 = uint_lt(i13, 1)
- i45 = int_and(i21, 31)
- i36 = int_rshift(i21, i45)
- i46 = int_and(i20, 31)
- i37 = uint_rshift(i4, i46)
- i38 = uint_gt(i33, -11)
- i39 = int_neg(i7)
- i40 = int_gt(i24, i32)
- i99 = same_as_i(0)
- guard_true(i99) [i40, i36, i37, i31, i16, i34, i35, i23, i22, i29, i14,
i39, i30, i38]
- finish(42)
- ''')
- assert cpu.get_int_value(deadframe, 0) == 0
- assert cpu.get_int_value(deadframe, 1) == 0
- assert cpu.get_int_value(deadframe, 2) == 0
- assert cpu.get_int_value(deadframe, 3) == 0
- assert cpu.get_int_value(deadframe, 4) == 1
- assert cpu.get_int_value(deadframe, 5) == -7
- assert cpu.get_int_value(deadframe, 6) == 1
- assert cpu.get_int_value(deadframe, 7) == 0
- assert cpu.get_int_value(deadframe, 8) == -2
- assert cpu.get_int_value(deadframe, 9) == 18
- assert cpu.get_int_value(deadframe, 10) == 1
- assert cpu.get_int_value(deadframe, 11) == 18
- assert cpu.get_int_value(deadframe, 12) == -1
- assert cpu.get_int_value(deadframe, 13) == 0
-
-def test_bug_1():
- cpu, deadframe = run([17, -20, -6, 6, 1, 13, 13, 9, 49, 8], '''
- [i1, i2, i3, i4, i5, i6, i7, i8, i9, i10]
- i11 = uint_lt(i6, 0)
- i41 = int_and(i3, 31)
- i12 = int_rshift(i3, i41)
- i13 = int_neg(i2)
- i14 = int_add(i11, i7)
- i15 = int_or(i3, i2)
- i16 = int_or(i12, i12)
- i17 = int_ne(i2, i5)
- i42 = int_and(i5, 31)
- i18 = uint_rshift(i14, i42)
- i43 = int_and(i14, 31)
- i19 = int_lshift(7, i43)
- i20 = int_neg(i19)
- i21 = int_mod(i3, 1)
- i22 = uint_ge(i15, i1)
- i44 = int_and(i16, 31)
- i23 = int_lshift(i8, i44)
- i24 = int_is_true(i17)
- i45 = int_and(i5, 31)
- i25 = int_lshift(i14, i45)
- i26 = int_lshift(i5, 17)
- i27 = int_eq(i9, i15)
- i28 = int_ge(0, i6)
- i29 = int_neg(i15)
- i30 = int_neg(i22)
- i31 = int_add(i7, i16)
- i32 = uint_lt(i19, i19)
- i33 = int_add(i2, 1)
- i34 = int_neg(i5)
- i35 = int_add(i17, i24)
- i36 = uint_lt(2, i16)
- i37 = int_neg(i9)
- i38 = int_gt(i4, i11)
- i39 = int_lt(i27, i22)
- i40 = int_neg(i27)
- i99 = same_as_i(0)
- guard_true(i99) [i40, i10, i36, i26, i13, i30, i21, i33, i18, i25, i31,
i32, i28, i29, i35, i38, i20, i39, i34, i23, i37]
- finish(-42)
- ''')
- assert cpu.get_int_value(deadframe, 0) == 0
- assert cpu.get_int_value(deadframe, 1) == 8
- assert cpu.get_int_value(deadframe, 2) == 1
- assert cpu.get_int_value(deadframe, 3) == 131072
- assert cpu.get_int_value(deadframe, 4) == 20
- assert cpu.get_int_value(deadframe, 5) == -1
- assert cpu.get_int_value(deadframe, 6) == 0
- assert cpu.get_int_value(deadframe, 7) == -19
- assert cpu.get_int_value(deadframe, 8) == 6
- assert cpu.get_int_value(deadframe, 9) == 26
- assert cpu.get_int_value(deadframe, 10) == 12
- assert cpu.get_int_value(deadframe, 11) == 0
- assert cpu.get_int_value(deadframe, 12) == 0
- assert cpu.get_int_value(deadframe, 13) == 2
- assert cpu.get_int_value(deadframe, 14) == 2
- assert cpu.get_int_value(deadframe, 15) == 1
- assert cpu.get_int_value(deadframe, 16) == -57344
- assert cpu.get_int_value(deadframe, 17) == 1
- assert cpu.get_int_value(deadframe, 18) == -1
- if WORD == 4:
- assert cpu.get_int_value(deadframe, 19) == -2147483648
- elif WORD == 8:
- assert cpu.get_int_value(deadframe, 19) == 19327352832
- assert cpu.get_int_value(deadframe, 20) == -49
-
diff --git a/rpython/jit/codewriter/jtransform.py
b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -521,6 +521,8 @@
# XXX some of the following functions should not become residual calls
# but be really compiled
rewrite_op_int_abs = _do_builtin_call
+ rewrite_op_int_floordiv = _do_builtin_call
+ rewrite_op_int_mod = _do_builtin_call
rewrite_op_llong_abs = _do_builtin_call
rewrite_op_llong_floordiv = _do_builtin_call
rewrite_op_llong_mod = _do_builtin_call
@@ -530,7 +532,6 @@
rewrite_op_gc_id = _do_builtin_call
rewrite_op_gc_pin = _do_builtin_call
rewrite_op_gc_unpin = _do_builtin_call
- rewrite_op_uint_mod = _do_builtin_call
rewrite_op_cast_float_to_uint = _do_builtin_call
rewrite_op_cast_uint_to_float = _do_builtin_call
rewrite_op_weakref_create = _do_builtin_call
diff --git a/rpython/jit/codewriter/support.py
b/rpython/jit/codewriter/support.py
--- a/rpython/jit/codewriter/support.py
+++ b/rpython/jit/codewriter/support.py
@@ -248,6 +248,26 @@
mask = x >> (LONG_BIT - 1)
return (x ^ mask) - mask
+
+def _ll_2_int_floordiv(x, y):
+ # this is used only if the RPython program uses llop.int_floordiv()
+ # explicitly. For 'a // b', see _handle_int_special() in jtransform.py.
+ # This is the reverse of rpython.rtyper.rint.ll_int_py_div(), i.e.
+ # the same logic as rpython.rtyper.lltypesystem.opimpl.op_int_floordiv
+ # but written in a no-branch style.
+ r = x // y
+ p = r * y
+ # the JIT knows that if x and y are both positive, this is just 'r'
+ return r + (((x ^ y) >> (LONG_BIT - 1)) & (p != x))
+
+def _ll_2_int_mod(x, y):
+ # same comments as _ll_2_int_floordiv()
+ r = x % y
+ # the JIT knows that if x and y are both positive, this doesn't change 'r'
+ r -= y & (((x ^ y) & (r | -r)) >> (LONG_BIT - 1))
+ return r
+
+
def _ll_1_cast_uint_to_float(x):
# XXX on 32-bit platforms, this should be done using cast_longlong_to_float
# (which is a residual call right now in the x86 backend)
@@ -417,6 +437,8 @@
# in the following calls to builtins, the JIT is allowed to look inside:
inline_calls_to = [
('int_abs', [lltype.Signed], lltype.Signed),
+ ('int_floordiv', [lltype.Signed, lltype.Signed], lltype.Signed),
+ ('int_mod', [lltype.Signed, lltype.Signed], lltype.Signed),
('ll_math.ll_math_sqrt', [lltype.Float], lltype.Float),
]
diff --git a/rpython/jit/codewriter/test/test_flatten.py
b/rpython/jit/codewriter/test/test_flatten.py
--- a/rpython/jit/codewriter/test/test_flatten.py
+++ b/rpython/jit/codewriter/test/test_flatten.py
@@ -478,7 +478,7 @@
except ZeroDivisionError:
return -42
self.encoding_test(f, [7, 2], """
- residual_call_ir_i $<* fn ll_int_floordiv_ovf_zer__Signed_Signed>,
I[%i0, %i1], R[], <Descr> -> %i2
+ residual_call_ir_i $<* fn ll_int_py_div_ovf_zer__Signed_Signed>,
I[%i0, %i1], R[], <Descr> -> %i2
-live-
catch_exception L1
int_return %i2
@@ -505,7 +505,7 @@
return 42
# XXX so far, this really produces a int_mod_ovf_zer...
self.encoding_test(f, [7, 2], """
- residual_call_ir_i $<* fn ll_int_mod_ovf_zer__Signed_Signed>,
I[%i0, %i1], R[], <Descr> -> %i2
+ residual_call_ir_i $<* fn ll_int_py_mod_ovf_zer__Signed_Signed>,
I[%i0, %i1], R[], <Descr> -> %i2
-live-
catch_exception L1
int_return %i2
diff --git a/rpython/jit/codewriter/test/test_support.py
b/rpython/jit/codewriter/test/test_support.py
--- a/rpython/jit/codewriter/test/test_support.py
+++ b/rpython/jit/codewriter/test/test_support.py
@@ -3,7 +3,6 @@
from rpython.rtyper.annlowlevel import llstr
from rpython.flowspace.model import Variable, Constant, SpaceOperation
from rpython.jit.codewriter.support import decode_builtin_call, LLtypeHelpers
-from rpython.jit.codewriter.support import _ll_1_int_abs
def newconst(x):
return Constant(x, lltype.typeOf(x))
@@ -136,6 +135,7 @@
py.test.raises(AttributeError, func, llstr(None), p2)
def test_int_abs():
+ from rpython.jit.codewriter.support import _ll_1_int_abs
assert _ll_1_int_abs(0) == 0
assert _ll_1_int_abs(1) == 1
assert _ll_1_int_abs(10) == 10
@@ -143,3 +143,14 @@
assert _ll_1_int_abs(-1) == 1
assert _ll_1_int_abs(-10) == 10
assert _ll_1_int_abs(-sys.maxint) == sys.maxint
+
+def test_int_floordiv_mod():
+ from rpython.rtyper.lltypesystem.lloperation import llop
+ from rpython.jit.codewriter.support import _ll_2_int_floordiv,
_ll_2_int_mod
+ for x in range(-6, 7):
+ for y in range(-3, 4):
+ if y != 0:
+ assert (_ll_2_int_floordiv(x, y) ==
+ llop.int_floordiv(lltype.Signed, x, y))
+ assert (_ll_2_int_mod(x, y) ==
+ llop.int_mod(lltype.Signed, x, y))
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py
b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -97,17 +97,14 @@
self.emit_operation(op)
r = self.getintbound(op)
- if b2.is_constant():
- val = b2.lower
- if val >= 0:
- r.intersect(IntBound(0, val))
- elif b1.is_constant():
- val = b1.lower
- if val >= 0:
- r.intersect(IntBound(0, val))
- elif b1.known_ge(IntBound(0, 0)) and b2.known_ge(IntBound(0, 0)):
- lesser = min(b1.upper, b2.upper)
- r.intersect(IntBound(0, next_pow2_m1(lesser)))
+ pos1 = b1.known_ge(IntBound(0, 0))
+ pos2 = b2.known_ge(IntBound(0, 0))
+ if pos1 or pos2:
+ r.make_ge(IntBound(0, 0))
+ if pos1:
+ r.make_le(b1)
+ if pos2:
+ r.make_le(b2)
def optimize_INT_SUB(self, op):
self.emit_operation(op)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py
@@ -5188,6 +5188,25 @@
"""
self.optimize_loop(ops, ops)
+ def test_int_and_positive(self):
+ ops = """
+ [i0, i1]
+ i2 = int_ge(i1, 0)
+ guard_true(i2) []
+ i3 = int_and(i0, i1)
+ i4 = int_ge(i3, 0)
+ guard_true(i4) []
+ jump(i3)
+ """
+ expected = """
+ [i0, i1]
+ i2 = int_ge(i1, 0)
+ guard_true(i2) []
+ i3 = int_and(i0, i1)
+ jump(i3)
+ """
+ self.optimize_loop(ops, expected)
+
def test_int_or_cmp_above_bounds(self):
ops = """
[p0,p1]
@@ -5252,6 +5271,47 @@
"""
self.optimize_loop(ops, ops)
+ def test_int_xor_positive_is_positive(self):
+ ops = """
+ [i0, i1]
+ i2 = int_lt(i0, 0)
+ guard_false(i2) []
+ i3 = int_lt(i1, 0)
+ guard_false(i3) []
+ i4 = int_xor(i0, i1)
+ i5 = int_lt(i4, 0)
+ guard_false(i5) []
+ jump(i4, i0)
+ """
+ expected = """
+ [i0, i1]
+ i2 = int_lt(i0, 0)
+ guard_false(i2) []
+ i3 = int_lt(i1, 0)
+ guard_false(i3) []
+ i4 = int_xor(i0, i1)
+ jump(i4, i0)
+ """
+ self.optimize_loop(ops, expected)
+
+ def test_positive_rshift_bits_minus_1(self):
+ ops = """
+ [i0]
+ i2 = int_lt(i0, 0)
+ guard_false(i2) []
+ i3 = int_rshift(i2, %d)
+ escape_n(i3)
+ jump(i0)
+ """ % (LONG_BIT - 1,)
+ expected = """
+ [i0]
+ i2 = int_lt(i0, 0)
+ guard_false(i2) []
+ escape_n(0)
+ jump(i0)
+ """
+ self.optimize_loop(ops, expected)
+
def test_int_or_same_arg(self):
ops = """
[i0]
diff --git a/rpython/jit/metainterp/test/test_ajit.py
b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -955,6 +955,75 @@
res = self.meta_interp(f, [-5])
assert res == 5+4+3+2+1+0+1+2+3+4+5+6+7+8+9
+ def test_int_c_div(self):
+ from rpython.rlib.rarithmetic import int_c_div
+ myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+ def f(i):
+ t = 0
+ while i < 10:
+ myjitdriver.can_enter_jit(i=i, t=t)
+ myjitdriver.jit_merge_point(i=i, t=t)
+ t += int_c_div(-100, i)
+ i += 1
+ return t
+ expected = -sum([100 // n for n in range(1, 10)])
+ assert f(1) == expected
+ res = self.meta_interp(f, [1])
+ assert res == expected
+ # should contain a call_i(..., OS=OS_INT_PY_DIV)
+
+ def test_int_c_mod(self):
+ from rpython.rlib.rarithmetic import int_c_mod
+ myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+ def f(i):
+ t = 0
+ while i < 10:
+ myjitdriver.can_enter_jit(i=i, t=t)
+ myjitdriver.jit_merge_point(i=i, t=t)
+ t += int_c_mod(-100, i)
+ i += 1
+ return t
+ expected = -sum([100 % n for n in range(1, 10)])
+ assert f(1) == expected
+ res = self.meta_interp(f, [1])
+ assert res == expected
+ # should contain a call_i(..., OS=OS_INT_PY_MOD)
+
+ def test_positive_c_div_mod(self):
+ from rpython.rlib.rarithmetic import int_c_div, int_c_mod
+ myjitdriver = JitDriver(greens = [], reds = ['i', 't'])
+ def f(i):
+ t = 0
+ while i < 10:
+ myjitdriver.can_enter_jit(i=i, t=t)
+ myjitdriver.jit_merge_point(i=i, t=t)
+ assert i > 0
+ t += int_c_div(100, i) - int_c_mod(100, i)
+ i += 1
+ return t
+ expected = sum([100 // n - 100 % n for n in range(1, 10)])
+ assert f(1) == expected
+ res = self.meta_interp(f, [1])
+ assert res == expected
+ # all the correction code should be dead now, xxx test that
+
+ def test_int_c_div_by_constant(self):
+ from rpython.rlib.rarithmetic import int_c_div
+ myjitdriver = JitDriver(greens = ['k'], reds = ['i', 't'])
+ def f(i, k):
+ t = 0
+ while i < 100:
+ myjitdriver.can_enter_jit(i=i, t=t, k=k)
+ myjitdriver.jit_merge_point(i=i, t=t, k=k)
+ t += int_c_div(i, k)
+ i += 1
+ return t
+ expected = sum([i // 10 for i in range(51, 100)])
+ assert f(-50, 10) == expected
+ res = self.meta_interp(f, [-50, 10])
+ assert res == expected
+ self.check_resops(call=0, uint_mul_high=2)
+
def test_float(self):
myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res'])
def f(x, y):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -281,11 +281,12 @@
large_object=8*WORD,
ArenaCollectionClass=None,
**kwds):
+ "NOT_RPYTHON"
MovingGCBase.__init__(self, config, **kwds)
assert small_request_threshold % WORD == 0
self.read_from_env = read_from_env
self.nursery_size = nursery_size
-
+
self.small_request_threshold = small_request_threshold
self.major_collection_threshold = major_collection_threshold
self.growth_rate_max = growth_rate_max
@@ -644,6 +645,7 @@
# Get the memory from the nursery. If there is not enough space
# there, do a collect first.
result = self.nursery_free
+ ll_assert(result != llmemory.NULL, "uninitialized nursery")
self.nursery_free = new_free = result + totalsize
if new_free > self.nursery_top:
result = self.collect_and_reserve(totalsize)
@@ -703,6 +705,7 @@
# Get the memory from the nursery. If there is not enough space
# there, do a collect first.
result = self.nursery_free
+ ll_assert(result != llmemory.NULL, "uninitialized nursery")
self.nursery_free = new_free = result + totalsize
if new_free > self.nursery_top:
result = self.collect_and_reserve(totalsize)
@@ -1139,7 +1142,8 @@
Implemented a bit obscurely by checking an unrelated flag
that can never be set on a young object -- except if tid == -42.
"""
- assert self.is_in_nursery(obj)
+ ll_assert(self.is_in_nursery(obj),
+ "Can't forward an object outside the nursery.")
tid = self.header(obj).tid
result = (tid & GCFLAG_FINALIZATION_ORDERING != 0)
if result:
@@ -1463,7 +1467,8 @@
objhdr.tid |= GCFLAG_CARDS_SET
remember_young_pointer_from_array2._dont_inline_ = True
- assert self.card_page_indices > 0
+ ll_assert(self.card_page_indices > 0,
+ "non-positive card_page_indices")
self.remember_young_pointer_from_array2 = (
remember_young_pointer_from_array2)
@@ -1513,7 +1518,8 @@
return True
# ^^^ a fast path of write-barrier
#
- if source_hdr.tid & GCFLAG_HAS_CARDS != 0:
+ if (self.card_page_indices > 0 and # check constant-folded
+ source_hdr.tid & GCFLAG_HAS_CARDS != 0):
#
if source_hdr.tid & GCFLAG_TRACK_YOUNG_PTRS == 0:
# The source object may have random young pointers.
@@ -1548,7 +1554,8 @@
def manually_copy_card_bits(self, source_addr, dest_addr, length):
# manually copy the individual card marks from source to dest
- assert self.card_page_indices > 0
+ ll_assert(self.card_page_indices > 0,
+ "non-positive card_page_indices")
bytes = self.card_marking_bytes_for_length(length)
#
anybyte = 0
@@ -1721,12 +1728,15 @@
nursery_barriers = self.AddressDeque()
prev = self.nursery
self.surviving_pinned_objects.sort()
- assert self.pinned_objects_in_nursery == \
- self.surviving_pinned_objects.length()
+ ll_assert(
+ self.pinned_objects_in_nursery == \
+ self.surviving_pinned_objects.length(),
+ "pinned_objects_in_nursery != surviving_pinned_objects.length()")
while self.surviving_pinned_objects.non_empty():
#
cur = self.surviving_pinned_objects.pop()
- assert cur >= prev
+ ll_assert(
+ cur >= prev, "pinned objects encountered in backwards order")
#
# clear the arena between the last pinned object (or arena start)
# and the pinned object
@@ -1784,7 +1794,8 @@
debug_stop("gc-minor")
def _reset_flag_old_objects_pointing_to_pinned(self, obj, ignore):
- assert self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN
+ ll_assert(self.header(obj).tid & GCFLAG_PINNED_OBJECT_PARENT_KNOWN !=
0,
+ "!GCFLAG_PINNED_OBJECT_PARENT_KNOWN, but requested to
reset.")
self.header(obj).tid &= ~GCFLAG_PINNED_OBJECT_PARENT_KNOWN
def _visit_old_objects_pointing_to_pinned(self, obj, ignore):
diff --git a/rpython/memory/gc/test/test_direct.py
b/rpython/memory/gc/test/test_direct.py
--- a/rpython/memory/gc/test/test_direct.py
+++ b/rpython/memory/gc/test/test_direct.py
@@ -554,6 +554,7 @@
assert res # we optimized it
assert hdr_dst.tid & minimark.GCFLAG_TRACK_YOUNG_PTRS == 0 # and we
copied the flag
#
+ self.gc.card_page_indices = 128 # force > 0
hdr_src.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
hdr_dst.tid |= minimark.GCFLAG_TRACK_YOUNG_PTRS
hdr_src.tid |= minimark.GCFLAG_HAS_CARDS
diff --git a/rpython/rlib/clibffi.py b/rpython/rlib/clibffi.py
--- a/rpython/rlib/clibffi.py
+++ b/rpython/rlib/clibffi.py
@@ -148,7 +148,8 @@
('elements', FFI_TYPE_PP)])
ffi_cif = rffi_platform.Struct('ffi_cif', [])
- ffi_closure = rffi_platform.Struct('ffi_closure', [])
+ ffi_closure = rffi_platform.Struct('ffi_closure',
+ [('user_data', rffi.VOIDP)])
def add_simple_type(type_name):
for name in ['size', 'alignment', 'type']:
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -650,6 +650,26 @@
from rpython.rtyper.lltypesystem.lloperation import llop
return llop.int_force_ge_zero(lltype.Signed, n)
+def int_c_div(x, y):
+ """Return the result of the C-style 'x / y'. This differs from the
+ Python-style division if (x < 0 xor y < 0). The JIT implements it
+ with a Python-style division followed by correction code. This
+ is not that bad, because the JIT removes the correction code if
+ x and y are both nonnegative, and if y is any nonnegative constant
+ then the division turns into a rshift or a mul.
+ """
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.lltypesystem.lloperation import llop
+ return llop.int_floordiv(lltype.Signed, x, y)
+
+def int_c_mod(x, y):
+ """Return the result of the C-style 'x % y'. This differs from the
+ Python-style division if (x < 0 xor y < 0).
+ """
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.lltypesystem.lloperation import llop
+ return llop.int_mod(lltype.Signed, x, y)
+
@objectmodel.specialize.ll()
def byteswap(arg):
""" Convert little->big endian and the opposite
diff --git a/rpython/rlib/rvmprof/src/vmprof_config.h
b/rpython/rlib/rvmprof/src/vmprof_config.h
--- a/rpython/rlib/rvmprof/src/vmprof_config.h
+++ b/rpython/rlib/rvmprof/src/vmprof_config.h
@@ -1,10 +1,17 @@
-#define HAVE_SYS_UCONTEXT_H
+#if !defined(__OpenBSD__)
+# define HAVE_SYS_UCONTEXT_H
+#else
+# define HAVE_SIGNAL_H
+#endif
+
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#ifdef __i386__
#define PC_FROM_UCONTEXT uc_mcontext.mc_eip
#else
#define PC_FROM_UCONTEXT uc_mcontext.mc_rip
#endif
+#elif defined(__OpenBSD__)
+#define PC_FROM_UCONTEXT sc_rip
#elif defined( __APPLE__)
#if ((ULONG_MAX) == (UINT_MAX))
#define PC_FROM_UCONTEXT uc_mcontext->__ss.__eip
diff --git a/rpython/rlib/rvmprof/src/vmprof_getpc.h
b/rpython/rlib/rvmprof/src/vmprof_getpc.h
--- a/rpython/rlib/rvmprof/src/vmprof_getpc.h
+++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h
@@ -65,6 +65,10 @@
#elif defined(HAVE_CYGWIN_SIGNAL_H)
#include <cygwin/signal.h>
typedef ucontext ucontext_t;
+#elif defined(HAVE_SIGNAL_H)
+#include <signal.h>
+#else
+# error "don't know how to get the pc on this platform"
#endif
diff --git a/rpython/rlib/test/test_rarithmetic.py
b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -2,6 +2,7 @@
from rpython.rtyper.test.test_llinterp import interpret
from rpython.rlib.rarithmetic import *
from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError
+from hypothesis import given, strategies
import sys
import py
@@ -393,6 +394,21 @@
assert not int_between(1, 2, 2)
assert not int_between(1, 1, 1)
+def test_int_force_ge_zero():
+ assert int_force_ge_zero(42) == 42
+ assert int_force_ge_zero(0) == 0
+ assert int_force_ge_zero(-42) == 0
+
+@given(strategies.integers(min_value=0, max_value=sys.maxint),
+ strategies.integers(min_value=1, max_value=sys.maxint))
+def test_int_c_div_mod(x, y):
+ assert int_c_div(~x, y) == -(abs(~x) // y)
+ assert int_c_div( x,-y) == -(x // y)
+ assert int_c_div(~x,-y) == +(abs(~x) // y)
+ for x1 in [x, ~x]:
+ for y1 in [y, -y]:
+ assert int_c_div(x1, y1) * y1 + int_c_mod(x1, y1) == x1
+
# these can't be prebuilt on 32bit
U1 = r_ulonglong(0x0102030405060708L)
U2 = r_ulonglong(0x0807060504030201L)
diff --git a/rpython/rtyper/rint.py b/rpython/rtyper/rint.py
--- a/rpython/rtyper/rint.py
+++ b/rpython/rtyper/rint.py
@@ -236,11 +236,11 @@
return _rtype_template(hop, 'mul_ovf')
def rtype_floordiv(_, hop):
- return _rtype_call_helper(hop, 'floordiv', [ZeroDivisionError])
+ return _rtype_call_helper(hop, 'py_div', [ZeroDivisionError])
rtype_inplace_floordiv = rtype_floordiv
def rtype_floordiv_ovf(_, hop):
- return _rtype_call_helper(hop, 'floordiv_ovf', [ZeroDivisionError])
+ return _rtype_call_helper(hop, 'py_div_ovf', [ZeroDivisionError])
# turn 'div' on integers into 'floordiv'
rtype_div = rtype_floordiv
@@ -250,11 +250,11 @@
# 'def rtype_truediv' is delegated to the superclass FloatRepr
def rtype_mod(_, hop):
- return _rtype_call_helper(hop, 'mod', [ZeroDivisionError])
+ return _rtype_call_helper(hop, 'py_mod', [ZeroDivisionError])
rtype_inplace_mod = rtype_mod
def rtype_mod_ovf(_, hop):
- return _rtype_call_helper(hop, 'mod_ovf', [ZeroDivisionError])
+ return _rtype_call_helper(hop, 'py_mod_ovf', [ZeroDivisionError])
def rtype_xor(_, hop):
return _rtype_template(hop, 'xor')
@@ -319,7 +319,7 @@
vlist = hop.inputargs(repr, repr2)
prefix = repr.opprefix
- if '_ovf' in func or func.startswith(('mod', 'floordiv')):
+ if '_ovf' in func or func.startswith(('py_mod', 'py_div')):
if prefix+func not in ('int_add_ovf', 'int_add_nonneg_ovf',
'int_sub_ovf', 'int_mul_ovf'):
raise TyperError("%r should not be used here any more" % (func,))
@@ -353,7 +353,7 @@
any_implicit_exception = True
if not any_implicit_exception:
- if not func.startswith(('mod', 'floordiv')):
+ if not func.startswith(('py_mod', 'py_div')):
return _rtype_template(hop, func)
repr = hop.r_result
@@ -388,7 +388,7 @@
# ---------- floordiv ----------
@jit.oopspec("int.py_div(x, y)")
-def ll_int_floordiv(x, y):
+def ll_int_py_div(x, y):
# Python, and RPython, assume that integer division truncates
# towards -infinity. However, in C, integer division truncates
# towards 0. So assuming that, we need to apply a correction
@@ -400,159 +400,159 @@
return r + (u >> INT_BITS_1)
@jit.oopspec("int.py_div(x, y)")
-def ll_int_floordiv_nonnegargs(x, y):
+def ll_int_py_div_nonnegargs(x, y):
from rpython.rlib.debug import ll_assert
r = llop.int_floordiv(Signed, x, y) # <= truncates like in C
- ll_assert(r >= 0, "int_floordiv_nonnegargs(): one arg is negative")
+ ll_assert(r >= 0, "int_py_div_nonnegargs(): one arg is negative")
return r
-def ll_int_floordiv_zer(x, y):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit