Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r87059:7384b2be2e75
Date: 2016-09-13 10:22 +0200
http://bitbucket.org/pypy/pypy/changeset/7384b2be2e75/
Log: resolve some test issues
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -16,7 +16,7 @@
from rpython.jit.backend.detect_cpu import getcpuclass
CPU = getcpuclass()
-if not CPU.vector_extension:
+if not CPU.vector_ext:
py.test.skip("this cpu %s has no implemented vector backend" % CPU)
def get_profiler():
@@ -29,7 +29,7 @@
interp = None
def setup_method(self, method):
- if not self.CPUClass.vector_extension:
+ if not self.CPUClass.vector_ext:
py.test.skip("needs vector extension to run (for now)")
def assert_float_equal(self, f1, f2, delta=0.0001):
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -553,14 +553,15 @@
"""
def __init__(self, loop):
self.loop = loop
- self.label = Node(loop.label, 0)
+ label = loop.prefix_label or loop.label
+ self.label = Node(label, 0)
self.nodes = [ Node(op,0) for op in loop.operations if not
rop.is_jit_debug(op.opnum) ]
for i,node in enumerate(self.nodes):
node.opidx = i+1
self.inodes = [] # imaginary nodes
self.jump = Node(loop.jump, len(self.nodes)+1)
self.invariant_vars = {}
- self.update_invariant_vars()
+ self.update_invariant_vars(label)
self.memory_refs = {}
self.schedulable_nodes = []
self.index_vars = {}
@@ -576,8 +577,9 @@
self.inodes.append(node)
return node
- def update_invariant_vars(self):
- label_op = self.label.getoperation()
+ def update_invariant_vars(self, label_op=None):
+ if not label_op:
+ label_op = self.label.getoperation()
jump_op = self.jump.getoperation()
assert label_op.numargs() == jump_op.numargs()
for i in range(label_op.numargs()):
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -84,33 +84,14 @@
self.renamer.rename(op)
self.oplist.append(op)
-
def post_schedule(self):
loop = self.graph.loop
- #
if self.delayed:
# some operations can be delayed until the jump instruction,
# handle them here
self.resolve_delayed({}, self.delayed, loop.jump)
-
- #
+ loop.operations = self.oplist
self.renamer.rename(loop.jump)
- self.ensure_args_unpacked(loop.jump)
- loop.operations = self.oplist
- loop.prefix = self.invariant_oplist
- if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0:
- # label
- args = loop.label.getarglist_copy() + self.invariant_vector_vars
- opnum = loop.label.getopnum()
- op = loop.label.copy_and_change(opnum, args)
- self.renamer.rename(op)
- loop.prefix_label = op
- # jump
- args = loop.jump.getarglist_copy() + self.invariant_vector_vars
- opnum = loop.jump.getopnum()
- op = loop.jump.copy_and_change(opnum, args)
- self.renamer.rename(op)
- loop.jump = op
def profitable(self):
return True
@@ -755,6 +736,25 @@
break
self.setvector_of_box(arg, i, box)
+ def post_schedule(self):
+ SchedulerState.post_schedule(self)
+ loop = self.graph.loop
+ self.ensure_args_unpacked(loop.jump)
+ loop.prefix = self.invariant_oplist
+ if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0:
+ # label
+ args = loop.label.getarglist_copy() + self.invariant_vector_vars
+ opnum = loop.label.getopnum()
+ op = loop.label.copy_and_change(opnum, args)
+ self.renamer.rename(op)
+ loop.prefix_label = op
+ # jump
+ args = loop.jump.getarglist_copy() + self.invariant_vector_vars
+ opnum = loop.jump.getopnum()
+ op = loop.jump.copy_and_change(opnum, args)
+ self.renamer.rename(op)
+ loop.jump = op
+
class Pack(object):
""" A pack is a set of n statements that are:
* isomorphic
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -205,6 +205,7 @@
state.post_schedule()
#
oplist = loop.operations
+
loop.operations = loop.prefix[:]
if loop.prefix_label:
loop.operations += [loop.prefix_label]
@@ -1074,23 +1075,14 @@
guard_true(i3) [p0,i0]
jump(p0,i2)
"""
- dead_code = '\n '.join([
- "i{t1} = int_add(i0,{i})\n i{s} = int_lt(i{t1}, 102)".format(
- i=i+2, t1=i+201, t=i+200, s=i+20)
- for i in range(0,14)])
opt="""
[p0,i0]
- i200 = int_add(i0, 1)
- i400 = int_lt(i200, 102)
i2 = int_add(i0, 16)
i3 = int_lt(i2, 102)
guard_true(i3) [p0,i0]
- {dead_code}
- i500 = int_add(i0, 16)
- i501 = int_lt(i500, 102)
v10[16xi8] = vec_load_i(p0, i0, 1, 0, descr=chararraydescr)
jump(p0,i2)
- """.format(dead_code=dead_code)
+ """
loop = self.parse_loop(ops)
vopt = self.schedule(loop,15,with_guard_opt=True)
self.assert_equal(loop, self.parse_loop(opt))
@@ -1127,13 +1119,9 @@
[p0,i0]
v3[2xf64] = vec_expand_i(42)
label(p0,i0,v3[2xf64])
- i20 = int_add(i0, 1)
- i30 = int_lt(i20, 10)
i2 = int_add(i0, 2)
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0]
- i4 = int_add(i0, 2)
- i5 = int_lt(i4, 10)
v1[2xf64] = vec_load_i(p0, i0, 8, 0, descr=arraydescr)
v2[2xf64] = vec_int_sub(v1[2xf64], v3[2xf64])
vec_store(p0, i0, v2[2xf64], 8, 0, descr=arraydescr)
@@ -1158,13 +1146,9 @@
[p0,i0,f3]
v3[2xf64] = vec_expand_f(f3)
label(p0,i0,f3,v3[2xf64])
- i20 = int_add(i0, 1)
- i30 = int_lt(i20, 10)
i2 = int_add(i0, 2)
i3 = int_lt(i2, 10)
guard_true(i3) [p0,i0,f3]
- i4 = int_add(i0, 2)
- i5 = int_lt(i4, 10)
v1[2xf64] = vec_load_f(p0, i0, 8, 0, descr=floatarraydescr)
v2[2xf64] = vec_float_add(v1[2xf64], v3[2xf64])
vec_store(p0, i0, v2[2xf64], 8, 0, descr=floatarraydescr)
@@ -1190,13 +1174,9 @@
v7[2xf64] = vec_float_xor(v6[0xf64], v6[0xf64])
v2[2xf64] = vec_pack_f(v7[2xf64], f0, 0, 1)
label(p0, i0, v2[2xf64])
- i100 = int_add(i0, 8)
- i200 = int_lt(i100, 100)
i1 = int_add(i0, 16)
i2 = int_lt(i1, 100)
guard_true(i2) [p0, i0, v2[2xf64]]
- i10 = int_add(i0, 16)
- i20 = int_lt(i10, 100)
v1[2xf64] = vec_load_f(p0, i0, 1, 0, descr=floatarraydescr)
v3[2xf64] = vec_float_add(v2[2xf64], v1[2xf64])
jump(p0, i1, v3[2xf64])
@@ -1224,20 +1204,16 @@
trace_opt = self.parse_loop("""
[p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43,
i44, i21, i4, i0, i18]
guard_not_invalidated() [p36, i28, p9, i37, p14, f34, p12, p38, f35,
p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
- i50 = int_add(i28, 1)
- i20 = int_ge(i50, i18)
i54 = int_add(i28, 2)
i638 = int_ge(i54, i18)
guard_false(i638) [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39,
i40, i41, p42, i43, i44, i21, i4, i0, i18]
- i47 = int_add(i28, 2)
- i52 = int_ge(i47, i18)
v61[2xf64] = vec_load_f(i21, i44, 1, 0, descr=floatarraydescr)
v62[2xf64] = vec_load_f(i4, i41, 1, 0, descr=floatarraydescr)
v63[2xf64] = vec_float_add(v61, v62)
vec_store(i0, i37, v63, 1, 0, descr=floatarraydescr)
+ i637 = int_add(i37, 16)
+ i629 = int_add(i41, 16)
i55 = int_add(i44, 16)
- i629 = int_add(i41, 16)
- i637 = int_add(i37, 16)
f100 = vec_unpack_f(v61, 1, 1)
f101 = vec_unpack_f(v62, 1, 1)
jump(p36, i637, p9, i629, p14, f100, p12, p38, f101, p39, i40, i54,
p42, i43, i55, i21, i4, i0, i18)
@@ -1258,20 +1234,13 @@
"""
opt = """
[p0, p1, i1]
- i3 = int_add(i1, 1)
- i4 = int_ge(i3, 36)
i50 = int_add(i1, 4)
i51 = int_ge(i50, 36)
guard_false(i51) [p0, p1, i1]
+ v17 = vec_load_f(p0, i1, 8, 0, descr=floatarraydescr)
i5 = int_add(i1, 2)
- i8 = int_ge(i5, 36)
- i6 = int_add(i1, 3)
- i11 = int_ge(i6, 36)
- i7 = int_add(i1, 4)
- i14 = int_ge(i7, 36)
- v17 = vec_load_f(p0, i1, 8, 0, descr=floatarraydescr)
+ v18 = vec_load_f(p0, i5, 8, 0, descr=floatarraydescr)
v19 = vec_cast_float_to_singlefloat(v17)
- v18 = vec_load_f(p0, i5, 8, 0, descr=floatarraydescr)
v20 = vec_cast_float_to_singlefloat(v18)
v21 = vec_pack_i(v19, v20, 2, 2)
vec_store(p1, i1, v21, 4, 0, descr=float32arraydescr)
@@ -1299,26 +1268,18 @@
""")
trace_opt = self.parse_loop("""
[p0, p1, p2, i0, i4]
- i5 = int_add(i4, 4)
- i186 = int_lt(i5, 100)
i500 = int_add(i4, 16)
i501 = int_lt(i500, 100)
guard_true(i501) [p0, p1, p2, i0, i4]
- i187 = int_add(i4, 8)
- i188 = int_lt(i187, 100)
- i196 = int_add(i4, 12)
- i197 = int_lt(i196, 100)
- i400 = int_add(i4, 16)
- i401= int_lt(i400, 100)
v228[4xi32] = vec_load_i(p0, i0, 1, 0, descr=float32arraydescr)
- v229[2xf64] = vec_cast_singlefloat_to_float(v228)
- v230 = vec_unpack_i(v228, 2, 2)
- v231 = vec_cast_singlefloat_to_float(v230)
i189 = int_add(i0, 4)
v232 = vec_load_i(p1, i189, 1, 0, descr=float32arraydescr)
+ v229[2xf64] = vec_cast_singlefloat_to_float(v228)
v233 = vec_cast_singlefloat_to_float(v232)
v236 = vec_float_add(v229, v233)
v238 = vec_cast_float_to_singlefloat(v236)
+ v230 = vec_unpack_i(v228, 2, 2)
+ v231 = vec_cast_singlefloat_to_float(v230)
v234 = vec_unpack_i(v232, 2, 2)
v235 = vec_cast_singlefloat_to_float(v234)
v237 = vec_float_add(v231, v235)
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -58,7 +58,6 @@
op.set_forwarded(None)
def finaloplist(self, jitcell_token=None, reset_label_token=True,
label=False):
- oplist = []
if jitcell_token:
if reset_label_token:
token = TargetToken(jitcell_token)
@@ -76,6 +75,7 @@
self.jump.setdescr(token)
if reset_label_token:
self.jump.setdescr(token)
+ oplist = []
if self.prefix_label:
oplist = self.prefix + [self.prefix_label]
elif self.prefix:
@@ -133,7 +133,7 @@
#
start = time.clock()
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd,
warmstate.vec_cost)
- opt.run_optimization(metainterp_sd, info, loop)
+ oplist = opt.run_optimization(metainterp_sd, info, loop, jitcell_token, user_code)
end = time.clock()
#
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
@@ -144,7 +144,7 @@
debug_stop("vec-opt-loop")
#
info.label_op = loop.label
- return info, loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
+ return info, oplist
except NotAVectorizeableLoop:
debug_stop("vec-opt-loop")
# vectorization is not possible
@@ -221,7 +221,7 @@
self.smallest_type_bytes = 0
self.orig_label_args = None
- def run_optimization(self, metainterp_sd, info, loop):
+ def run_optimization(self, metainterp_sd, info, loop, jitcell_token, user_code):
self.orig_label_args = loop.label.getarglist_copy()
self.linear_find_smallest_type(loop)
byte_count = self.smallest_type_bytes
@@ -260,15 +260,16 @@
gso = GuardStrengthenOpt(graph.index_vars)
gso.propagate_all_forward(info, loop, user_code)
- # re-schedule the trace -> removes index operations
+ # re-schedule the trace -> removes many pure operations
graph = DependencyGraph(loop)
costmodel = GenericCostModel(self.cpu, self.cost_threshold)
- state = ScheduleState(self.cpu, graph)
+ state = SchedulerState(self.cpu, graph)
state.prepare()
- scheduler = Scheduler()
- scheduler.walk_and_emit(state)
+ Scheduler().walk_and_emit(state)
state.post_schedule()
+ return loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
+
def unroll_loop_iterations(self, loop, unroll_count):
""" Unroll the loop X times. unroll_count + 1 = unroll_factor """
numops = len(loop.operations)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit