Author: Richard Plangger <[email protected]>
Branch: vecopt2
Changeset: r77082:dd4ba307d155
Date: 2015-03-18 16:10 +0100
http://bitbucket.org/pypy/pypy/changeset/dd4ba307d155/
Log: enhanced the vectorizing testcase and clarified the unroll count: it is
now the number of additional times to unroll the loop, not the total amount
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -53,7 +53,7 @@
opt = self.vec_optimizer(loop)
opt._gather_trace_information(loop)
if unroll_factor == -1:
- unroll_factor = opt.get_estimated_unroll_factor()
+ unroll_factor = opt.get_unroll_count()
opt.unroll_loop_iterations(loop, unroll_factor)
opt.loop.operations = opt.get_newoperations()
return opt
@@ -184,7 +184,7 @@
guard_true(i10) []
jump(p0,p1,p2,i9)
"""
- self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 2)
+ self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 1)
def test_estimate_unroll_factor_smallest_byte_zero(self):
ops = """
@@ -194,7 +194,7 @@
"""
vopt = self.vec_optimizer(self.parse_loop(ops))
assert 0 == vopt.vec_info.smallest_type_bytes
- assert 0 == vopt.get_estimated_unroll_factor()
+ assert 0 == vopt.get_unroll_count()
def test_array_operation_indices_not_unrolled(self):
ops = """
@@ -212,7 +212,7 @@
raw_load(p0,i0,descr=chararraydescr)
jump(p0,i0)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
assert 1 in vopt.vec_info.memory_refs
assert 2 in vopt.vec_info.memory_refs
assert len(vopt.vec_info.memory_refs) == 2
@@ -224,15 +224,15 @@
i4 = raw_load(p0,i1,descr=chararraydescr)
jump(p0,i3,i4)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
assert 1 in vopt.vec_info.memory_refs
assert 2 in vopt.vec_info.memory_refs
assert len(vopt.vec_info.memory_refs) == 2
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
for i in [1,2,3,4]:
assert i in vopt.vec_info.memory_refs
assert len(vopt.vec_info.memory_refs) == 4
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),4)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),3)
for i in [1,2,3,4,5,6,7,8]:
assert i in vopt.vec_info.memory_refs
assert len(vopt.vec_info.memory_refs) == 8
@@ -244,7 +244,7 @@
i1 = int_add(i0,1)
jump(p0,i1)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
vopt.build_dependency_graph()
self.assert_no_edge(vopt.dependency_graph, [(i,i) for i in range(6)])
self.assert_def_use(vopt.dependency_graph, [(0,1),(2,3),(4,5)])
@@ -269,7 +269,7 @@
i3 = raw_load(p0,i0,descr=chararraydescr)
jump(p0,i0)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[1]
@@ -284,7 +284,7 @@
i3 = raw_load(p0,i1,descr=chararraydescr)
jump(p0,i1)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[2]
@@ -299,7 +299,7 @@
i3 = raw_load(p0,i1,descr=chararraydescr)
jump(p0,i1)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[2]
@@ -315,7 +315,7 @@
i3 = raw_load(p0,i2,descr=chararraydescr)
jump(p0,i1)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[3]
@@ -333,7 +333,7 @@
i5 = raw_load(p0,i4,descr=chararraydescr)
jump(p0,i4)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[5]
@@ -352,7 +352,7 @@
i7 = raw_load(p0,i6,descr=chararraydescr)
jump(p0,i6)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[7]
@@ -371,7 +371,7 @@
i5 = raw_load(p0,i4,descr=chararraydescr)
jump(p0,i4)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref1 = vopt.vec_info.memory_refs[5]
@@ -389,7 +389,7 @@
i6 = int_add(i4,1)
jump(p0,i1,i6)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
vopt.build_dependency_graph()
self.assert_no_edge(vopt.dependency_graph, [(i,i) for i in range(6)])
self.assert_def_use(vopt.dependency_graph,
[(0,1),(0,2),(0,3),(0,4),(2,5)])
@@ -424,7 +424,7 @@
i3 = raw_load(p0,i2,descr=chararraydescr)
jump(p0,i2)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref = vopt.vec_info.memory_refs[3]
@@ -436,7 +436,7 @@
i3 = raw_load(p0,i2,descr=chararraydescr)
jump(p0,i2)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref = vopt.vec_info.memory_refs[3]
@@ -452,7 +452,7 @@
i6 = raw_load(p0,i5,descr=chararraydescr)
jump(p0,i2)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref = vopt.vec_info.memory_refs[3]
@@ -473,7 +473,7 @@
i7 = raw_load(p0,i6,descr=chararraydescr)
jump(p0,i2)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref = vopt.vec_info.memory_refs[3]
@@ -494,7 +494,7 @@
i7 = raw_load(p0,i6,descr=chararraydescr)
jump(p0,i2)
"""
- vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+ vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
vopt.build_dependency_graph()
vopt.find_adjacent_memory_refs()
mref = vopt.vec_info.memory_refs[3]
@@ -511,7 +511,7 @@
jump()
"""
loop = self.parse_loop(ops)
- vopt = self.vec_optimizer_unrolled(loop,2)
+ vopt = self.vec_optimizer_unrolled(loop,1)
self.assert_equal(loop, self.parse_loop(ops))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -59,6 +59,9 @@
rename_map[la] = ja
def unroll_loop_iterations(self, loop, unroll_factor):
+ """ Unroll the loop X times. Unroll_factor of 0 = no unrolling,
+ 1 once, ...
+ """
op_count = len(loop.operations)
label_op = loop.operations[0]
@@ -81,7 +84,7 @@
jump_op_args = jump_op.getarglist()
rename_map = {}
- for i in range(2, unroll_factor+1):
+ for i in range(0, unroll_factor):
# for each unrolling factor the boxes are renamed.
self._rename_arguments_ssa(rename_map, label_op_args, jump_op_args)
for op in operations:
@@ -102,6 +105,19 @@
except KeyError:
pass
+
+ #if copied_op.is_guard():
+ # self.store_final_boxes_in_guard(copied_op, [])
+ #failargs = copied_op.getfailargs()
+ #if failargs:
+ # for i, arg in enumerate(failargs):
+ # try:
+ # value = rename_map[arg]
+ # print(type(copied_op))
+ # copied_op.setfailarg(i, value)
+ # except KeyError:
+ # pass
+
self.emit_unrolled_operation(copied_op)
self.vec_info.inspect_operation(copied_op)
@@ -129,17 +145,15 @@
for i,op in enumerate(loop.operations):
self.vec_info.inspect_operation(op)
- def get_estimated_unroll_factor(self, force_reg_bytes = -1):
- """ force_reg_bytes used for testing """
+ def get_unroll_count(self):
+ """ This is an estimated number of further unrolls """
# this optimization is not opaque, and needs info about the CPU
byte_count = self.vec_info.smallest_type_bytes
if byte_count == 0:
return 0
simd_vec_reg_bytes = 16 # TODO get from cpu
- if force_reg_bytes > 0:
- simd_vec_reg_bytes = force_reg_bytes
unroll_factor = simd_vec_reg_bytes // byte_count
- return unroll_factor
+ return unroll_factor-1 # it is already unrolled once
def propagate_all_forward(self):
@@ -152,7 +166,7 @@
# stop, there is no chance to vectorize this trace
raise NotAVectorizeableLoop()
- unroll_factor = self.get_estimated_unroll_factor()
+ unroll_factor = self.get_unroll_count()
self.unroll_loop_iterations(self.loop, unroll_factor)
diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -23,23 +23,25 @@
def test_simple_raw_load(self):
myjitdriver = JitDriver(greens = [],
- reds = ['i', 'res', 'va'],
+ reds = ['i', 'res', 'va','c'],
vectorize=True)
- def f():
- res = r_uint(0)
- va = alloc_raw_storage(32, zero=True)
- for i in range(32):
- raw_storage_setitem(va, i, rffi.cast(rffi.UCHAR,i))
+ def f(c):
+ res = 0
+ va = alloc_raw_storage(c*rffi.sizeof(rffi.SIGNED), zero=True)
+ for i in range(c):
+ raw_storage_setitem(va, i*rffi.sizeof(rffi.SIGNED),
+ rffi.cast(rffi.SIGNED,i))
i = 0
- while i < 32:
- myjitdriver.can_enter_jit(i=i, res=res, va=va)
- myjitdriver.jit_merge_point(i=i, res=res, va=va)
- res += raw_storage_getitem(rffi.UCHAR,va,i)
+ while i < c:
+ myjitdriver.can_enter_jit(i=i, res=res, va=va, c=c)
+ myjitdriver.jit_merge_point(i=i, res=res, va=va, c=c)
+ res += raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
i += 1
free_raw_storage(va)
return res
- res = self.meta_interp(f, [])
- assert res == sum(range(32))
+ i = 32
+ res = self.meta_interp(f, [i])
+ assert res == sum(range(i))
self.check_trace_count(1)
class TestLLtype(VectorizeTest, LLJitMixin):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit