Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77428:46704e37a322 Date: 2015-05-20 10:39 +0200 http://bitbucket.org/pypy/pypy/changeset/46704e37a322/
Log: turned off vectorize opt by default for all jit drivers and enabled vectorize opt in the micronumpy loop jit drivers; resolved a problem in a test case; added a general exception clause printing debug information when vecopt fails. diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -16,7 +16,7 @@ call2_driver = jit.JitDriver( name='numpy_call2', greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'], - reds='auto') + reds='auto', vectorize=True) def call2(space, shape, func, calc_dtype, res_dtype, w_lhs, w_rhs, out): # handle array_priority @@ -81,7 +81,7 @@ call1_driver = jit.JitDriver( name='numpy_call1', greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'], - reds='auto') + reds='auto', vectorize=True) def call1(space, shape, func, calc_dtype, res_dtype, w_obj, out): obj_iter, obj_state = w_obj.create_iter(shape) @@ -103,7 +103,7 @@ call_many_to_one_driver = jit.JitDriver( name='numpy_call_many_to_one', greens=['shapelen', 'nin', 'func', 'res_dtype'], - reds='auto') + reds='auto', vectorize=True) def call_many_to_one(space, shape, func, res_dtype, in_args, out): # out must hav been built. func needs no calc_type, is usually an @@ -137,7 +137,7 @@ call_many_to_many_driver = jit.JitDriver( name='numpy_call_many_to_many', greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'], - reds='auto') + reds='auto', vectorize=True) def call_many_to_many(space, shape, func, res_dtype, in_args, out_args): # out must hav been built.
func needs no calc_type, is usually an @@ -184,7 +184,7 @@ setslice_driver = jit.JitDriver(name='numpy_setslice', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def setslice(space, shape, target, source): if not shape: @@ -221,7 +221,7 @@ reduce_driver = jit.JitDriver(name='numpy_reduce', greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def compute_reduce(space, obj, calc_dtype, func, done_func, identity): obj_iter, obj_state = obj.create_iter() @@ -244,7 +244,7 @@ reduce_cum_driver = jit.JitDriver(name='numpy_reduce_cum_driver', greens = ['shapelen', 'func', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity): obj_iter, obj_state = obj.create_iter() @@ -282,7 +282,7 @@ where_driver = jit.JitDriver(name='numpy_where', greens = ['shapelen', 'dtype', 'arr_dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def where(space, out, shape, arr, x, y, dtype): out_iter, out_state = out.create_iter(shape) @@ -325,7 +325,7 @@ axis_reduce_driver = jit.JitDriver(name='numpy_axis_reduce', greens=['shapelen', 'func', 'dtype'], - reds='auto') + reds='auto', vectorize=True) def do_axis_reduce(space, shape, func, arr, dtype, axis, out, identity, cumulative, temp): @@ -369,7 +369,7 @@ def _new_argmin_argmax(op_name): arg_driver = jit.JitDriver(name='numpy_' + op_name, greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def argmin_argmax(arr): result = 0 @@ -395,7 +395,7 @@ dot_driver = jit.JitDriver(name = 'numpy_dot', greens = ['dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def multidim_dot(space, left, right, result, dtype, right_critical_dim): ''' assumes left, right are concrete arrays @@ -449,7 +449,7 @@ count_all_true_driver = jit.JitDriver(name = 'numpy_count', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def 
count_all_true_concrete(impl): s = 0 @@ -470,7 +470,7 @@ nonzero_driver = jit.JitDriver(name = 'numpy_nonzero', greens = ['shapelen', 'dims', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def nonzero(res, arr, box): res_iter, res_state = res.create_iter() @@ -492,7 +492,7 @@ getitem_filter_driver = jit.JitDriver(name = 'numpy_getitem_bool', greens = ['shapelen', 'arr_dtype', 'index_dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def getitem_filter(res, arr, index): res_iter, res_state = res.create_iter() @@ -520,7 +520,7 @@ setitem_filter_driver = jit.JitDriver(name = 'numpy_setitem_bool', greens = ['shapelen', 'arr_dtype', 'index_dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def setitem_filter(space, arr, index, value): arr_iter, arr_state = arr.create_iter() @@ -563,7 +563,7 @@ flatiter_setitem_driver = jit.JitDriver(name = 'numpy_flatiter_setitem', greens = ['dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def flatiter_setitem(space, dtype, val, arr_iter, arr_state, step, length): val_iter, val_state = val.create_iter() @@ -583,7 +583,7 @@ fromstring_driver = jit.JitDriver(name = 'numpy_fromstring', greens = ['itemsize', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def fromstring_loop(space, a, dtype, itemsize, s): i = 0 @@ -617,7 +617,7 @@ getitem_int_driver = jit.JitDriver(name = 'numpy_getitem_int', greens = ['shapelen', 'indexlen', 'prefixlen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def getitem_array_int(space, arr, res, iter_shape, indexes_w, prefix_w): shapelen = len(iter_shape) @@ -645,7 +645,7 @@ setitem_int_driver = jit.JitDriver(name = 'numpy_setitem_int', greens = ['shapelen', 'indexlen', 'prefixlen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def setitem_array_int(space, arr, iter_shape, indexes_w, val_arr, prefix_w): @@ -675,7 +675,7 @@ byteswap_driver = jit.JitDriver(name='numpy_byteswap_driver', greens = ['dtype'], - reds = 'auto') + reds = 
'auto', vectorize=True) def byteswap(from_, to): dtype = from_.dtype @@ -690,7 +690,7 @@ choose_driver = jit.JitDriver(name='numpy_choose_driver', greens = ['shapelen', 'mode', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def choose(space, arr, choices, shape, dtype, out, mode): shapelen = len(shape) @@ -724,7 +724,7 @@ clip_driver = jit.JitDriver(name='numpy_clip_driver', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def clip(space, arr, shape, min, max, out): assert min or max @@ -759,7 +759,7 @@ round_driver = jit.JitDriver(name='numpy_round_driver', greens = ['shapelen', 'dtype'], - reds = 'auto') + reds = 'auto', vectorize=True) def round(space, arr, dtype, shape, decimals, out): arr_iter, arr_state = arr.create_iter(shape) @@ -775,7 +775,7 @@ diagonal_simple_driver = jit.JitDriver(name='numpy_diagonal_simple_driver', greens = ['axis1', 'axis2'], - reds = 'auto') + reds = 'auto', vectorize=True) def diagonal_simple(space, arr, out, offset, axis1, axis2, size): out_iter, out_state = out.create_iter() @@ -819,7 +819,7 @@ def _new_binsearch(side, op_name): binsearch_driver = jit.JitDriver(name='numpy_binsearch_' + side, greens=['dtype'], - reds='auto') + reds='auto', vectorize=True) def binsearch(space, arr, key, ret): assert len(arr.get_shape()) == 1 diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -140,7 +140,9 @@ tgt_op.setfailargs(op.getfailargs()) def edge_to(self, to, arg=None, label=None): - assert self != to + if self is to: + print "debug: tried to put edge from: ", self.op, "to:", to.op + return dep = self.depends_on(to) if not dep: #if force or self.independent(idx_from, idx_to): @@ -818,34 +820,6 @@ .format(name='INT_SUB', op='-')).compile() del additive_func_source - #def operation_INT_ADD(self, op, node): - # box_r = 
op.result - # if not box_r: - # return - # box_a0 = op.getarg(0) - # box_a1 = op.getarg(1) - # if self.is_const_integral(box_a0) and self.is_const_integral(box_a1): - # idx_ref = IndexVar(box_r) - # idx_ref.constant = box_a0.getint() + box_a1.getint() - # self.index_vars[box_r] = idx_ref - # elif self.is_const_integral(box_a0): - # idx_ref = self.get_or_create(box_a1) - # idx_ref = idx_ref.clone() - # idx_ref.constant {op}= box_a0.getint() - # self.index_vars[box_r] = idx_ref - # elif self.is_const_integral(box_a1): - # idx_ref = self.get_or_create(box_a0) - # idx_ref = idx_ref.clone() - # idx_ref.add_const(box_a1.getint()) - # self.index_vars[box_r] = idx_ref - # else: - # # both variables are boxes - # if box_a1 in self.invariant_vars: - # idx_var = self.get_or_create(box_a0) - # idx_var = idx_var.clone() - # idx_var.set_next_nonconst_mod(BoxedIndexVar(box_a1, op.getopnum(), box_a0)) - # self.index_vars[box_r] = idx_var - multiplicative_func_source = """ def operation_{name}(self, op, node): box_r = op.result diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py @@ -63,6 +63,7 @@ opt.schedule() opt.unroll_loop_iterations(loop, unroll_factor) opt.loop.operations = opt.get_newoperations() + self.debug_print_operations(opt.loop) opt.clear_newoperations() opt.build_dependency_graph() self.last_graph = opt.dependency_graph @@ -1151,7 +1152,6 @@ i10 = raw_load(p0, i0, descr=singlefloatarraydescr) i1 = int_add(i0, 4) i11 = raw_load(p1, i1, descr=singlefloatarraydescr) - i2 = int_add(i1, 4) f1 = cast_singlefloat_to_float(i10) f2 = cast_singlefloat_to_float(i11) f3 = float_add(f1, f2) @@ -1160,7 +1160,7 @@ i5 = int_add(i4, 4) i186 = int_lt(i5, 100) guard_false(i186) [] - jump(p0,p1,p2,i2,i5) + jump(p0,p1,p2,i1,i5) """ opt = """ [p0, p1, p2, i0, i4] @@ -1168,33 +1168,31 
@@ i5 = int_add(i4, 4) i1 = int_add(i0, 4) i186 = int_lt(i5, 100) - i2 = int_add(i0, 8) + i189 = int_add(i0, 8) i187 = int_add(i4, 8) - i191 = int_add(i0, 12) - i190 = int_lt(i187, 100) - i192 = int_add(i0, 16) - i188 = int_add(i4, 12) - i200 = int_add(i0, 20) - i199 = int_lt(i188, 100) - i201 = int_add(i0, 24) - i189 = int_add(i4, 16) - i209 = int_add(i0, 28) - i208 = int_lt(i189, 100) - guard_false(i208) [] - i210 = int_add(i0, 32) - v217 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr) - v218 = vec_cast_singlefloat_to_float(v217, 0, 2) - v219 = vec_cast_singlefloat_to_float(v217, 2, 2) - v220 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr) - v221 = vec_cast_singlefloat_to_float(v220, 0, 2) - v222 = vec_cast_singlefloat_to_float(v220, 2, 2) - v223 = vec_float_add(v218, v221, 2) - v224 = vec_float_add(v219, v222, 2) - v225 = vec_cast_float_to_singlefloat(v223, 2) - v226 = vec_cast_float_to_singlefloat(v224, 2) - v227 = vec_float_pack(v225, v226, 2, 2) - vec_raw_store(p2, i4, v227, 4, descr=singlefloatarraydescr) - jump(p0, p1, p2, i210, i189) + i198 = int_add(i0, 12) + i188 = int_lt(i187, 100) + i207 = int_add(i0, 16) + i196 = int_add(i4, 12) + i197 = int_lt(i196, 100) + i205 = int_add(i4, 16) + i206 = int_lt(i205, 100) + guard_false(i206) [] + v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr) + v229 = vec_cast_singlefloat_to_float(v228, 2) + v230 = vec_int_unpack(v228, 2, 2) + v231 = vec_cast_singlefloat_to_float(v230, 2) + v232 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr) + v233 = vec_cast_singlefloat_to_float(v232, 2) + v234 = vec_int_unpack(v232, 2, 2) + v235 = vec_cast_singlefloat_to_float(v234, 2) + v236 = vec_float_add(v229, v233, 2) + v237 = vec_float_add(v231, v235, 2) + v238 = vec_cast_float_to_singlefloat(v236, 2) + v239 = vec_cast_float_to_singlefloat(v237, 2) + v240 = vec_float_pack(v238, v239, 2, 2) + vec_raw_store(p2, i4, v240, 4, descr=singlefloatarraydescr) + jump(p0, p1, p2, i207, i205) """ vopt = 
self.vectorize(self.parse_loop(ops)) self.assert_equal(vopt.loop, self.parse_loop(opt)) diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -50,6 +50,18 @@ except NotAVectorizeableLoop: # vectorization is not possible, propagate only normal optimizations loop.operations = orig_ops + except Exception as e: + loop.operations = orig_ops + print 'loop with %d instructions failed! ' % (len(orig_ops),) + print('--- loop instr numbered ---') + for i,op in enumerate(loop.operations): + print "[",i,"]",op, + if op.is_guard(): + print op.getfailargs() + else: + print "" + #import traceback + #traceback.print_exc() class VectorizingOptimizer(Optimizer): """ Try to unroll the loop and find instructions to group """ diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -552,7 +552,7 @@ 'enable_opts': 'INTERNAL USE ONLY (MAY NOT WORK OR LEAD TO CRASHES): ' 'optimizations to enable, or all = %s' % ENABLE_ALL_OPTS, 'max_unroll_recursion': 'how many levels deep to unroll a recursive function', - 'vectorize': 'turn on the vectorization optimization. default off. requirement: (sse2)', + 'vectorize': 'turn on the vectorization optimization. requires sse4.1', } PARAMETERS = {'threshold': 1039, # just above 1024, prime @@ -590,7 +590,7 @@ get_jitcell_at=None, set_jitcell_at=None, get_printable_location=None, confirm_enter_jit=None, can_never_inline=None, should_unroll_one_iteration=None, - name='jitdriver', check_untranslated=True, vectorize=True, + name='jitdriver', check_untranslated=True, vectorize=False, get_unique_id=None): if greens is not None: self.greens = greens _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit