Author: Richard Plangger <planri...@gmail.com>
Branch: vecopt-merge
Changeset: r79737:664117c201a8
Date: 2015-09-21 11:40 +0200
http://bitbucket.org/pypy/pypy/changeset/664117c201a8/

Log: vecopt.py tests passing again, now let's finally head to the assembler diff --git a/rpython/jit/metainterp/optimizeopt/guard.py b/rpython/jit/metainterp/optimizeopt/guard.py --- a/rpython/jit/metainterp/optimizeopt/guard.py +++ b/rpython/jit/metainterp/optimizeopt/guard.py @@ -91,7 +91,7 @@ guard.setdescr(descr.clone()) guard.setarg(0, box_result) label = loop.find_first(rop.LABEL) - guard.setfailargs(label.getarglist()) + guard.setfailargs(label.getarglist()[:]) opt.emit_operation(guard) return guard @@ -120,7 +120,7 @@ descr = myop.getdescr() descr.copy_all_attributes_from(other.op.getdescr()) myop.rd_frame_info_list = otherop.rd_frame_info_list - myop.setfailargs(otherop.getfailargs()) + myop.setfailargs(otherop.getfailargs()[:]) myop.rd_snapshot = otherop.rd_snapshot def emit_varops(self, opt, var, old_arg): @@ -140,6 +140,7 @@ opt.emit_operation(cmp_op) # emit that actual guard guard = ResOperation(self.op.getopnum(), [cmp_op], self.op.getdescr()) + guard.setfailargs(self.op.getfailargs()[:]) opt.emit_operation(guard) self.setindex(opt.operation_position()-1) self.setoperation(guard) @@ -173,6 +174,7 @@ self.strength_reduced = 0 # how many guards could be removed? 
self.strongest_guards = {} self.guards = {} + self.delayed = {} def collect_guard_information(self, loop): operations = loop.operations @@ -271,8 +273,30 @@ def emit_operation(self, op): self.renamer.rename(op) + #if op.is_always_pure(): + # self.delay(op) + # return + #self.emit_delayed_for(op) + #if not op.is_always_pure(): self._newoperations.append(op) + def delay(self, op): + self.delayed[op] = None + print "delayed", op + + def emit_delayed_for(self, op): + if op.is_inputarg(): + return + additional = [] + if op.is_guard(): + additional = op.getfailargs() + for arg in op.getarglist() + additional: + if arg in self.delayed: + del self.delayed[arg] + self.emit_delayed_for(arg) + self._newoperations.append(op) + + def operation_position(self): return len(self._newoperations) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -5,6 +5,7 @@ from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, MemoryRef, Node, IndexVar) from rpython.jit.metainterp.optimizeopt.renamer import Renamer +from rpython.jit.metainterp.resume import AccumInfo from rpython.rlib.objectmodel import we_are_translated from rpython.jit.metainterp.jitexc import NotAProfitableLoop from rpython.rlib.objectmodel import specialize, always_inline @@ -23,14 +24,16 @@ def post_schedule(self): loop = self.graph.loop self.renamer.rename(loop.jump) + self.ensure_args_unpacked(loop.jump) loop.operations = self.oplist loop.prefix = self.invariant_oplist - if len(self.invariant_vector_vars) > 0: - # TODO, accum? + if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0: args = loop.label.getarglist_copy() + self.invariant_vector_vars opnum = loop.label.getopnum() # TODO descr? 
- loop.prefix_label = loop.label.copy_and_change(opnum, args) + op = loop.label.copy_and_change(opnum, args) + self.renamer.rename(op) + loop.prefix_label = op def profitable(self): return True @@ -172,25 +175,22 @@ def any_size(self): return self.bytesize == TypeRestrict.ANY_SIZE + @always_inline + def any_count(self): + return self.count == TypeRestrict.ANY_COUNT + def check(self, value): assert value.datatype != '\x00' if self.type != TypeRestrict.ANY_TYPE: - if self.type != value.datatype: - assert 0, "type mismatch" - + assert self.type == value.datatype assert value.bytesize > 0 if not self.any_size(): - if self.bytesize != value.bytesize: - assert 0, "size mismatch" - + assert self.bytesize == value.bytesize assert value.count > 0 if self.count != TypeRestrict.ANY_COUNT: - if self.count != value.count: - assert 0, "count mismatch" - + assert value.count >= self.count if self.sign != TypeRestrict.ANY_SIGN: - if bool(self.sign) != value.sign: - assert 0, "sign mismatch" + assert bool(self.sign) == value.sign def max_input_count(self, count): """ How many """ @@ -205,8 +205,7 @@ TR_ANY_INTEGER = TypeRestrict(INT) TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2) TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2) - TR_LONG = TypeRestrict(INT, 8, 2) - TR_INT_2 = TypeRestrict(INT, 4, 2) + TR_INT32_2 = TypeRestrict(INT, 4, 2) # note that the following definition is x86 arch specific MAPPING = { @@ -237,9 +236,10 @@ rop.VEC_INT_SIGNEXT: [TR_ANY_INTEGER], rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: [TR_DOUBLE_2], - rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_FLOAT_2], + # weird but the trace will store single floats in int boxes + rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_INT32_2], rop.VEC_CAST_FLOAT_TO_INT: [TR_DOUBLE_2], - rop.VEC_CAST_INT_TO_FLOAT: [TR_INT_2], + rop.VEC_CAST_INT_TO_FLOAT: [TR_INT32_2], rop.VEC_FLOAT_EQ: [TR_ANY_FLOAT,TR_ANY_FLOAT], rop.VEC_FLOAT_NE: [TR_ANY_FLOAT,TR_ANY_FLOAT], @@ -264,11 +264,6 @@ assert isinstance(vecop, GuardResOp) vecop.setfailargs(op.getfailargs()) 
vecop.rd_snapshot = op.rd_snapshot - if pack.is_accumulating(): - for i,node in enumerate(pack.operations): - op = node.getoperation() - state.accumulation[op] = pack - def prepare_arguments(state, pack, args): # Transforming one argument to a vector box argument @@ -344,6 +339,12 @@ @always_inline def position_values(state, restrict, pack, args, index, position): + arg = args[index] + newcount, count = restrict.count, arg.count + if not restrict.any_count() and newcount != count: + if position == 0: + pass + pass if position != 0: # The vector box is at a position != 0 but it # is required to be at position 0. Unpack it! @@ -527,18 +528,17 @@ #self.appendedvar_pos_arg_count = len(sched_data.invariant_vector_vars) failargs = op.getfailargs() descr = op.getdescr() + # note: stitching a guard must resemble the order of the label + # otherwise a wrong mapping is handed to the register allocator for i,arg in enumerate(failargs): if arg is None: continue accum = self.accumulation.get(arg, None) if accum: assert isinstance(accum, AccumPack) - accum.attach_accum_info(descr.rd_accum_list, i) - - def post_schedule(self): - loop = self.graph.loop - self.ensure_args_unpacked(loop.jump) - SchedulerState.post_schedule(self) + accum.attach_accum_info(descr, i, arg) + seed = accum.getseed() + failargs[i] = self.renamer.rename_map.get(seed, seed) def profitable(self): return self.costmodel.profitable() @@ -602,6 +602,8 @@ if var: if var in self.invariant_vector_vars: return arg + if arg in self.accumulation: + return var args = [var, ConstInt(pos), ConstInt(1)] vecop = OpHelpers.create_vec_unpack(var.type, args, var.bytesize, var.signed, 1) @@ -757,12 +759,12 @@ vector register. 
""" before_count = len(packlist) - #print "splitting pack", self + print "splitting pack", self pack = self while pack.pack_load(vec_reg_size) > Pack.FULL: pack.clear() oplist, newoplist = pack.slice_operations(vec_reg_size) - #print " split of %dx, left: %d" % (len(oplist), len(newoplist)) + print " split of %dx, left: %d" % (len(oplist), len(newoplist)) pack.operations = oplist pack.update_pack_of_nodes() if not pack.leftmost().is_typecast(): @@ -778,7 +780,7 @@ newpack.clear() newpack.operations = [] break - #print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]])) + print " => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]])) pack.update_pack_of_nodes() def slice_operations(self, vec_reg_size): @@ -864,9 +866,8 @@ return 0 def attach_accum_info(self, descr, position, scalar): - descr.rd_accum_list = AccumInfo(descr.rd_accum_list, - position, self.operator, - self.scalar, None) + descr.rd_accum_list = AccumInfo(descr.rd_accum_list, position, self.operator, + scalar, None) def is_accumulating(self): return True diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py @@ -1085,7 +1085,7 @@ f2 = float_add(f0, f1) i1 = int_add(i0, 8) i2 = int_lt(i1, 100) - guard_false(i2) [p0, i0, f2] + guard_true(i2) [p0, i0, f2] jump(p0, i1, f2) """ trace_opt = """ @@ -1094,9 +1094,11 @@ v7[2xf64] = vec_int_xor(v6[0xf64], v6[0xf64]) v2[2xf64] = vec_pack_f(v7[2xf64], f0, 0, 1) label(p0, i0, v2[2xf64]) + i100 = int_add(i0, 8) + i200 = int_lt(i100, 100) i1 = int_add(i0, 16) i2 = int_lt(i1, 100) - guard_false(i2) [p0, i0, v2[2xf64]] + guard_true(i2) [p0, i0, v2[2xf64]] i10 = int_add(i0, 16) i20 = int_lt(i10, 100) v1[2xf64] = 
vec_raw_load_f(p0, i0, descr=floatarraydescr) @@ -1108,7 +1110,7 @@ self.assert_equal(loop, self.parse_loop(trace_opt)) def test_element_f45_in_guard_failargs(self): - ops = """ + trace = self.parse_loop(""" [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18] f45 = raw_load_f(i21, i44, descr=floatarraydescr) guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41] @@ -1122,33 +1124,33 @@ i52 = int_ge(i50, i18) guard_false(i52) [p38, p12, p9, p14, i48, i46, f47, i51, i50, f45, p39, None, None, None, i40, p42, i43, None, None, p36, None] jump(p36, i50, p9, i51, p14, f45, p12, p38, f47, p39, i40, i48, p42, i43, i46, i21, i4, i0, i18) - """ - opt = """ + """) + trace_opt = self.parse_loop(""" [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18] - guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, i43, f34, i28, p36, i41] + guard_not_invalidated() [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18] i50 = int_add(i28, 1) - i48 = int_add(i41, 8) - i51 = int_add(i37, 8) - i54 = int_add(i41, 16) - i46 = int_add(i44, 8) - i56 = int_add(i37, 16) - i52 = int_ge(i50, i18) - i637 = int_add(i28, 2) - i638 = int_ge(i637, i18) + i20 = int_ge(i50, i18) + i54 = int_add(i28, 2) + i638 = int_ge(i54, i18) guard_false(i638) [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18] + i12 = int_add(i44, 8) + i56 = int_add(i41, 8) + i46 = int_add(i37, 8) + i47 = int_add(i28, 2) + i52 = int_ge(i47, i18) i55 = int_add(i44, 16) - i629 = int_add(i28, 2) - i57 = int_ge(i637, i18) - v61 = vec_raw_load_f(i21, i44, 2, descr=floatarraydescr) - v62 = vec_raw_load_f(i4, i41, 2, descr=floatarraydescr) - v63 = vec_float_add(v61, v62) + i629 = int_add(i41, 16) + i637 = int_add(i37, 16) + v61[2xf64] = vec_raw_load_f(i21, i44, descr=floatarraydescr) + v62[2xf64] = vec_raw_load_f(i4, 
i41, descr=floatarraydescr) + v63[2xf64] = vec_float_add(v61, v62) vec_raw_store(i0, i37, v63, descr=floatarraydescr) - f100 = vec_float_unpack(v61, 1, 1) - f101 = vec_float_unpack(v62, 1, 1) + f100 = vec_unpack_f(v61, 1, 1) + f101 = vec_unpack_f(v62, 1, 1) jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18) - """ - vopt = self.vectorize(self.parse_loop(ops)) - self.assert_equal(vopt.loop, self.parse_loop(opt)) + """) + vopt = self.vectorize(trace) + self.assert_equal(trace, trace_opt) def test_shrink_vector_size(self): ops = """ @@ -1187,7 +1189,7 @@ self.assert_equal(loop, self.parse_loop(opt)) def test_castup_arith_castdown(self): - ops = """ + trace = self.parse_loop(""" [p0,p1,p2,i0,i4] i10 = raw_load_i(p0, i0, descr=float32arraydescr) i1 = int_add(i0, 4) @@ -1201,76 +1203,57 @@ i186 = int_lt(i5, 100) guard_true(i186) [] jump(p0,p1,p2,i1,i5) - """ - opt = """ + """) + trace_opt = self.parse_loop(""" [p0, p1, p2, i0, i4] i5 = int_add(i4, 4) - i1 = int_add(i0, 4) i186 = int_lt(i5, 100) i500 = int_add(i4, 16) i501 = int_lt(i500, 100) guard_true(i501) [p0, p1, p2, i0, i4] - i189 = int_add(i0, 8) + i189 = int_add(i0, 4) i187 = int_add(i4, 8) - i198 = int_add(i0, 12) i188 = int_lt(i187, 100) - i207 = int_add(i0, 16) + i207 = int_add(i0, 8) i196 = int_add(i4, 12) i197 = int_lt(i196, 100) - i205 = int_add(i4, 16) - i206 = int_lt(i205, 100) - v228 = vec_raw_load_i(p0, i0, 4, descr=float32arraydescr) - v229 = vec_cast_singlefloat_to_float(v228) - v230 = vec_int_unpack(v228, 2, 2) + i205 = int_add(i0, 12) + i400 = int_add(i4, 16) + i401= int_lt(i400, 100) + i402 = int_add(i0, 16) + v228[4xi32] = vec_raw_load_i(p0, i0, descr=float32arraydescr) + v229[2xf64] = vec_cast_singlefloat_to_float(v228) + v230 = vec_unpack_i(v228, 2, 2) v231 = vec_cast_singlefloat_to_float(v230) - v232 = vec_raw_load_i(p1, i1, 4, descr=float32arraydescr) + v232 = vec_raw_load_i(p1, i189, descr=float32arraydescr) v233 = 
vec_cast_singlefloat_to_float(v232) - v234 = vec_int_unpack(v232, 2, 2) + v236 = vec_float_add(v229, v233) + v238 = vec_cast_float_to_singlefloat(v236) + v234 = vec_unpack_i(v232, 2, 2) v235 = vec_cast_singlefloat_to_float(v234) v237 = vec_float_add(v231, v235) v239 = vec_cast_float_to_singlefloat(v237) - v236 = vec_float_add(v229, v233) - v238 = vec_cast_float_to_singlefloat(v236) - v240 = vec_pack_f(v238, v239, 2, 2) + v240 = vec_pack_i(v238, v239, 2, 2) vec_raw_store(p2, i4, v240, descr=float32arraydescr) jump(p0, p1, p2, i207, i500) - """ - vopt = self.vectorize(self.parse_loop(ops)) - self.assert_equal(vopt.loop, self.parse_loop(opt)) - - def test_truediv_abs_neg_float(self): - ops = """ - [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19] - f20 = raw_load(i16, i12, descr=floatarraydescr) - guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13] - i23 = int_add(i12, 8) - f24 = float_truediv(f20, f17) - f25 = float_abs(f20) - f26 = float_neg(f20) - raw_store(i18, i15, f24, descr=floatarraydescr) - i26 = int_add(i14, 1) - i28 = int_add(i15, 8) - i29 = int_ge(i26, i19) - guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13] - jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19) - """ - opt = self.vectorize(self.parse_loop(ops)) - self.debug_print_operations(opt.loop) + """) + vopt = self.vectorize(trace) + self.assert_equal(trace, trace_opt) def test_axis_sum(self): trace = """ [i1, p10, i11, p8, i12, p3, p4, p13, i14, i15, p6, p9, i16, i17, i18, i19, i20, i21, i22, i23] - f24 = raw_load(i16, i12, descr=floatarraydescr) + f24 = raw_load_f(i16, i12, descr=floatarraydescr) guard_not_invalidated() [i1, p9, p8, p6, p4, p3, f24, i11, i15, p13, i12, i14, p10] i26 = int_add(i12, 8) - i27 = getarrayitem_gc(p13, i1, descr=floatarraydescr) + i27 = getarrayitem_gc_f(p13, i1, descr=floatarraydescr) i28 = int_is_zero(i27) guard_false(i28) [i1, p9, p8, p6, p4, p3, f24, i26, i11, i15, p13, None, i14, 
p10] - f30 = raw_load(i17, i15, descr=floatarraydescr) + f30 = raw_load_f(i17, i15, descr=floatarraydescr) f31 = float_add(f30, f24) raw_store(i18, i15, f31, descr=floatarraydescr) i33 = int_add(i14, 1) - i34 = getarrayitem_gc(p13, i19, descr=floatarraydescr) + i34 = getarrayitem_gc_f(p13, i19, descr=floatarraydescr) i35 = int_lt(i34, i20) guard_true(i35) [i1, p9, p8, p6, p4, p3, i21, i34, i15, i33, i19, p13, f31, None, i26, i11, None, None, None, i14, p10] i37 = int_add(i34, 1) @@ -1287,7 +1270,8 @@ pass def test_cast_1(self): - trace = """ + # TODO + trace = self.parse_loop(""" [i9, i10, p2, p11, i12, i13, p4, p5, p14, i15, p8, i16, p17, i18, i19, i20, i21, i22, i23] i24 = raw_load_i(i20, i16, descr=float32arraydescr) guard_not_invalidated() [p8, p5, p4, p2, i24, p17, i13, i12, i10, i19, p14, p11, i18, i15, i16, None] @@ -1304,11 +1288,33 @@ i39 = int_ge(i36, i23) guard_false(i39) [p8, p5, p4, p2, i27, i28, i30, i24, i38, i36, p17, None, None, None, None, p14, p11, i18, i15, None, None] jump(i24, i28, p2, p11, i36, i38, p4, p5, p14, i15, p8, i27, p17, i18, i30, i20, i21, i22, i23) - """ - opt = self.vectorize(self.parse_loop(trace)) - self.debug_print_operations(opt.loop) + """) + opt = self.vectorize(trace) + self.debug_print_operations(trace) + + def test_truediv_abs_neg_float(self): + # TODO + trace = self.parse_loop(""" + [f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19] + f20 = raw_load_f(i16, i12, descr=floatarraydescr) + guard_not_invalidated() [p8, p7, p5, p4, p2, f20, None, i12, i11, p10, i15, i14, p13] + i23 = int_add(i12, 8) + f24 = float_truediv(f20, f17) + f25 = float_abs(f20) + f26 = float_neg(f20) + raw_store(i18, i15, f24, descr=floatarraydescr) + i26 = int_add(i14, 1) + i28 = int_add(i15, 8) + i29 = int_ge(i26, i19) + guard_false(i29) [p8, p7, p5, p4, p2, f20, i23, i28, None, p13] + jump(f20, p10, i11, p4, i23, p2, p5, p13, i26, p7, i28, p8, i16, f17, i18, i19) + """) + opt = self.vectorize(trace) + self.debug_print_operations(trace) + 
def test_all_guard(self): + # TODO trace = """ [p0, p3, i4, i5, i6, i7] f8 = raw_load_f(i6, i5, descr=floatarraydescr) @@ -1327,6 +1333,7 @@ self.debug_print_operations(loop) def test_max(self): + # TODO trace = """ [p3, i4, p2, i5, f6, i7, i8] f9 = raw_load_f(i7, i5, descr=floatarraydescr) diff --git a/rpython/jit/metainterp/optimizeopt/util.py b/rpython/jit/metainterp/optimizeopt/util.py --- a/rpython/jit/metainterp/optimizeopt/util.py +++ b/rpython/jit/metainterp/optimizeopt/util.py @@ -148,6 +148,7 @@ x = op1.getarg(i) y = op2.getarg(i) assert x.same_box(remap.get(y, y)) + assert x.same_shape(remap.get(y, y)) if op2 in remap: assert op1.same_box(remap[op2]) else: diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -36,7 +36,7 @@ class VectorLoop(object): def __init__(self, label, oplist, jump): self.label = label - self.inputargs = label.getarglist() + self.inputargs = label.getarglist_copy() self.prefix = [] self.prefix_label = None assert self.label.getopnum() == rop.LABEL @@ -160,15 +160,6 @@ self.has_two_labels = False def propagate_all_forward(self, info, loop): - #label = loop.label - #jump = loop.jump - #if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \ - # label.getopnum() != rop.LABEL: - # import pdb; pdb. set_trace() - # raise NotAVectorizeableLoop() - #if jump.numargs() != label.numargs(): - # import pdb; pdb. set_trace() - # raise NotAVectorizeableLoop() self.orig_label_args = loop.label.getarglist_copy() self.linear_find_smallest_type(loop) byte_count = self.smallest_type_bytes @@ -207,29 +198,6 @@ def unroll_loop_iterations(self, loop, unroll_count): """ Unroll the loop X times. 
unroll_count + 1 = unroll_factor """ numops = len(loop.operations) - # use the target token of the label - #target_token = label_op.getdescr() - #if not we_are_translated(): - # target_token.assumed_classes = {} - #if jump_op.getopnum() == rop.LABEL: - # jump_op = ResOperation(rop.JUMP, jump_op.getarglist(), target_token) - #else: - # jump_op = jump_op.clone() - # jump_op.setdescr(target_token) - #assert jump_op.is_final() - - #self.emit_unrolled_operation(label_op) - - #for i in range(0,numops): - # op = loop.operations[i].copy() - # if op.is_guard(): - # assert isinstance(op, GuardResOp) - # failargs = renamer.rename_failargs(op, clone=True) - # snapshot = renamer.rename_rd_snapshot(op.rd_snapshot, clone=True) - # op.setfailargs(failargs) - # op.rd_snapshot = snapshot - # operations.append(op) - # self.emit_unrolled_operation(op) renamer = Renamer() operations = loop.operations @@ -560,16 +528,12 @@ """ Marks this guard as an early exit! """ op = node.getoperation() assert isinstance(op, GuardResOp) - descr = None if op.getopnum() in (rop.GUARD_TRUE, rop.GUARD_FALSE): descr = CompileLoopVersionDescr() - else: - descr = ResumeAtLoopHeaderDescr() - if op.getdescr(): - descr.copy_all_attributes_from(op.getdescr()) - # - op.setdescr(descr) - op.setfailargs(loop.inputargs) + if op.getdescr(): + descr.copy_all_attributes_from(op.getdescr()) + op.setdescr(descr) + op.setfailargs(loop.label.getarglist_copy()) class CostModel(object): """ Utility to estimate the savings for the new trace loop. 
@@ -789,6 +753,9 @@ for pack in self.packs: if not pack.is_accumulating(): continue + for i,node in enumerate(pack.operations): + op = node.getoperation() + state.accumulation[op] = pack assert isinstance(pack, AccumPack) datatype = pack.getdatatype() bytesize = pack.getbytesize() @@ -818,6 +785,7 @@ state.setvector_of_box(pack.getseed(), 0, vecop) # prevent it from expansion state.renamer.start_renaming(pack.getseed(), vecop) + def split_overloaded_packs(self): newpacks = [] for i,pack in enumerate(self.packs): diff --git a/rpython/jit/metainterp/optimizeopt/version.py b/rpython/jit/metainterp/optimizeopt/version.py --- a/rpython/jit/metainterp/optimizeopt/version.py +++ b/rpython/jit/metainterp/optimizeopt/version.py @@ -28,9 +28,6 @@ else: self.descrs.append(descr) self.leads_to[descr] = version - # note: stitching a guard must resemble the order of the label - # otherwise a wrong mapping is handed to the register allocator - op.setfailargs(version.renamed_inputargs) assert version.renamed_inputargs is not None def remove(self, descr): diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -37,7 +37,7 @@ return self is other def same_shape(self, other): - return self is other + return True def repr_short(self, memo): return self.repr(memo) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit