Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79637:86dbbde6b191
Date: 2015-09-14 19:21 +0200
http://bitbucket.org/pypy/pypy/changeset/86dbbde6b191/
Log: Further poking the scheduler. Resoperations are now fully typed;
this makes all the transformation logic much easier and needs less code.
The first simple tests pass already.
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -97,7 +97,6 @@
""" Emit all the operations into the oplist parameter.
Initiates the scheduling. """
assert isinstance(state, SchedulerState)
- import pdb; pdb.set_trace()
while state.has_more():
node = self.next(state)
if node:
@@ -273,6 +272,37 @@
# return self.count
+
+class TypeOutput(object):
+ def __init__(self, type, count):
+ self.type = type
+ self.count = count
+
+
+ def bytecount(self):
+ return self.count * self.type.bytecount()
+
+class DataTyper(object):
+
+ def infer_type(self, op):
+ # default action, pass through: find the first arg
+ # the output is the same as the first argument!
+ if op.returns_void() or op.argcount() == 0:
+ return
+ arg0 = op.getarg(0)
+ op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+
+class PassFirstArg(TypeOutput):
+ def __init__(self):
+ pass
+
+def update_arg_in_vector_pos(state, argidx, box):
+ arguments = [op.getoperation().getarg(argidx) for op in
self.getoperations()]
+ for i,arg in enumerate(arguments):
+ #if i >= box.count:
+ # break
+ state.setvector_of_box(arg, i, box)
+
class TypeRestrict(object):
ANY_TYPE = -1
ANY_SIZE = -1
@@ -296,389 +326,433 @@
return True
-class TypeOutput(object):
- def __init__(self, type, count):
- self.type = type
- self.count = count
+class trans(object):
+ #DT_PASS = DataTyper()
+ TR_ANY = TypeRestrict()
+ TR_ANY_FLOAT = TypeRestrict(FLOAT)
+ TR_ANY_INTEGER = TypeRestrict(INT)
+ TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
+ TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
+ TR_LONG = TypeRestrict(INT, 8, 2)
+ TR_INT_2 = TypeRestrict(INT, 4, 2)
- def bytecount(self):
- return self.count * self.type.bytecount()
+ #INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
+ #FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
+ #FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
+ #LOAD = LoadToVectorLoad()
+ #STORE = StoreToVectorStore()
+ #GUARD = PassThroughOp((TR_ANY_INTEGER,))
-class DataTyper(object):
+ # note that the following definition is x86 arch specific
+ MAPPING = {
+ rop.VEC_INT_ADD: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_SUB: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_MUL: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_AND: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_OR: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_XOR: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_EQ: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_NE: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- def infer_type(self, op):
- # default action, pass through: find the first arg
- # the output is the same as the first argument!
- if op.returns_void() or op.argcount() == 0:
- return
- arg0 = op.getarg(0)
- op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+ rop.VEC_FLOAT_ADD: [TR_ANY_FLOAT, TR_ANY_FLOAT],
+ rop.VEC_FLOAT_SUB: [TR_ANY_FLOAT, TR_ANY_FLOAT],
+ rop.VEC_FLOAT_MUL: [TR_ANY_FLOAT, TR_ANY_FLOAT],
+ rop.VEC_FLOAT_TRUEDIV: [TR_ANY_FLOAT, TR_ANY_FLOAT],
+ rop.VEC_FLOAT_ABS: [TR_ANY_FLOAT],
+ rop.VEC_FLOAT_NEG: [TR_ANY_FLOAT],
-class PassFirstArg(TypeOutput):
- def __init__(self):
- pass
+ rop.VEC_RAW_LOAD_I: [None, None, TR_ANY],
+ rop.VEC_RAW_LOAD_F: [None, None, TR_ANY],
+ rop.VEC_GETARRAYITEM_RAW_I: [None, None, TR_ANY],
+ rop.VEC_GETARRAYITEM_RAW_F: [None, None, TR_ANY],
+ rop.VEC_GETARRAYITEM_GC_I: [None, None, TR_ANY],
+ rop.VEC_GETARRAYITEM_GC_F: [None, None, TR_ANY],
+
+ rop.VEC_RAW_STORE: [None, None, None, TR_ANY],
+ rop.VEC_SETARRAYITEM_RAW: [None, None, None, TR_ANY],
+ rop.VEC_SETARRAYITEM_GC: [None, None, None, TR_ANY],
+
+ rop.GUARD_TRUE: [TR_ANY_INTEGER],
+ rop.GUARD_FALSE: [TR_ANY_INTEGER],
+
+ ## irregular
+ rop.VEC_INT_SIGNEXT: [TR_ANY_INTEGER],
+
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: [TR_DOUBLE_2],
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_FLOAT_2],
+ rop.VEC_CAST_FLOAT_TO_INT: [TR_DOUBLE_2],
+ rop.VEC_CAST_INT_TO_FLOAT: [TR_INT_2],
+
+ rop.VEC_FLOAT_EQ: [TR_ANY_FLOAT,TR_ANY_FLOAT],
+ rop.VEC_FLOAT_NE: [TR_ANY_FLOAT,TR_ANY_FLOAT],
+ rop.VEC_INT_IS_TRUE: [TR_ANY_INTEGER,TR_ANY_INTEGER],
+ }
+
+ # TODO?
+ UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
+ rop.UINT_LT, rop.UINT_LE,
+ rop.UINT_GT, rop.UINT_GE)
+
+def turn_to_vector(state, pack):
+ """ Turn a pack into a vector instruction """
+ #
+ # TODO self.check_if_pack_supported(pack)
+ op = pack.leftmost()
+ args = op.getarglist()
+ prepare_arguments(state, pack, args)
+ vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
+ for i,node in enumerate(pack.operations):
+ op = node.getoperation()
+ state.setvector_of_box(op,i,vop)
+ #
+ if op.is_guard():
+ assert isinstance(op, GuardResOp)
+ assert isinstance(vop, GuardResOp)
+ vop.setfailargs(op.getfailargs())
+ vop.rd_snapshot = op.rd_snapshot
+ state.costmodel.record_pack_savings(pack, pack.numops())
+ #
+ if pack.is_accumulating():
+ box = oplist[position].result
+ assert box is not None
+ for node in pack.operations:
+ op = node.getoperation()
+ assert not op.returns_void()
+ state.renamer.start_renaming(op, box)
+ #
+ state.oplist.append(vop)
+
+
+def prepare_arguments(state, pack, args):
+ # Transforming one argument to a vector box argument
+ # The following cases can occur:
+ # 1) argument is present in the box_to_vbox map.
+ # a) vector can be reused immediately (simple case)
+ # b) an operation forces the unpacking of a vector
+ # 2) argument is not known to reside in a vector
+ # a) expand vars/consts before the label and add as argument
+ # b) expand vars created in the loop body
+ #
+ restrictions = trans.MAPPING[pack.leftmost().vector]
+ for i,arg in enumerate(args):
+ if i >= len(restrictions) or restrictions[i] is None:
+ # ignore this argument
+ continue
+ print "trans", i, "arg", arg
+ if arg.returns_vector():
+ continue
+ pos, vecop = state.getvector_of_box(arg)
+ if not vecop:
+ # 2) constant/variable expand this box
+ # TODO just as one function call
+ vecop = self.expand(arg, i)
+ state.setvector_of_box(arg, 0, vecop)
+ pos = 0
+ continue
+ args[i] = vecop
+ assemble_scattered_values(state, pack, args, i)
+ position_values(state, pack, args, i, arg, pos)
+
+def assemble_scattered_values(state, pack, args, index):
+ vectors = pack.argument_vectors(state, pack, index)
+ if len(vectors) > 1:
+ # the argument is scattered along different vector boxes
+ value = gather(vectors, packable)
+ update_arg_in_vector_pos(state, i, value)
+ args[i] = value
+ #if packed < packable and len(vboxes) > 1:
+ # # the argument is scattered along different vector boxes
+ # args[i] = self.gather(vboxes, packable)
+ # self.update_arg_in_vector_pos(i, args[i])
+ # continue
+
+def gather(self, vboxes, target_count): # packed < packable and packed <
stride:
+ (_, box) = vboxes[0]
+ i = 1
+ while i < len(vboxes):
+ (box2_pos, box2) = vboxes[i]
+ if box.getcount() + box2.getcount() <= target_count:
+ box = self.package(box, box.getcount(),
+ box2, box2_pos, box2.getcount())
+ i += 1
+ return box
+
+def position_values(state, pack, args, index, arg, pos):
+ pass
+ #if pos != 0:
+ # # The vector box is at a position != 0 but it
+ # # is required to be at position 0. Unpack it!
+ # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
+ # self.update_arg_in_vector_pos(i, args[i])
+ # continue
+
+ # convert size i64 -> i32, i32 -> i64, ...
+ # TODO if self.bytesize > 0:
+ # determine_trans(
+ # self.input_type.getsize() != vecop.getsize():
+ # vecop = self.extend(vecop, self.input_type)
+
+ # use the input as an indicator for the pack type
+ #packable = vecop.maximum_numops()
+ #packed = vecop.count
+ #assert packed >= 0
+ #assert packable >= 0
+ #if packed > packable:
+ # # the argument has more items than the operation is able to
process!
+ # # pos == 0 then it is already at the right place
+ # if pos != 0:
+ # args[i] = self.unpack(vecop, pos, packed - pos,
self.input_type)
+ # self.update_arg_in_vector_pos(i, args[i])
+ # #self.update_input_output(self.pack)
+ # continue
+ # else:
+ # assert vecop is not None
+ # args[i] = vecop
+ # continue
+ #vboxes = self.vector_boxes_for_args(i)
+ #if packed < packable and len(vboxes) > 1:
+ # # the argument is scattered along different vector boxes
+ # args[i] = self.gather(vboxes, packable)
+ # self.update_arg_in_vector_pos(i, args[i])
+ # continue
+ #if pos != 0:
+ # # The vector box is at a position != 0 but it
+ # # is required to be at position 0. Unpack it!
+ # args[i] = self.unpack(vecop, pos, packed - pos, self.input_type)
+ # self.update_arg_in_vector_pos(i, args[i])
+ # continue
+ ##
+ #assert vecop is not None
+ #args[i] = vecop
+
+def before_argument_transform(self, args):
+ pass
+
+def check_if_pack_supported(self, pack):
+ op0 = pack.operations[0].getoperation()
+ if self.input_type is None:
+ # must be a load/guard op
+ return
+ insize = self.input_type.getsize()
+ if op0.is_typecast():
+ # prohibit the packing of signext calls that
+ # cast to int16/int8.
+ _, outsize = op0.cast_to()
+ self.sched_data._prevent_signext(outsize, insize)
+ if op0.getopnum() == rop.INT_MUL:
+ if insize == 8 or insize == 1:
+ # see assembler for comment why
+ raise NotAProfitableLoop
+
+#def transform_result(self, result):
+# if result is None:
+# return None
+# vbox = self.new_result_vector_box()
+# #
+# # mark the position and the vbox in the hash
+# for i, node in enumerate(self.getoperations()):
+# if i >= vbox.getcount():
+# break
+# op = node.getoperation()
+# self.sched_data.setvector_of_box(op, i, vbox)
+# return vbox
+
+#def new_result_vector_box(self):
+# type = self.output_type.gettype()
+# size = self.output_type.getsize()
+# count = min(self.output_type.getcount(), len(self.pack.operations))
+# signed = self.output_type.signed
+# return BoxVector(type, count, size, signed)
+
+#def getoperations(self):
+# return self.pack.operations
+
+#def transform_arguments(self, args):
+# """ Transforming one argument to a vector box argument
+# The following cases can occur:
+# 1) argument is present in the box_to_vbox map.
+# a) vector can be reused immediately (simple case)
+# b) vector is too big
+# c) vector is too small
+# 2) argument is not known to reside in a vector
+# a) expand vars/consts before the label and add as argument
+# b) expand vars created in the loop body
+# """
+# for i,arg in enumerate(args):
+# if arg.returns_vector():
+# continue
+# if not self.is_vector_arg(i):
+# continue
+# box_pos, vbox = self.sched_data.getvector_of_box(arg)
+# if not vbox:
+# # constant/variable expand this box
+# vbox = self.expand(arg, i)
+# self.sched_data.setvector_of_box(arg, 0, vbox)
+# box_pos = 0
+# # convert size i64 -> i32, i32 -> i64, ...
+# if self.input_type.getsize() > 0 and \
+# self.input_type.getsize() != vbox.getsize():
+# vbox = self.extend(vbox, self.input_type)
+
+# # use the input as an indicator for the pack type
+# packable = self.input_type.getcount()
+# packed = vbox.getcount()
+# assert packed >= 0
+# assert packable >= 0
+# if packed > packable:
+# # the argument has more items than the operation is able to
process!
+# # box_pos == 0 then it is already at the right place
+# if box_pos != 0:
+# args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
+# self.update_arg_in_vector_pos(i, args[i])
+# #self.update_input_output(self.pack)
+# continue
+# else:
+# assert vbox is not None
+# args[i] = vbox
+# continue
+# vboxes = self.vector_boxes_for_args(i)
+# if packed < packable and len(vboxes) > 1:
+# # the argument is scattered along different vector boxes
+# args[i] = self.gather(vboxes, packable)
+# self.update_arg_in_vector_pos(i, args[i])
+# continue
+# if box_pos != 0:
+# # The vector box is at a position != 0 but it
+# # is required to be at position 0. Unpack it!
+# args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
+# self.update_arg_in_vector_pos(i, args[i])
+# continue
+# #self.update_input_output(self.pack)
+# #
+# assert vbox is not None
+# args[i] = vbox
+
+def extend(self, vbox, newtype):
+ assert vbox.gettype() == newtype.gettype()
+ if vbox.gettype() == INT:
+ return self.extend_int(vbox, newtype)
+ else:
+ raise NotImplementedError("cannot yet extend float")
+
+def extend_int(self, vbox, newtype):
+ vbox_cloned = newtype.new_vector_box(vbox.getcount())
+ self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
+ newsize = newtype.getsize()
+ assert newsize > 0
+ op = ResOperation(rop.VEC_INT_SIGNEXT,
+ [vbox, ConstInt(newsize)],
+ vbox_cloned)
+ self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(),
vbox.getcount())
+ self.vecops.append(op)
+ return vbox_cloned
+
+def unpack(self, vbox, index, count, arg_ptype):
+ """ Extract parts of the vector box into another vector box """
+ assert index < vbox.getcount()
+ assert index + count <= vbox.getcount()
+ assert count > 0
+ vbox_cloned = vectorbox_clone_set(vbox, count=count)
+ opnum = getunpackopnum(vbox.gettype())
+ op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)],
vbox_cloned)
+ self.costmodel.record_vector_unpack(vbox, index, count)
+ self.vecops.append(op)
+ #
+ return vbox_cloned
+
+def package(self, tgt, tidx, src, sidx, scount):
+ """ tgt = [1,2,3,4,_,_,_,_]
+ src = [5,6,_,_]
+ new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
+ """
+ assert sidx == 0 # restriction
+ count = tgt.getcount() + src.getcount()
+ new_box = vectorbox_clone_set(tgt, count=count)
+ opnum = getpackopnum(tgt.gettype())
+ op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)],
new_box)
+ self.vecops.append(op)
+ self.costmodel.record_vector_pack(src, sidx, scount)
+ if not we_are_translated():
+ self._check_vec_pack(op)
+ return new_box
+
+def _check_vec_pack(self, op):
+ result = op
+ arg0 = op.getarg(0)
+ arg1 = op.getarg(1)
+ index = op.getarg(2)
+ count = op.getarg(3)
+ assert isinstance(result, BoxVector)
+ assert isinstance(arg0, BoxVector)
+ assert isinstance(index, ConstInt)
+ assert isinstance(count, ConstInt)
+ assert arg0.getsize() == result.getsize()
+ if isinstance(arg1, BoxVector):
+ assert arg1.getsize() == result.getsize()
+ else:
+ assert count.value == 1
+ assert index.value < result.getcount()
+ assert index.value + count.value <= result.getcount()
+ assert result.getcount() > arg0.getcount()
+
+def expand(self, arg, argidx):
+ """ Expand a value into a vector box. Useful for arithmetic
+ of one vector with a scalar (either constant/variable)
+ """
+ elem_count = self.input_type.getcount()
+ vbox = self.input_type.new_vector_box(elem_count)
+ box_type = arg.type
+ expanded_map = self.sched_data.expanded_map
+ # note that heterogenous nodes are not yet tracked
+ already_expanded = expanded_map.get(arg, None)
+ if already_expanded:
+ return already_expanded
+
+ ops = self.sched_data.invariant_oplist
+ variables = self.sched_data.invariant_vector_vars
+ if isinstance(arg,Box) and arg not in self.sched_data.inputargs:
+ ops = self.vecops
+ variables = None
+ if isinstance(arg, BoxVector):
+ box_type = arg.gettype()
+
+ for i, node in enumerate(self.getoperations()):
+ op = node.getoperation()
+ if not arg.same_box(op.getarg(argidx)):
+ break
+ i += 1
+ else:
+ expand_opnum = getexpandopnum(box_type)
+ op = ResOperation(expand_opnum, [arg, ConstInt(vbox.item_count)], vbox)
+ ops.append(op)
+ if variables is not None:
+ variables.append(vbox)
+ expanded_map[arg] = vbox
+ return vbox
+
+ op = ResOperation(rop.VEC_BOX, [ConstInt(elem_count)], vbox)
+ ops.append(op)
+ opnum = getpackopnum(arg.type)
+ for i,node in enumerate(self.getoperations()):
+ op = node.getoperation()
+ arg = op.getarg(argidx)
+ new_box = vbox.clonebox()
+ ci = ConstInt(i)
+ c1 = ConstInt(1)
+ op = ResOperation(opnum, [vbox,arg,ci,c1], new_box)
+ vbox = new_box
+ ops.append(op)
+
+ if variables is not None:
+ variables.append(vbox)
+ return vbox
class OpToVectorOp(object):
- def __init__(self, restrictargs, typeoutput):
- self.args = list(restrictargs) # do not use a tuple. rpython cannot
union
- self.out = typeoutput
-
- def as_vector_operation(self, state, pack):
- #
- # TODO self.check_if_pack_supported(pack)
- op = pack.leftmost()
- args = op.getarglist()
- self.prepare_arguments(state, op.getarglist())
- vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
- #
- if op.is_guard():
- assert isinstance(op, GuardResOp)
- assert isinstance(vop, GuardResOp)
- vop.setfailargs(op.getfailargs())
- vop.rd_snapshot = op.rd_snapshot
- state.costmodel.record_pack_savings(pack, pack.numops())
- #
- if pack.is_accumulating():
- box = oplist[position].result
- assert box is not None
- for node in pack.operations:
- op = node.getoperation()
- assert not op.returns_void()
- scheduler.renamer.start_renaming(op, box)
- #
- state.oplist.append(vop)
-
- def prepare_arguments(self, state, args):
- self.before_argument_transform(args)
- # Transforming one argument to a vector box argument
- # The following cases can occur:
- # 1) argument is present in the box_to_vbox map.
- # a) vector can be reused immediatly (simple case)
- # b) vector is to big
- # c) vector is to small
- # 2) argument is not known to reside in a vector
- # a) expand vars/consts before the label and add as argument
- # b) expand vars created in the loop body
- #
- for i,arg in enumerate(args):
- if arg.returns_vector():
- continue
- if not self.transform_arg_at(i):
- continue
- box_pos, vbox = state.getvector_of_box(arg)
- if not vbox:
- # 2) constant/variable expand this box
- vbox = self.expand(arg, i)
- self.sched_data.setvector_of_box(arg, 0, vbox)
- box_pos = 0
- # convert size i64 -> i32, i32 -> i64, ...
- if self.input_type.getsize() > 0 and \
- self.input_type.getsize() != vbox.getsize():
- vbox = self.extend(vbox, self.input_type)
-
- # use the input as an indicator for the pack type
- packable = self.input_type.getcount()
- packed = vbox.getcount()
- assert packed >= 0
- assert packable >= 0
- if packed > packable:
- # the argument has more items than the operation is able to
process!
- # box_pos == 0 then it is already at the right place
- if box_pos != 0:
- args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
- self.update_arg_in_vector_pos(i, args[i])
- #self.update_input_output(self.pack)
- continue
- else:
- assert vbox is not None
- args[i] = vbox
- continue
- vboxes = self.vector_boxes_for_args(i)
- if packed < packable and len(vboxes) > 1:
- # the argument is scattered along different vector boxes
- args[i] = self.gather(vboxes, packable)
- self.update_arg_in_vector_pos(i, args[i])
- continue
- if box_pos != 0:
- # The vector box is at a position != 0 but it
- # is required to be at position 0. Unpack it!
- args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
- self.update_arg_in_vector_pos(i, args[i])
- continue
- #self.update_input_output(self.pack)
- #
- assert vbox is not None
- args[i] = vbox
-
- def before_argument_transform(self, args):
+ def __init__(self): #, restrictargs, typeoutput):
pass
-
- def check_if_pack_supported(self, pack):
- op0 = pack.operations[0].getoperation()
- if self.input_type is None:
- # must be a load/guard op
- return
- insize = self.input_type.getsize()
- if op0.casts_box():
- # prohibit the packing of signext calls that
- # cast to int16/int8.
- _, outsize = op0.cast_to()
- self.sched_data._prevent_signext(outsize, insize)
- if op0.getopnum() == rop.INT_MUL:
- if insize == 8 or insize == 1:
- # see assembler for comment why
- raise NotAProfitableLoop
-
- def transform_result(self, result):
- if result is None:
- return None
- vbox = self.new_result_vector_box()
- #
- # mark the position and the vbox in the hash
- for i, node in enumerate(self.getoperations()):
- if i >= vbox.getcount():
- break
- op = node.getoperation()
- self.sched_data.setvector_of_box(op, i, vbox)
- return vbox
-
- def new_result_vector_box(self):
- type = self.output_type.gettype()
- size = self.output_type.getsize()
- count = min(self.output_type.getcount(), len(self.pack.operations))
- signed = self.output_type.signed
- return BoxVector(type, count, size, signed)
-
- def getoperations(self):
- return self.pack.operations
-
- def transform_arguments(self, args):
- """ Transforming one argument to a vector box argument
- The following cases can occur:
- 1) argument is present in the box_to_vbox map.
- a) vector can be reused immediatly (simple case)
- b) vector is to big
- c) vector is to small
- 2) argument is not known to reside in a vector
- a) expand vars/consts before the label and add as argument
- b) expand vars created in the loop body
- """
- for i,arg in enumerate(args):
- if arg.returns_vector():
- continue
- if not self.is_vector_arg(i):
- continue
- box_pos, vbox = self.sched_data.getvector_of_box(arg)
- if not vbox:
- # constant/variable expand this box
- vbox = self.expand(arg, i)
- self.sched_data.setvector_of_box(arg, 0, vbox)
- box_pos = 0
- # convert size i64 -> i32, i32 -> i64, ...
- if self.input_type.getsize() > 0 and \
- self.input_type.getsize() != vbox.getsize():
- vbox = self.extend(vbox, self.input_type)
-
- # use the input as an indicator for the pack type
- packable = self.input_type.getcount()
- packed = vbox.getcount()
- assert packed >= 0
- assert packable >= 0
- if packed > packable:
- # the argument has more items than the operation is able to
process!
- # box_pos == 0 then it is already at the right place
- if box_pos != 0:
- args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
- self.update_arg_in_vector_pos(i, args[i])
- #self.update_input_output(self.pack)
- continue
- else:
- assert vbox is not None
- args[i] = vbox
- continue
- vboxes = self.vector_boxes_for_args(i)
- if packed < packable and len(vboxes) > 1:
- # the argument is scattered along different vector boxes
- args[i] = self.gather(vboxes, packable)
- self.update_arg_in_vector_pos(i, args[i])
- continue
- if box_pos != 0:
- # The vector box is at a position != 0 but it
- # is required to be at position 0. Unpack it!
- args[i] = self.unpack(vbox, box_pos, packed - box_pos,
self.input_type)
- self.update_arg_in_vector_pos(i, args[i])
- continue
- #self.update_input_output(self.pack)
- #
- assert vbox is not None
- args[i] = vbox
-
- def gather(self, vboxes, target_count): # packed < packable and packed <
stride:
- (_, box) = vboxes[0]
- i = 1
- while i < len(vboxes):
- (box2_pos, box2) = vboxes[i]
- if box.getcount() + box2.getcount() <= target_count:
- box = self.package(box, box.getcount(),
- box2, box2_pos, box2.getcount())
- i += 1
- return box
-
- def update_arg_in_vector_pos(self, argidx, box):
- arguments = [op.getoperation().getarg(argidx) for op in
self.getoperations()]
- for i,arg in enumerate(arguments):
- if i >= box.getcount():
- break
- self.sched_data.setvector_of_box(arg, i, box)
-
- def vector_boxes_for_args(self, index):
- args = [op.getoperation().getarg(index) for op in self.getoperations()]
- vboxes = []
- last_vbox = None
- for arg in args:
- pos, vbox = self.sched_data.getvector_of_box(arg)
- if vbox is not last_vbox and vbox is not None:
- vboxes.append((pos, vbox))
- last_vbox = vbox
- return vboxes
-
-
- def extend(self, vbox, newtype):
- assert vbox.gettype() == newtype.gettype()
- if vbox.gettype() == INT:
- return self.extend_int(vbox, newtype)
- else:
- raise NotImplementedError("cannot yet extend float")
-
- def extend_int(self, vbox, newtype):
- vbox_cloned = newtype.new_vector_box(vbox.getcount())
- self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
- newsize = newtype.getsize()
- assert newsize > 0
- op = ResOperation(rop.VEC_INT_SIGNEXT,
- [vbox, ConstInt(newsize)],
- vbox_cloned)
- self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(),
vbox.getcount())
- self.vecops.append(op)
- return vbox_cloned
-
- def unpack(self, vbox, index, count, arg_ptype):
- """ Extract parts of the vector box into another vector box """
- assert index < vbox.getcount()
- assert index + count <= vbox.getcount()
- assert count > 0
- vbox_cloned = vectorbox_clone_set(vbox, count=count)
- opnum = getunpackopnum(vbox.gettype())
- op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)],
vbox_cloned)
- self.costmodel.record_vector_unpack(vbox, index, count)
- self.vecops.append(op)
- #
- return vbox_cloned
-
- def package(self, tgt, tidx, src, sidx, scount):
- """ tgt = [1,2,3,4,_,_,_,_]
- src = [5,6,_,_]
- new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
- """
- assert sidx == 0 # restriction
- count = tgt.getcount() + src.getcount()
- new_box = vectorbox_clone_set(tgt, count=count)
- opnum = getpackopnum(tgt.gettype())
- op = ResOperation(opnum, [tgt, src, ConstInt(tidx), ConstInt(scount)],
new_box)
- self.vecops.append(op)
- self.costmodel.record_vector_pack(src, sidx, scount)
- if not we_are_translated():
- self._check_vec_pack(op)
- return new_box
-
- def _check_vec_pack(self, op):
- result = op
- arg0 = op.getarg(0)
- arg1 = op.getarg(1)
- index = op.getarg(2)
- count = op.getarg(3)
- assert isinstance(result, BoxVector)
- assert isinstance(arg0, BoxVector)
- assert isinstance(index, ConstInt)
- assert isinstance(count, ConstInt)
- assert arg0.getsize() == result.getsize()
- if isinstance(arg1, BoxVector):
- assert arg1.getsize() == result.getsize()
- else:
- assert count.value == 1
- assert index.value < result.getcount()
- assert index.value + count.value <= result.getcount()
- assert result.getcount() > arg0.getcount()
-
- def expand(self, arg, argidx):
- """ Expand a value into a vector box. useful for arith metic
- of one vector with a scalar (either constant/varialbe)
- """
- elem_count = self.input_type.getcount()
- vbox = self.input_type.new_vector_box(elem_count)
- box_type = arg.type
- expanded_map = self.sched_data.expanded_map
- # note that heterogenous nodes are not yet tracked
- already_expanded = expanded_map.get(arg, None)
- if already_expanded:
- return already_expanded
-
- ops = self.sched_data.invariant_oplist
- variables = self.sched_data.invariant_vector_vars
- if isinstance(arg,Box) and arg not in self.sched_data.inputargs:
- ops = self.vecops
- variables = None
- if isinstance(arg, BoxVector):
- box_type = arg.gettype()
-
- for i, node in enumerate(self.getoperations()):
- op = node.getoperation()
- if not arg.same_box(op.getarg(argidx)):
- break
- i += 1
- else:
- expand_opnum = getexpandopnum(box_type)
- op = ResOperation(expand_opnum, [arg, ConstInt(vbox.item_count)],
vbox)
- ops.append(op)
- if variables is not None:
- variables.append(vbox)
- expanded_map[arg] = vbox
- return vbox
-
- op = ResOperation(rop.VEC_BOX, [ConstInt(elem_count)], vbox)
- ops.append(op)
- opnum = getpackopnum(arg.type)
- for i,node in enumerate(self.getoperations()):
- op = node.getoperation()
- arg = op.getarg(argidx)
- new_box = vbox.clonebox()
- ci = ConstInt(i)
- c1 = ConstInt(1)
- op = ResOperation(opnum, [vbox,arg,ci,c1], new_box)
- vbox = new_box
- ops.append(op)
-
- if variables is not None:
- variables.append(vbox)
- return vbox
-
- def transform_arg_at(self, i):
- if i < 0 or i >= len(self.args):
- return False
- return self.args[i] is not None
-
- def get_output_type_given(self, input_type, op):
- return input_type
-
- def get_input_type_given(self, output_type, op):
- return output_type
-
- def force_input(self, ptype):
- """ Some operations require a specific count/size,
- they can force the input type here!
- """
- return ptype
+ #self.args = list(restrictargs) # do not use a tuple. rpython cannot
union
+ #self.out = typeoutput
class OpToVectorOpConv(OpToVectorOp):
def __init__(self, intype, outtype):
@@ -790,97 +864,31 @@
raise AssertionError("cannot infer input type from output type")
-class trans(object):
- DT_PASS = DataTyper()
- TR_ANY_FLOAT = TypeRestrict(FLOAT)
- TR_ANY_INTEGER = TypeRestrict(INT)
- TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
- TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
- TR_LONG = TypeRestrict(INT, 8, 2)
- TR_INT_2 = TypeRestrict(INT, 4, 2)
-
- INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
- FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
- FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
- LOAD = LoadToVectorLoad()
- STORE = StoreToVectorStore()
- GUARD = PassThroughOp((TR_ANY_INTEGER,))
-
- # note that the following definition is x86 arch specific
- MAPPING = {
- rop.VEC_INT_ADD: INT,
- rop.VEC_INT_SUB: INT,
- rop.VEC_INT_MUL: INT,
- rop.VEC_INT_AND: INT,
- rop.VEC_INT_OR: INT,
- rop.VEC_INT_XOR: INT,
- rop.VEC_INT_EQ: INT,
- rop.VEC_INT_NE: INT,
-
- rop.VEC_FLOAT_ADD: FLOAT,
- rop.VEC_FLOAT_SUB: FLOAT,
- rop.VEC_FLOAT_MUL: FLOAT,
- rop.VEC_FLOAT_TRUEDIV: FLOAT,
- rop.VEC_FLOAT_ABS: FLOAT_UNARY,
- rop.VEC_FLOAT_NEG: FLOAT_UNARY,
-
- rop.VEC_RAW_LOAD_I: LOAD,
- rop.VEC_RAW_LOAD_F: LOAD,
- rop.VEC_GETARRAYITEM_RAW_I: LOAD,
- rop.VEC_GETARRAYITEM_RAW_F: LOAD,
- rop.VEC_GETARRAYITEM_GC_I: LOAD,
- rop.VEC_GETARRAYITEM_GC_F: LOAD,
-
- rop.VEC_RAW_STORE: STORE,
- rop.VEC_SETARRAYITEM_RAW: STORE,
- rop.VEC_SETARRAYITEM_GC: STORE,
-
- rop.GUARD_TRUE: GUARD,
- rop.GUARD_FALSE: GUARD,
-
- # irregular
- rop.VEC_INT_SIGNEXT: SignExtToVectorOp((TR_ANY_INTEGER,), None),
-
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(TR_DOUBLE_2,
None), #RESTRICT_2_FLOAT),
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(TR_FLOAT_2, None),
#RESTRICT_2_DOUBLE),
- rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(TR_DOUBLE_2, None),
#RESTRICT_2_INT),
- rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(TR_INT_2, None),
#RESTRICT_2_DOUBLE),
-
- rop.VEC_FLOAT_EQ: OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None),
- rop.VEC_FLOAT_NE: OpToVectorOp((TR_ANY_FLOAT,TR_ANY_FLOAT), None),
- rop.VEC_INT_IS_TRUE: OpToVectorOp((TR_ANY_INTEGER,TR_ANY_INTEGER),
None), # TR_ANY_INTEGER),
- }
-
- # TODO?
- UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
- rop.UINT_LT, rop.UINT_LE,
- rop.UINT_GT, rop.UINT_GE)
-
-def determine_input_output_types(pack, node, forward):
- """ This function is two fold. If moving forward, it
- gets an input type from the packs output type and returns
- the transformed packtype.
-
- Moving backward, the origins pack input type is the output
- type and the transformation of the packtype (in reverse direction)
- is the input
- """
- op = node.getoperation()
- op2vecop = determine_trans(op)
- if forward:
- input_type = op2vecop.force_input(pack.output_type)
- output_type = op2vecop.get_output_type_given(input_type, op)
- if output_type:
- output_type = output_type.clone()
- else:
- # going backwards, things are not that easy anymore
- output_type = pack.input_type
- input_type = op2vecop.get_input_type_given(output_type, op)
- if input_type:
- input_type = input_type.clone()
-
- return input_type, output_type
+#def determine_input_output_types(pack, node, forward):
+# """ This function is two fold. If moving forward, it
+# gets an input type from the packs output type and returns
+# the transformed packtype.
+#
+# Moving backward, the origins pack input type is the output
+# type and the transformation of the packtype (in reverse direction)
+# is the input
+# """
+# op = node.getoperation()
+# op2vecop = determine_trans(op)
+# if forward:
+# input_type = op2vecop.force_input(pack.output_type)
+# output_type = op2vecop.get_output_type_given(input_type, op)
+# if output_type:
+# output_type = output_type.clone()
+# else:
+# # going backwards, things are not that easy anymore
+# output_type = pack.input_type
+# input_type = op2vecop.get_input_type_given(output_type, op)
+# if input_type:
+# input_type = input_type.clone()
+#
+# return input_type, output_type
def determine_trans(op):
op2vecop = trans.MAPPING.get(op.vector, None)
@@ -951,8 +959,8 @@
assert node.pack.numops() > 1
for node in node.pack.operations:
scheduler.mark_emitted(node, self)
- op2vecop = determine_trans(node.pack.leftmost())
- op2vecop.as_vector_operation(self, node.pack)
+ # TODO op2vecop = determine_trans(node.pack.leftmost())
+ turn_to_vector(self, node.pack)
return True
return False
@@ -1021,39 +1029,22 @@
def getvector_of_box(self, arg):
return self.box_to_vbox.get(arg, (-1, None))
- def setvector_of_box(self, box, off, vector):
- assert off < vector.getcount()
- assert box.type != 'V'
- self.box_to_vbox[box] = (off, vector)
+ def setvector_of_box(self, var, off, vector):
+ assert off < vector.count
+ assert not var.is_vector()
+ self.box_to_vbox[var] = (off, vector)
def opcount_filling_vector_register(pack, vec_reg_size):
""" how many operations of that kind can one execute
with a machine instruction of register size X?
"""
- pack_type = pack.input_type
- if pack_type is None:
- pack_type = pack.output_type # load operations
-
op = pack.leftmost()
- if op.casts_box():
- count = pack_type.getcount()
- return count
- count = vec_reg_size // pack_type.getsize()
- return count
-
-def maximum_byte_size(pack, vec_reg_size):
- """ The maxmum size in bytes the operation is able to
- process with the hardware register and the operation
- semantics.
- """
- op = pack.leftmost()
- if op.casts_box():
- # casting is special, often only takes a half full vector
- pack_type = pack.input_type
- if pack_type is None:
- pack_type = pack.output_type # load operations
- return pack_type.byte_size()
- return vec_reg_size
+ if op.is_typecast():
+ if op.casts_down():
+ return vec_reg_size // op.cast_from_bytesize()
+ else:
+ return vec_reg_size // op.cast_to_bytesize()
+ return vec_reg_size // op.bytesize
class Pack(object):
""" A pack is a set of n statements that are:
@@ -1080,6 +1071,9 @@
def leftmost(self):
return self.operations[0].getoperation()
+ def rightmost(self):
+ return self.operations[-1].getoperation()
+
def pack_type(self):
ptype = self.input_type
if self.input_type is None:
@@ -1113,14 +1107,15 @@
return 0
if self.numops() == 0:
return -1
- size = maximum_byte_size(self, vec_reg_size)
- return left.bytesize * self.numops() - size
- #if self.input_type is None:
- # e.g. load operations
- # return self.output_type.bytecount(self) - size
- # default only consider the input type
- # e.g. store operations, int_add, ...
- #return self.input_type.bytecount(self) - size
+ if left.is_typecast():
+ # casting is special, often only takes a half full vector
+ if left.casts_down():
+ # size is reduced
+ return left.cast_from_bytesize() * self.numops() - vec_reg_size
+ else:
+ # size is increased
+ return left.cast_to_bytesize() * self.numops() - vec_reg_size
+ return left.bytesize * self.numops() - vec_reg_size
def is_full(self, vec_reg_size):
""" If one input element times the opcount is equal
@@ -1190,6 +1185,17 @@
accum = False
return rightmost is leftmost and accum
+ def argument_vectors(self, state, pack, index):
+ args = [node.getoperation().getarg(index) for node in pack.operations]
+ vectors = []
+ last = None
+ for arg in args:
+ pos, vecop = state.getvector_of_box(arg)
+ if vecop is not last and vecop is not None:
+ vectors.append((pos, vecop))
+ last = vecop
+ return vectors
+
def __repr__(self):
if len(self.operations) == 0:
return "Pack(empty)"
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -50,7 +50,7 @@
else:
label = loop.operations[0]
label.setdescr(TargetToken(token))
- loop = VectorLoop(label, loop.operations[1:-1], loop.operations[-1])
+ loop = VectorLoop(label, loop.operations[0:-1], loop.operations[-1])
loop.jump.setdescr(token)
for op in loop.operations:
if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -67,8 +67,8 @@
loop.graph = FakeDependencyGraph(loop)
return loop
- def pack(self, loop, l, r, input_type, output_type):
- return Pack(loop.graph.nodes[1+l:1+r])
+ def pack(self, loop, l, r, input_type=None, output_type=None):
+ return Pack(loop.graph.nodes[l:r])
def schedule(self, loop, packs, vec_reg_size=16,
prepend_invariant=False, overwrite_funcs=None):
@@ -115,6 +115,21 @@
assert node.count == 1
# must return here, then the test passed
+ def test_split_pack(self):
+ loop1 = self.parse_trace("""
+ f10 = raw_load_f(p0, i0, descr=double)
+ f11 = raw_load_f(p0, i1, descr=double)
+ f12 = raw_load_f(p0, i2, descr=double)
+ """)
+ ps = PackSet(16)
+ ps.packs = [self.pack(loop1, 0, 3)]
+ op1 = ps.packs[0].operations[0]
+ op2 = ps.packs[0].operations[1]
+ ps.split_overloaded_packs()
+ assert len(ps.packs) == 1
+ assert ps.packs[0].leftmost() is op1.getoperation()
+ assert ps.packs[0].rightmost() is op2.getoperation()
+
def test_schedule_split_load(self):
loop1 = self.parse_trace("""
f10 = raw_load_f(p0, i0, descr=float)
@@ -124,10 +139,10 @@
f14 = raw_load_f(p0, i4, descr=float)
f15 = raw_load_f(p0, i5, descr=float)
""")
- pack1 = self.pack(loop1, 0, 6, None, F32)
+ pack1 = self.pack(loop1, 0, 6)
loop2 = self.schedule(loop1, [pack1])
loop3 = self.parse_trace("""
- v10[4xi32] = vec_raw_load_i(p0, i0, descr=float)
+ v10[4xi32] = vec_raw_load_f(p0, i0, descr=float)
f10 = raw_load_f(p0, i4, descr=float)
f11 = raw_load_f(p0, i5, descr=float)
""", False)
@@ -135,21 +150,21 @@
def test_int_to_float(self):
loop1 = self.parse_trace("""
- i10 = raw_load(p0, i0, descr=long)
- i11 = raw_load(p0, i1, descr=long)
+ i10 = raw_load_i(p0, i0, descr=long)
+ i11 = raw_load_i(p0, i1, descr=long)
i12 = int_signext(i10, 4)
i13 = int_signext(i11, 4)
f10 = cast_int_to_float(i12)
f11 = cast_int_to_float(i13)
""")
- pack1 = self.pack(loop1, 0, 2, None, I64)
- pack2 = self.pack(loop1, 2, 4, I64, I32_2)
- pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
+ pack1 = self.pack(loop1, 0, 2)
+ pack2 = self.pack(loop1, 2, 4)
+ pack3 = self.pack(loop1, 4, 6)
loop2 = self.schedule(loop1, [pack1, pack2, pack3])
loop3 = self.parse_trace("""
- v10[i64|2] = vec_raw_load_i(p0, i0, descr=long)
- v20[i32|2] = vec_int_signext(v10[i64|2], 4)
- v30[f64|2] = vec_cast_int_to_float(v20[i32|2])
+ v10[2xi64] = vec_raw_load_i(p0, i0, descr=long)
+ v20[2xi32] = vec_int_signext(v10[2xi64], 4)
+ v30[2xf64] = vec_cast_int_to_float(v20[2xi32])
""", False)
self.assert_equal(loop2, loop3)
@@ -161,12 +176,12 @@
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse_trace("""
- v10[i64|2] = vec_box(2)
- v20[i64|2] = vec_int_pack(v10[i64|2], i0, 0, 1)
- v30[i64|2] = vec_int_pack(v20[i64|2], i1, 1, 1)
- v40[i64|2] = vec_int_expand(73,2)
+ v10[2xi64] = vec_box_i()
+ v20[2xi64] = vec_int_pack(v10[2xi64], i0, 0, 1)
+ v30[2xi64] = vec_int_pack(v20[2xi64], i1, 1, 1)
+ v40[2xi64] = vec_int_expand(73,2)
#
- v50[i64|2] = vec_int_add(v30[i64|2], v40[i64|2])
+ v50[2xi64] = vec_int_add(v30[2xi64], v40[2xi64])
""", False)
self.assert_equal(loop2, loop3)
@@ -177,12 +192,12 @@
pack1 = self.pack(loop1, 0, 2, F64, F64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse_trace("""
- v10[f64|2] = vec_box(2)
- v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
- v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
- v40[f64|2] = vec_float_expand(73.0,2)
+ v10[2xf64] = vec_box_f()
+ v20[2xf64] = vec_float_pack(v10[2xf64], f0, 0, 1)
+ v30[2xf64] = vec_float_pack(v20[2xf64], f1, 1, 1)
+ v40[2xf64] = vec_float_expand(73.0,2)
#
- v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
+ v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
""", False)
self.assert_equal(loop2, loop3)
@@ -197,13 +212,13 @@
pack2 = self.pack(loop1, 2, 4, F64, F64)
loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
loop3 = self.parse_trace("""
- v10[f64|2] = vec_box(2)
- v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
- v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
- v40[f64|2] = vec_float_expand(f5,2) # only expaned once
+ v10[2xf64] = vec_box_f()
+ v20[2xf64] = vec_float_pack(v10[2xf64], f0, 0, 1)
+ v30[2xf64] = vec_float_pack(v20[2xf64], f1, 1, 1)
+ v40[2xf64] = vec_float_expand(f5,2) # only expaned once
#
- v50[f64|2] = vec_float_add(v30[f64|2], v40[f64|2])
- v60[f64|2] = vec_float_add(v50[f64|2], v40[f64|2])
+ v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
+ v60[2xf64] = vec_float_add(v50[2xf64], v40[2xf64])
""", False)
self.assert_equal(loop2, loop3)
@@ -217,7 +232,7 @@
loop1 = self.parse_trace("""
i10 = int_signext(i1, 4)
i11 = int_signext(i1, 4)
- """, additional_args=['v10[i64|2]'])
+ """, additional_args=['v10[2xi64]'])
pack1 = self.pack(loop1, 0, 2, I64, I32_2)
var = self.find_input_arg('v10', loop1)
def i1inv103204(v):
@@ -227,20 +242,20 @@
'getvector_of_box': i1inv103204,
})
loop3 = self.parse_trace("""
- v11[i32|2] = vec_int_signext(v10[i64|2], 4)
- """, False, additional_args=['v10[i64|2]'])
+ v11[2xi32] = vec_int_signext(v10[2xi64], 4)
+ """, False, additional_args=['v10[2xi64]'])
self.assert_equal(loop2, loop3)
def test_cast_float_to_int(self):
loop1 = self.parse_trace("""
- f10 = raw_load(p0, i1, descr=double)
- f11 = raw_load(p0, i2, descr=double)
- f12 = raw_load(p0, i3, descr=double)
- f13 = raw_load(p0, i4, descr=double)
- f14 = raw_load(p0, i5, descr=double)
- f15 = raw_load(p0, i6, descr=double)
- f16 = raw_load(p0, i7, descr=double)
- f17 = raw_load(p0, i8, descr=double)
+ f10 = raw_load_f(p0, i1, descr=double)
+ f11 = raw_load_f(p0, i2, descr=double)
+ f12 = raw_load_f(p0, i3, descr=double)
+ f13 = raw_load_f(p0, i4, descr=double)
+ f14 = raw_load_f(p0, i5, descr=double)
+ f15 = raw_load_f(p0, i6, descr=double)
+ f16 = raw_load_f(p0, i7, descr=double)
+ f17 = raw_load_f(p0, i8, descr=double)
#
i10 = cast_float_to_int(f10)
i11 = cast_float_to_int(f11)
@@ -281,31 +296,31 @@
'_prevent_signext': void
})
loop3 = self.parse_trace("""
- v10[f64|2] = vec_raw_load_f(p0, i1, descr=double)
- v11[f64|2] = vec_raw_load_f(p0, i3, descr=double)
- v12[f64|2] = vec_raw_load_f(p0, i5, descr=double)
- v13[f64|2] = vec_raw_load_f(p0, i7, descr=double)
- v14[i32|2] = vec_cast_float_to_int(v10[f64|2])
- v15[i32|2] = vec_cast_float_to_int(v11[f64|2])
- v16[i32|2] = vec_cast_float_to_int(v12[f64|2])
- v17[i32|2] = vec_cast_float_to_int(v13[f64|2])
- v18[i16|2] = vec_int_signext(v14[i32|2],2)
- v19[i16|2] = vec_int_signext(v15[i32|2],2)
- v20[i16|2] = vec_int_signext(v16[i32|2],2)
- v21[i16|2] = vec_int_signext(v17[i32|2],2)
- v22[i16|4] = vec_int_pack(v18[i16|2], v19[i16|2], 2, 2)
- v23[i16|6] = vec_int_pack(v22[i16|4], v20[i16|2], 4, 2)
- v24[i16|8] = vec_int_pack(v23[i16|6], v21[i16|2], 6, 2)
- vec_raw_store(p1, i1, v24[i16|8], descr=short)
+ v10[2xf64] = vec_raw_load_f(p0, i1, descr=double)
+ v11[2xf64] = vec_raw_load_f(p0, i3, descr=double)
+ v12[2xf64] = vec_raw_load_f(p0, i5, descr=double)
+ v13[2xf64] = vec_raw_load_f(p0, i7, descr=double)
+ v14[2xi32] = vec_cast_float_to_int(v10[2xf64])
+ v15[2xi32] = vec_cast_float_to_int(v11[2xf64])
+ v16[2xi32] = vec_cast_float_to_int(v12[2xf64])
+ v17[2xi32] = vec_cast_float_to_int(v13[2xf64])
+ v18[2xi16] = vec_int_signext(v14[2xi32],2)
+ v19[2xi16] = vec_int_signext(v15[2xi32],2)
+ v20[2xi16] = vec_int_signext(v16[2xi32],2)
+ v21[2xi16] = vec_int_signext(v17[2xi32],2)
+ v22[4xi16] = vec_int_pack(v18[2xi16], v19[2xi16], 2, 2)
+ v23[6xi16] = vec_int_pack(v22[4xi16], v20[2xi16], 4, 2)
+ v24[8xi16] = vec_int_pack(v23[6xi16], v21[2xi16], 6, 2)
+ vec_raw_store(p1, i1, v24[8xi16], descr=short)
""", False)
self.assert_equal(loop2, loop3)
def test_cast_float_to_single_float(self):
loop1 = self.parse_trace("""
- f10 = raw_load(p0, i1, descr=double)
- f11 = raw_load(p0, i2, descr=double)
- f12 = raw_load(p0, i3, descr=double)
- f13 = raw_load(p0, i4, descr=double)
+ f10 = raw_load_f(p0, i1, descr=double)
+ f11 = raw_load_f(p0, i2, descr=double)
+ f12 = raw_load_f(p0, i3, descr=double)
+ f13 = raw_load_f(p0, i4, descr=double)
#
i10 = cast_float_to_singlefloat(f10)
i11 = cast_float_to_singlefloat(f11)
@@ -322,19 +337,19 @@
pack3 = self.pack(loop1, 8, 12, I32, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3])
loop3 = self.parse_trace("""
- v44[f64|2] = vec_raw_load_f(p0, i1, descr=double)
- v45[f64|2] = vec_raw_load_f(p0, i3, descr=double)
- v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2])
- v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2])
- v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2)
- vec_raw_store(p1, i1, v41[i32|4], descr=float)
+ v44[2xf64] = vec_raw_load_f(p0, i1, descr=double)
+ v45[2xf64] = vec_raw_load_f(p0, i3, descr=double)
+ v46[2xi32] = vec_cast_float_to_singlefloat(v44[2xf64])
+ v47[2xi32] = vec_cast_float_to_singlefloat(v45[2xf64])
+ v41[4xi32] = vec_int_pack(v46[2xi32], v47[2xi32], 2, 2)
+ vec_raw_store(p1, i1, v41[4xi32], descr=float)
""", False)
self.assert_equal(loop2, loop3)
def test_all(self):
loop1 = self.parse_trace("""
- i10 = raw_load(p0, i1, descr=long)
- i11 = raw_load(p0, i2, descr=long)
+ i10 = raw_load_i(p0, i1, descr=long)
+ i11 = raw_load_i(p0, i2, descr=long)
#
i12 = int_and(i10, 255)
i13 = int_and(i11, 255)
@@ -347,20 +362,20 @@
pack3 = self.pack(loop1, 4, 6, I64, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3],
prepend_invariant=True)
loop3 = self.parse_trace("""
- v9[i64|2] = vec_int_expand(255,2)
- v10[i64|2] = vec_raw_load_i(p0, i1, descr=long)
- v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
- guard_true(v11[i64|2]) []
+ v9[2xi64] = vec_int_expand(255,2)
+ v10[2xi64] = vec_raw_load_i(p0, i1, descr=long)
+ v11[2xi64] = vec_int_and(v10[2xi64], v9[2xi64])
+ guard_true(v11[2xi64]) []
""", False)
self.assert_equal(loop2, loop3)
def test_split_load_store(self):
loop1 = self.parse_trace("""
- i10 = raw_load(p0, i1, descr=float)
- i11 = raw_load(p0, i2, descr=float)
- i12 = raw_load(p0, i3, descr=float)
- i13 = raw_load(p0, i4, descr=float)
+ i10 = raw_load_f(p0, i1, descr=float)
+ i11 = raw_load_f(p0, i2, descr=float)
+ i12 = raw_load_f(p0, i3, descr=float)
+ i13 = raw_load_f(p0, i4, descr=float)
raw_store(p0, i3, i10, descr=float)
raw_store(p0, i4, i11, descr=float)
""")
@@ -368,10 +383,10 @@
pack2 = self.pack(loop1, 4, 6, I32_2, None)
loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
loop3 = self.parse_trace("""
- v1[i32|4] = vec_raw_load_i(p0, i1, descr=float)
- i10 = vec_int_unpack(v1[i32|4], 0, 1)
+ v1[4xi32] = vec_raw_load_i(p0, i1, descr=float)
+ i10 = vec_int_unpack(v1[4xi32], 0, 1)
raw_store(p0, i3, i10, descr=float)
- i11 = vec_int_unpack(v1[i32|4], 1, 1)
+ i11 = vec_int_unpack(v1[4xi32], 1, 1)
raw_store(p0, i4, i11, descr=float)
""", False)
# unfortunate ui32 is the type for float32... the unsigned u is for
@@ -386,9 +401,9 @@
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse_trace("""
- v1[i64|2] = vec_int_expand(255,2)
- v2[i64|2] = vec_int_expand(i1,2)
- v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
+ v1[2xi64] = vec_int_expand(255,2)
+ v2[2xi64] = vec_int_expand(i1,2)
+ v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
""", False)
self.assert_equal(loop2, loop3)
@@ -400,9 +415,9 @@
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse_trace("""
- v1[i64|2] = vec_int_expand(255, 2)
- v2[i64|2] = vec_int_expand(i1, 2)
- v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
+ v1[2xi64] = vec_int_expand(255, 2)
+ v2[2xi64] = vec_int_expand(i1, 2)
+ v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
""", False)
self.assert_equal(loop2, loop3)
@@ -419,19 +434,19 @@
pack4 = self.pack(loop1, 4, 6, I64, I64)
loop2 = self.schedule(loop1, [pack1,pack4], prepend_invariant=True)
loop3 = self.parse_trace("""
- v1[i64|2] = vec_int_expand(255,2)
- v2[i64|2] = vec_box(2)
- v3[i64|2] = vec_int_pack(v2[i64|2], i1, 0, 1)
- v4[i64|2] = vec_int_pack(v3[i64|2], i2, 1, 1)
- v5[i64|2] = vec_int_and(v1[i64|2], v4[i64|2])
- i10 = vec_int_unpack(v5[i64|2], 0, 1)
+ v1[2xi64] = vec_int_expand(255,2)
+ v2[2xi64] = vec_box_i()
+ v3[2xi64] = vec_int_pack(v2[2xi64], i1, 0, 1)
+ v4[2xi64] = vec_int_pack(v3[2xi64], i2, 1, 1)
+ v5[2xi64] = vec_int_and(v1[2xi64], v4[2xi64])
+ i10 = vec_int_unpack(v5[2xi64], 0, 1)
i12 = uint_floordiv(i10,1)
- i11 = vec_int_unpack(v5[i64|2], 1, 1)
+ i11 = vec_int_unpack(v5[2xi64], 1, 1)
i13 = uint_floordiv(i11,1)
- v6[i64|2] = vec_box(2)
- v7[i64|2] = vec_int_pack(v6[i64|2], i12, 0, 1)
- v8[i64|2] = vec_int_pack(v7[i64|2], i13, 1, 1)
- v9[i64|2] = vec_int_and(v4[i64|2], v8[i64|2])
+ v6[2xi64] = vec_box_i()
+ v7[2xi64] = vec_int_pack(v6[2xi64], i12, 0, 1)
+ v8[2xi64] = vec_int_pack(v7[2xi64], i13, 1, 1)
+ v9[2xi64] = vec_int_and(v4[2xi64], v8[i64])
""", False)
self.assert_equal(loop2, loop3)
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py
b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -614,13 +614,9 @@
self.savings += benefit_factor * times - cost
def cb_signext(self, pack):
- op0 = pack.operations[0].getoperation()
- size = op0.getarg(1).getint()
- if pack.output_type is None:
- return 1,0
- orig_size = pack.output_type.getsize()
- if size == orig_size:
- return 0,0
+ left = pack.leftmost()
+ if left.cast_to_bytesize() == left.cast_from_bytesize():
+ return 0, 0
# no benefit for this operation! needs many x86 instrs
return 1,0
@@ -836,6 +832,8 @@
pack.split(newpacks, self.vec_reg_size)
continue
if load < Pack.FULL:
+ for op in pack.operations:
+ op.priority = -100
pack.clear()
self.packs[i] = None
continue
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -99,7 +99,7 @@
_attrs_ = ('datatype', 'bytesize', 'signed')
datatype = '\x00'
- bytesize = -1
+ bytesize = -1 # -1 means the biggest size known to the machine
signed = True
def inittype(self):
@@ -112,10 +112,17 @@
descr = self.getdescr()
type = self.type
if descr.is_array_of_floats() or descr.concrete_type == 'f':
- type = FLOAT
+ type = 'f'
self.bytesize = descr.get_item_size_in_bytes()
self.sign = descr.is_item_signed()
self.datatype = type
+ elif self.opnum == rop.INT_SIGNEXT:
+ arg0 = self.getarg(0)
+ arg1 = self.getarg(1)
+ self.setdatatype('i', arg1.value, arg0.signed)
+ elif self.is_typecast():
+ ft,tt = self.cast_types()
+ self.setdatatype(tt, self.cast_to_bytesize(), tt == 'i')
else:
# pass through the type of the first input argument
if self.numargs() == 0:
@@ -123,7 +130,7 @@
arg0 = self.getarg(0)
self.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
assert self.datatype != '\x00'
- assert self.bytesize > 0
+ #assert self.bytesize > 0
def setdatatype(self, data_type, bytesize, signed):
self.datatype = data_type
@@ -134,7 +141,7 @@
sign = '-'
if not self.signed:
sign = '+'
- return 'Type(%s%s, %d)' % (sign, self.type, self.size)
+ return 'Type(%s%s, %d)' % (sign, self.type, self.bytesize)
class AbstractResOpOrInputArg(AbstractValue, Typed):
_attrs_ = ('_forwarded',)
@@ -159,6 +166,7 @@
boolinverse = -1
vector = -1 # -1 means, no vector equivalent, -2 it is a vector statement
casts = ('\x00', -1, '\x00', -1)
+ count = -1
def getopnum(self):
return self.opnum
@@ -409,15 +417,6 @@
def forget_value(self):
pass
- def casts_box(self):
- return False
-
- def cast_to(self):
- return ('\x00',-1)
-
- def cast_from(self):
- return ('\x00',-1)
-
def is_label(self):
return self.getopnum() == rop.LABEL
@@ -430,6 +429,26 @@
def returns_vector(self):
return self.type != 'v' and self.vector == -2
+ def is_typecast(self):
+ return False
+
+ def cast_types(self):
+ return self.casts[0], self.casts[2]
+
+    def cast_to_bytesize(self):
+        """ Size in bytes of the value this cast produces.
+
+        The `casts` tuple is read as
+        (from_type, from_size, to_type, to_size) -- the same order
+        cast_types() takes the two types from -- so the destination
+        size is the last entry.
+        NOTE(review): assumes `casts` is filled from the `_cast_ops`
+        table without reordering -- confirm. """
+        return self.casts[3]
+
+    def cast_from_bytesize(self):
+        """ Size in bytes of the value this cast consumes, i.e. the
+        second entry of the `casts` tuple. """
+        return self.casts[1]
+
+ def casts_up(self):
+ return self.cast_to_bytesize() > self.cast_from_bytesize()
+
+ def casts_down(self):
+ # includes the cast as noop
+ return self.cast_to_bytesize() <= self.cast_from_bytesize()
+
+
# ===================
# Top of the hierachy
# ===================
@@ -598,7 +617,7 @@
class CastOp(object):
_mixin_ = True
- def casts_box(self):
+ def is_typecast(self):
return True
def cast_to(self):
@@ -614,15 +633,40 @@
return (to_type,size)
def cast_from(self):
- return ('\x00',-1)
+ type, size, a, b = self.casts
+ if size == -1:
+ return self.bytesize
+ return (type, size)
+
+class SignExtOp(object):
+ # Mixin added to the INT_SIGNEXT operation class. The sizes of the
+ # cast are read from the operation's arguments instead of from the
+ # static `casts` tuple.
+ _mixin_ = True
+
+ def is_typecast(self):
+ return True
+
+ def cast_types(self):
+ # (from_type, to_type), taken from slots 0 and 2 of `casts`
+ return self.casts[0], self.casts[2]
+
+ def cast_to_bytesize(self):
+ # the target size is the second argument, which must be a ConstInt
+ from rpython.jit.metainterp.history import ConstInt
+ arg = self.getarg(1)
+ assert isinstance(arg, ConstInt)
+ return arg.value
+
+ def cast_from_bytesize(self):
+ # the source size is the byte size of the first argument
+ arg = self.getarg(0)
+ return arg.bytesize
class VectorOp(object):
_mixin_ = True
- _attrs_ = ('count',)
def repr_rpython(self):
return repr_rpython(self, 'bv')
+    def vector_bytesize(self):
+        """ Total size in bytes of the vector: the size of one element
+        times the number of elements. The element count must already
+        be set (count > 0). """
+        assert self.count > 0
+        # the per-element size attribute is spelled `bytesize`; typed
+        # operations declare no `byte_size` attribute
+        return self.bytesize * self.count
+
def same_shape(self, other):
""" NOT_RPYTHON """
if not other.is_vector():
@@ -675,10 +719,12 @@
class InputArgInt(IntOp, AbstractInputArg):
def __init__(self, intval=0):
self.setint(intval)
+ self.datatype = 'i'
class InputArgFloat(FloatOp, AbstractInputArg):
def __init__(self, f=longlong.ZEROF):
self.setfloatstorage(f)
+ self.datatype = 'f'
@staticmethod
def fromfloat(x):
@@ -687,13 +733,14 @@
class InputArgRef(RefOp, AbstractInputArg):
def __init__(self, r=lltype.nullptr(llmemory.GCREF.TO)):
self.setref_base(r)
+ self.datatype = 'r'
def reset_value(self):
self.setref_base(lltype.nullptr(llmemory.GCREF.TO))
class InputArgVector(VectorOp, AbstractInputArg):
- def __init__(self):
- pass
+ def __init__(self, datatype):
+ self.datatype = datatype
def returns_vector(self):
return True
@@ -947,11 +994,10 @@
'VEC_CAST_INT_TO_FLOAT/1/f',
'_VEC_CAST_LAST',
- 'VEC_INT_BOX/1/i',
+ 'VEC_BOX/0/if',
'VEC_INT_UNPACK/3/i', # iX|fX = VEC_INT_UNPACK(vX, index,
item_count)
'VEC_INT_PACK/4/i', # VEC_INT_PACK(vX, var/const, index,
item_count)
'VEC_INT_EXPAND/2/i', # vX = VEC_INT_EXPAND(var/const, item_count)
- 'VEC_FLOAT_BOX/1/f',
'VEC_FLOAT_UNPACK/3/f', # iX|fX = VEC_FLOAT_UNPACK(vX, index,
item_count)
'VEC_FLOAT_PACK/4/f', # VEC_FLOAT_PACK(vX, var/const, index,
item_count)
'VEC_FLOAT_EXPAND/2/f', # vX = VEC_FLOAT_EXPAND(var/const,
item_count)
@@ -1090,13 +1136,13 @@
]
_cast_ops = {
- 'INT_SIGNEXT': ('i', 0, 'i', 0),
'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4),
'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8),
'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4),
'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8),
- 'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
- 'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
+ 'INT_SIGNEXT': ('i', 0, 'i', 0),
+ #'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
+ #'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
}
# ____________________________________________________________
@@ -1187,6 +1233,8 @@
else:
baseclass = PlainResOp
mixins = [arity2mixin.get(arity, N_aryOp)]
+ if name.startswith('VEC'):
+ mixins.append(VectorOp)
if result_type == 'i':
mixins.append(IntOp)
elif result_type == 'f':
@@ -1196,9 +1244,9 @@
else:
assert result_type == 'n'
if name in _cast_ops:
+ if name == "INT_SIGNEXT":
+ mixins.append(SignExtOp)
mixins.append(CastOp)
- if name.startswith('VEC'):
- mixins.insert(1,VectorOp)
cls_name = '%s_OP' % name
bases = (get_base_class(tuple(mixins), baseclass),)
diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py
--- a/rpython/jit/tool/oparser.py
+++ b/rpython/jit/tool/oparser.py
@@ -344,9 +344,19 @@
if res in self.vars:
raise ParseError("Double assign to var %s in line: %s" % (res,
line))
resop = self.create_op(opnum, args, res, descr, fail_args)
+ self.update_vector_count(resop, res)
self.vars[res] = resop
return resop
+    def update_vector_count(self, resop, var):
+        """ If the result name carries a vector shape suffix such as
+        `v10[2xi64]` or `v1[4xui32]`, copy the element count, the
+        signedness, the datatype letter and the element size in bytes
+        onto the freshly created operation. Names without such a
+        suffix leave `resop` untouched. """
+        # raw string: \[ and \d are regex escapes, not string escapes
+        pattern = re.compile(r'.*\[(\d+)x(u?)(i|f)(\d+)\]')
+        match = pattern.match(var)
+        if match:
+            resop.count = int(match.group(1))
+            # a 'u' in front of the type letter marks an unsigned type
+            resop.signed = not (match.group(2) == 'u')
+            resop.datatype = match.group(3)
+            # the number after the type letter is divided by 8 to get bytes
+            resop.bytesize = int(match.group(4)) // 8
+
def parse_op_no_result(self, line):
opnum, args, descr, fail_args = self.parse_op(line)
res = self.create_op(opnum, args, None, descr, fail_args)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit