Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79589:7783b6299af3
Date: 2015-09-11 12:51 +0200
http://bitbucket.org/pypy/pypy/changeset/7783b6299af3/
Log:    adjusting the resop creation to remove the vectorbox, work in progress
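
At its core this work-in-progress change replaces the old SchedulerData/
schedulable_nodes machinery with a SchedulerState that owns a worklist, and
Scheduler.next() walks that worklist round robin with a visited counter so it
cannot spin forever. A minimal, self-contained sketch of that idea (Node and
MiniScheduler are stand-ins for illustration, not the PyPy classes):

    class Node(object):
        def __init__(self, name, deps=0):
            self.name = name
            self.depends = deps      # unresolved dependencies
            self.emitted = False

    class MiniScheduler(object):
        def schedulable(self, node):
            return node.depends == 0

        def next(self, worklist):
            visited = 0
            while worklist:
                if visited == len(worklist):
                    return None          # a full pass found nothing -> give up
                node = worklist.pop()
                if node.emitted:
                    continue
                if self.schedulable(node):
                    return node
                worklist.insert(0, node) # rotate to the front, try the rest
                visited += 1
            return None

    worklist = [Node("a"), Node("b", deps=1)]
    sched = MiniScheduler()
    assert sched.next(worklist).name == "a"
    assert sched.next(worklist) is None  # "b" still blocked, but no endless spin

The visited counter is what the new test_next_must_not_loop_forever in
test_schedule.py below checks: when nothing in the worklist is schedulable,
next() must return None instead of looping.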
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -8,33 +8,44 @@
from rpython.jit.metainterp.jitexc import NotAProfitableLoop
-class SchedulerData(object):
- pass
+class SchedulerState(object):
+ def __init__(self, graph):
+ self.renamer = Renamer()
+ self.graph = graph
+ self.oplist = []
+ self.worklist = []
+
+ def post_schedule(self):
+ pass
+
+ def profitable(self):
+ return self.costmodel.profitable()
+
+ def prepare(self):
+ pass
+
+ def has_more(self):
+ return len(self.worklist) > 0
+
class Scheduler(object):
""" The base class to be instantiated to (re)schedule a vector trace. """
- def __init__(self, graph, sched_data):
- assert isinstance(sched_data, SchedulerData)
- self.graph = graph
- self.schedulable_nodes = self.graph.schedulable_nodes
- self.sched_data = sched_data
- self.oplist = None
- self.renamer = None
+ def __init__(self):
+ pass
- def has_more(self):
- return len(self.schedulable_nodes) > 0
-
- def next_index(self, candidate_list):
- i = len(candidate_list)-1
- while i >= 0:
- candidate = candidate_list[i]
- if candidate.emitted:
- del candidate_list[i]
- i -= 1
+ def next(self, state):
+ worklist = state.worklist
+ visited = 0
+ while len(worklist) > 0:
+ if visited == len(worklist):
+ return None
+ node = worklist.pop()
+ if node.emitted:
continue
- if self.schedulable(candidate):
- return i
- i -= 1
- return -1
+ if self.schedulable(node):
+ return node
+ worklist.insert(0, node)
+ visited += 1
+ return None
def schedulable(self, candidate):
""" Is the candidate scheduleable? Boils down to dependency_count == 0
@@ -53,12 +64,14 @@
return False
return candidate.depends_count() == 0
- def scheduled(self, node):
+ def scheduled(self, node, state):
""" Call this function if an operation has been emitted
adds new operations to the schedule list if
their dependency count drops to zero.
In addition it keeps the list sorted (see priority)
"""
+ state.renamer.rename(op)
+ state.unpack_from_vector(op, self)
node.position = len(self.oplist)
for dep in node.provides()[:]: # COPY
to = dep.to
@@ -85,36 +98,37 @@
node.clear_dependencies()
node.emitted = True
- def emit_into(self, oplist, renamer, unpack=False):
+ def walk_and_emit(self, state): # TODO oplist, renamer, unpack=False):
""" Emit all the operations into the oplist parameter.
Initiates the scheduling.
"""
- self.renamer = renamer
- self.oplist = oplist
- self.unpack = unpack
+ assert isinstance(state, SchedulerState)
+ while state.has_more():
+ node = self.next(state)
+ if node:
+ if not state.emit(node):
+ if not node.emitted:
+ op = node.getoperation()
+ scheduler.scheduled(node, state)
+ continue
- while self.has_more():
- i = self.next_index(self.schedulable_nodes)
- if i >= 0:
- candidate = self.schedulable_nodes[i]
- del self.schedulable_nodes[i]
- self.sched_data.schedule_candidate(self, candidate)
- continue
+
# it happens that packs can emit many nodes that have been
# added to the schedulable_nodes list, in this case it could
# be that no next exists even though the list contains elements
- if not self.has_more():
+ if not state.has_more():
break
raise AssertionError("schedule failed cannot continue. possible reason: cycle")
- jump_node = self.graph.nodes[-1]
- jump_op = jump_node.getoperation()
- renamer.rename(jump_op)
- assert jump_op.getopnum() == rop.JUMP
- self.sched_data.unpack_from_vector(jump_op, self)
- oplist.append(jump_op)
+ # TODO
+ #jump_node = self.graph.nodes[-1]
+ #jump_op = jump_node.getoperation()
+ #renamer.rename(jump_op)
+ #assert jump_op.getopnum() == rop.JUMP
+ #self.sched_data.unpack_from_vector(jump_op, self)
+ #oplist.append(jump_op)
def vectorbox_outof_box(box, count=-1, size=-1, type='-'):
if box.type not in (FLOAT, INT):
@@ -178,7 +192,7 @@
@staticmethod
def of(box, count=-1):
- assert isinstance(box, BoxVector)
+ assert box.type == 'V'
if count == -1:
count = box.getcount()
return PackType(box.gettype(), box.getsize(), box.getsigned(), count)
@@ -210,6 +224,7 @@
assert count > 1
assert self.type in ('i','f')
assert self.size > 0
+ xxx
return BoxVector(self.type, count, self.size, self.signed)
def combine(self, other):
@@ -312,10 +327,9 @@
self.before_argument_transform(args)
self.transform_arguments(args)
#
- result = op.result
- result = self.transform_result(result)
+ vop = ResOperation(op.vector, args, op.getdescr())
+ #result = self.transform_result(op)
#
- vop = ResOperation(op.vector, args, result, op.getdescr())
if op.is_guard():
assert isinstance(op, GuardResOp)
assert isinstance(vop, GuardResOp)
@@ -334,7 +348,7 @@
if i >= vbox.getcount():
break
op = node.getoperation()
- self.sched_data.setvector_of_box(op.result, i, vbox)
+ self.sched_data.setvector_of_box(op, i, vbox)
return vbox
def new_result_vector_box(self):
@@ -348,9 +362,18 @@
return self.pack.operations
def transform_arguments(self, args):
- """ Transforming one argument to a vector box argument """
+ """ Transforming one argument to a vector box argument
+ The following cases can occur:
+ 1) argument is present in the box_to_vbox map.
+ a) vector can be reused immediately (simple case)
+ b) vector is too big
+ c) vector is too small
+ 2) argument is not known to reside in a vector
+ a) expand vars/consts before the label and add as argument
+ b) expand vars created in the loop body
+ """
for i,arg in enumerate(args):
- if isinstance(arg, BoxVector):
+ if arg.returns_vector():
continue
if not self.is_vector_arg(i):
continue
@@ -478,7 +501,7 @@
return new_box
def _check_vec_pack(self, op):
- result = op.result
+ result = op
arg0 = op.getarg(0)
arg1 = op.getarg(1)
index = op.getarg(2)
@@ -754,63 +777,89 @@
raise NotImplementedError("missing vecop for '%s'" % (op.getopname(),))
return op2vecop
-class VecScheduleData(SchedulerData):
- def __init__(self, vec_reg_size, costmodel, inputargs):
+class VecScheduleState(SchedulerState):
+ def __init__(self, graph, packset, cpu, costmodel):
+ SchedulerState.__init__(self, graph)
self.box_to_vbox = {}
- self.vec_reg_size = vec_reg_size
+ self.cpu = cpu
+ self.vec_reg_size = cpu.vector_register_size
self.invariant_oplist = []
self.invariant_vector_vars = []
self.expanded_map = {}
self.costmodel = costmodel
self.inputargs = {}
- for arg in inputargs:
+ self.packset = packset
+ for arg in graph.loop.inputargs:
self.inputargs[arg] = None
self.seen = {}
- def schedule_candidate(self, scheduler, candidate):
+ def post_schedule(self):
+ pass
+ # TODO label rename
+ if vector:
+ # XXX
+ # add accumulation info to the descriptor
+ #for version in self.loop.versions:
+ # # this needs to be done for renamed (accum arguments)
+ # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ]
+ #self.appended_arg_count = len(sched_data.invariant_vector_vars)
+ ##for guard_node in graph.guards:
+ ## op = guard_node.getoperation()
+ ## failargs = op.getfailargs()
+ ## for i,arg in enumerate(failargs):
+ ## if arg is None:
+ ## continue
+ ## accum = arg.getaccum()
+ ## if accum:
+ ## pass
+ ## #accum.save_to_descr(op.getdescr(),i)
+ #self.has_two_labels = len(sched_data.invariant_oplist) > 0
+ #self.loop.operations = self.prepend_invariant_operations(sched_data)
+ pass
+
+
+ def profitable(self):
+ return self.costmodel.profitable()
+
+ def prepare(self):
+ SchedulerState.prepare(self)
+ self.graph.prepare_for_scheduling()
+ self.packset.accumulate_prepare(self)
+ for arg in self.graph.loop.label.getarglist():
+ self.seen[arg] = None
+
+ def emit(self, node, scheduler):
""" If you implement a scheduler this operations is called
to emit the actual operation into the oplist of the scheduler.
"""
- renamer = scheduler.renamer
- if candidate.pack:
- for node in candidate.pack.operations:
- renamer.rename(node.getoperation())
+ if node.pack:
+ for node in node.pack.operations:
scheduler.scheduled(node)
- self.as_vector_operation(scheduler, candidate.pack)
- else:
- op = candidate.getoperation()
- renamer.rename(op)
- self.unpack_from_vector(op, scheduler)
- scheduler.scheduled(candidate)
- op = candidate.getoperation()
- #
- # prevent some instructions in the resulting trace!
- if op.getopnum() in (rop.DEBUG_MERGE_POINT,
- rop.GUARD_EARLY_EXIT):
- return
- scheduler.oplist.append(op)
+ self.as_vector_operation(node.pack)
+ return True
+ return False
- def as_vector_operation(self, scheduler, pack):
+
+ def as_vector_operation(self, pack):
""" Transform a pack into a single or several operation.
Calls the as_vector_operation of the OpToVectorOp implementation.
"""
assert pack.opcount() > 1
# properties that hold for the pack are:
- # + isomorphism (see func above)
+ # + isomorphism (see func)
# + tightly packed (no room between vector elems)
- oplist = scheduler.oplist
- position = len(oplist)
- op = pack.operations[0].getoperation()
- determine_trans(op).as_vector_operation(pack, self, scheduler, oplist)
+ position = len(self.oplist)
+ op = pack.leftmost().getoperation()
+ determine_trans(op).as_vector_operation(pack, self, self.oplist)
#
if pack.is_accumulating():
box = oplist[position].result
assert box is not None
for node in pack.operations:
op = node.getoperation()
- assert op.result is not None
- scheduler.renamer.start_renaming(op.result, box)
+ assert not op.returns_void()
+ scheduler.renamer.start_renaming(op, box)
def unpack_from_vector(self, op, scheduler):
""" If a box is needed that is currently stored within a vector
@@ -820,17 +869,17 @@
# unpack for an immediate use
for i, arg in enumerate(op.getarglist()):
- if isinstance(arg, Box):
+ if not arg.is_constant():
argument = self._unpack_from_vector(i, arg, scheduler)
if arg is not argument:
op.setarg(i, argument)
- if op.result:
- self.seen[op.result] = None
+ if not op.returns_void():
+ self.seen[op] = None
# unpack for a guard exit
if op.is_guard():
fail_args = op.getfailargs()
for i, arg in enumerate(fail_args):
- if arg and isinstance(arg, Box):
+ if arg and not arg.is_constant():
argument = self._unpack_from_vector(i, arg, scheduler)
if arg is not argument:
fail_args[i] = argument
@@ -865,7 +914,7 @@
def setvector_of_box(self, box, off, vector):
assert off < vector.getcount()
- assert not isinstance(box, BoxVector)
+ assert box.type != 'V'
self.box_to_vbox[box] = (off, vector)
def opcount_filling_vector_register(pack, vec_reg_size):
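
Taken together, the schedule.py hunks split the old VecScheduleData into a
stateless Scheduler plus a per-run SchedulerState/VecScheduleState. A rough,
self-contained sketch of the intended control flow under that split; MiniNode/
MiniState/MiniScheduler are stand-ins, the cycle assertion is reduced to a
break, and whether prepare()/post_schedule() end up being called from
walk_and_emit() or from the optimizer's schedule() is still in flux here:

    class MiniNode(object):
        def __init__(self, op):
            self.op = op
            self.emitted = False
        def getoperation(self):
            return self.op

    class MiniState(object):
        def __init__(self, worklist):
            self.worklist = worklist
            self.oplist = []
        def prepare(self):
            pass                       # e.g. seed seen args, accumulator boxes
        def has_more(self):
            return len(self.worklist) > 0
        def emit(self, node):
            return False               # True would mean "handled as a vector pack"
        def post_schedule(self):
            pass                       # e.g. rename label/jump, prepend invariants

    class MiniScheduler(object):
        def next(self, state):
            return state.worklist.pop() if state.worklist else None
        def walk_and_emit(self, state):
            state.prepare()
            while state.has_more():
                node = self.next(state)
                if node is None:
                    break              # the real code raises on a suspected cycle
                if not state.emit(node):
                    state.oplist.append(node.getoperation())
                    node.emitted = True
            state.post_schedule()

    state = MiniState([MiniNode("op1"), MiniNode("op2")])
    MiniScheduler().walk_and_emit(state)
    assert state.oplist == ["op2", "op1"]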
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -6,13 +6,28 @@
from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
Dependency,
IndexVar, MemoryRef, Node)
-from rpython.jit.metainterp.optimizeopt.vector import TraceLoop
+from rpython.jit.metainterp.optimizeopt.vector import VectorLoop
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.backend.llgraph.runner import ArrayDescr
from rpython.rtyper.lltypesystem import rffi
from rpython.rtyper.lltypesystem import lltype
from rpython.conftest import option
+class FakeDependencyGraph(DependencyGraph):
+ """ A dependency graph that is able to emit every instruction
+ one by one. """
+ def __init__(self, loop):
+ self.loop = loop
+ if isinstance(loop, list):
+ self.nodes = loop
+ else:
+ operations = loop.operations
+ self.nodes = [Node(op,i) for i,op in \
+ enumerate(operations)]
+ self.schedulable_nodes = list(reversed(self.nodes))
+ self.guards = []
+
+
class DependencyBaseTest(BaseTest):
def setup_method(self, method):
@@ -26,12 +41,20 @@
assert node.independent(node)
return self.last_graph
- def parse_loop(self, ops):
+ def parse_loop(self, ops, add_label=True, **kwargs):
loop = self.parse(ops, postprocess=self.postprocess)
+ loop.operations = filter(lambda op: op.getopnum() != rop.DEBUG_MERGE_POINT, loop.operations)
token = JitCellToken()
- label = ResOperation(rop.LABEL, loop.inputargs, descr=TargetToken(token))
- loop = TraceLoop(label, loop.operations[:-1], loop.operations[-1])
+ if add_label:
+ label = ResOperation(rop.LABEL, loop.inputargs, descr=TargetToken(token))
+ else:
+ label = loop.operations[0]
+ label.setdescr(TargetToken(token))
+ loop = VectorLoop(label, loop.operations[1:-1], loop.operations[-1])
loop.jump.setdescr(token)
+ for op in loop.operations:
+ if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
+ op.setdescr(compile.ResumeAtLoopHeaderDescr())
return loop
def assert_edges(self, graph, edge_list, exceptions):
@@ -533,8 +556,7 @@
n1,n2 = FakeNode(1), FakeNode(2)
n1.edge_to(n2); n2.edge_to(n1)
- graph = FakeDependencyGraph()
- graph.nodes = [n1,n2]
+ graph = FakeDependencyGraph([n1,n2])
cycle = graph.cycles()
assert cycle == [n1, n2]
@@ -547,7 +569,7 @@
n1,n2,n3,n4 = FakeNode(1), FakeNode(2), FakeNode(3), FakeNode(4)
n1.edge_to(n3); n3.edge_to(n4); n4.edge_to(n1)
- graph = FakeDependencyGraph()
+ graph = FakeDependencyGraph([n1,n2])
graph.nodes = [n1,n2,n3]
cycle = graph.cycles()
assert cycle is not None
@@ -584,10 +606,6 @@
def __repr__(self):
return "n%d" % self.opidx
-class FakeDependencyGraph(DependencyGraph):
- def __init__(self):
- pass
-
class TestLLtype(BaseTestDependencyGraph, LLtypeMixin):
pass
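
The FakeDependencyGraph fixture moved into this file (see the hunk above) and
now accepts either a parsed loop or a bare list of node stubs, which is what
the cycle tests use. A tiny self-contained imitation of those two construction
modes (StubNode/StubGraph are illustrative stand-ins, not the real classes):

    class StubNode(object):
        def __init__(self, opidx):
            self.opidx = opidx

    class StubGraph(object):
        def __init__(self, loop_or_nodes):
            if isinstance(loop_or_nodes, list):
                self.nodes = loop_or_nodes          # list form, used by the cycle tests
            else:
                ops = loop_or_nodes.operations      # loop form, one node per operation
                self.nodes = [StubNode(i) for i, _ in enumerate(ops)]
            self.schedulable_nodes = list(reversed(self.nodes))
            self.guards = []

    graph = StubGraph([StubNode(1), StubNode(2)])
    assert [n.opidx for n in graph.schedulable_nodes] == [2, 1]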
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -3,13 +3,14 @@
from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.util import equaloplists
from rpython.jit.metainterp.optimizeopt.renamer import Renamer
-from rpython.jit.metainterp.optimizeopt.vec import (VecScheduleData,
+from rpython.jit.metainterp.optimizeopt.vector import (VecScheduleState,
Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel,
PackSet)
from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
-from rpython.jit.metainterp.optimizeopt.schedule import PackType
+from rpython.jit.metainterp.optimizeopt.schedule import PackType, Scheduler
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
-from rpython.jit.metainterp.optimizeopt.test.test_dependency import DependencyBaseTest
+from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest,
+ FakeDependencyGraph)
from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData,
FakeJitDriverStaticData)
from rpython.jit.metainterp.resoperation import rop, ResOperation
@@ -29,19 +30,10 @@
self.packs = packs
self.vec_reg_size = 16
-class FakeDependencyGraph(DependencyGraph):
- """ A dependency graph that is able to emit every instruction
- one by one. """
- def __init__(self, loop):
- self.nodes = [Node(op,i) for i,op in \
- enumerate(loop.operations)]
- self.schedulable_nodes = list(reversed(self.nodes))
- self.guards = []
-
class SchedulerBaseTest(DependencyBaseTest):
- def namespace(self):
- return {
+ def setup_class(self):
+ self.namespace = {
'double': self.floatarraydescr,
'float': self.float32arraydescr,
'long': self.arraydescr,
@@ -50,12 +42,8 @@
'char': self.chararraydescr,
}
- def parse(self, source, inc_label_jump=True,
- pargs=2,
- iargs=10,
- fargs=6,
- additional_args=None,
- replace_args=None):
+ def parse_trace(self, source, inc_label_jump=True, pargs=2, iargs=10,
+ fargs=6, additional_args=None, replace_args=None):
args = []
for prefix, rang in [('p',range(pargs)),
('i',range(iargs)),
@@ -75,16 +63,8 @@
joinedargs = ','.join(args)
fmt = (indent, joinedargs, source, indent, joinedargs)
src = "%s[%s]\n%s\n%sjump(%s)" % fmt
- loop = opparse(src, cpu=self.cpu, namespace=self.namespace())
- if inc_label_jump:
- token = JitCellToken()
- label = ResOperation(rop.LABEL, loop.inputargs, descr=TargetToken(token))
- loop.operations = [label] + loop.operations
- loop.graph = FakeDependencyGraph(loop)
- return loop
- else:
- loop.graph = FakeDependencyGraph(loop)
- del loop.operations[-1]
+ loop = self.parse_loop(src)
+ loop.graph = FakeDependencyGraph(loop)
return loop
def pack(self, loop, l, r, input_type, output_type):
@@ -92,21 +72,8 @@
def schedule(self, loop, packs, vec_reg_size=16,
prepend_invariant=False, overwrite_funcs=None):
- ops = []
- cm = X86_CostModel(0, vec_reg_size)
- def profitable():
- return True
- cm.profitable = profitable
- vsd = VecScheduleData(vec_reg_size, cm, loop.inputargs[:])
- for name, overwrite in (overwrite_funcs or {}).items():
- setattr(vsd, name, overwrite)
- renamer = Renamer()
- metainterp_sd = FakeMetaInterpStaticData(self.cpu)
- jitdriver_sd = FakeJitDriverStaticData()
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
- opt.costmodel = cm
- opt.dependency_graph = loop.graph
- del loop.graph
+ cm = X86_CostModel(self.cpu, 0)
+ cm.profitable = lambda: True
pairs = []
for pack in packs:
for i in range(len(pack.operations)-1):
@@ -115,29 +82,39 @@
o2 = pack.operations[i+1]
pair = Pair(o1,o2,pack.input_type,pack.output_type)
pairs.append(pair)
-
- opt.packset = FakePackSet(pairs)
-
+ packset = FakePackSet(pairs)
+ state = VecScheduleState(loop.graph, packset, self.cpu, cm)
+ for name, overwrite in (overwrite_funcs or {}).items():
+ setattr(state, name, overwrite)
+ renamer = Renamer()
+ metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+ jitdriver_sd = FakeJitDriverStaticData()
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
+ opt.packset = packset
if not prepend_invariant:
- def pio(oplist, labels):
- return oplist
- vsd.prepend_invariant_operations = pio
-
+ state.prepend_invariant_operations = lambda list, _: list
opt.combine_packset()
- opt.schedule(True, sched_data=vsd)
-
- loop.operations = \
- [op for op in loop.operations \
- if not (op.is_final() or op.is_label())]
-
- return loop
-
- def assert_operations_match(self, loop_a, loop_b):
- assert equaloplists(loop_a.operations, loop_b.operations)
+ opt.schedule(state)
class Test(SchedulerBaseTest, LLtypeMixin):
+
+ def test_next_must_not_loop_forever(self):
+ scheduler = Scheduler()
+ def schedulable(node):
+ node.count += 1
+ return False
+ scheduler.schedulable = schedulable
+ class State(object): pass
+ class Node(object): emitted = False; pack = None; count = 0
+ state = State()
+ state.worklist = [Node(), Node(), Node(), Node(), Node()]
+ assert scheduler.next(state) is None
+ for node in state.worklist:
+ assert node.count == 1
+ # must return here, then the test passed
+
def test_schedule_split_load(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
f10 = raw_load_f(p0, i0, descr=float)
f11 = raw_load_f(p0, i1, descr=float)
f12 = raw_load_f(p0, i2, descr=float)
@@ -147,15 +124,15 @@
""")
pack1 = self.pack(loop1, 0, 6, None, F32)
loop2 = self.schedule(loop1, [pack1])
- loop3 = self.parse("""
- v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float)
+ loop3 = self.parse_trace("""
+ v10[i32|4] = vec_raw_load_i(p0, i0, 4, descr=float)
f10 = raw_load_f(p0, i4, descr=float)
f11 = raw_load_f(p0, i5, descr=float)
""", False)
self.assert_equal(loop2, loop3)
def test_int_to_float(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = raw_load(p0, i0, descr=long)
i11 = raw_load(p0, i1, descr=long)
i12 = int_signext(i10, 4)
@@ -167,21 +144,21 @@
pack2 = self.pack(loop1, 2, 4, I64, I32_2)
pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
loop2 = self.schedule(loop1, [pack1, pack2, pack3])
- loop3 = self.parse("""
- v10[i64|2] = vec_raw_load(p0, i0, 2, descr=long)
+ loop3 = self.parse_trace("""
+ v10[i64|2] = vec_raw_load_i(p0, i0, 2, descr=long)
v20[i32|2] = vec_int_signext(v10[i64|2], 4)
v30[f64|2] = vec_cast_int_to_float(v20[i32|2])
""", False)
self.assert_equal(loop2, loop3)
def test_scalar_pack(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_add(i0, 73)
i11 = int_add(i1, 73)
""")
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v10[i64|2] = vec_box(2)
v20[i64|2] = vec_int_pack(v10[i64|2], i0, 0, 1)
v30[i64|2] = vec_int_pack(v20[i64|2], i1, 1, 1)
@@ -191,13 +168,13 @@
""", False)
self.assert_equal(loop2, loop3)
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
f10 = float_add(f0, 73.0)
f11 = float_add(f1, 73.0)
""")
pack1 = self.pack(loop1, 0, 2, F64, F64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v10[f64|2] = vec_box(2)
v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
@@ -208,7 +185,7 @@
self.assert_equal(loop2, loop3)
def test_scalar_remember_expansion(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
f10 = float_add(f0, f5)
f11 = float_add(f1, f5)
f12 = float_add(f10, f5)
@@ -217,7 +194,7 @@
pack1 = self.pack(loop1, 0, 2, F64, F64)
pack2 = self.pack(loop1, 2, 4, F64, F64)
loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v10[f64|2] = vec_box(2)
v20[f64|2] = vec_float_pack(v10[f64|2], f0, 0, 1)
v30[f64|2] = vec_float_pack(v20[f64|2], f1, 1, 1)
@@ -235,7 +212,7 @@
raise Exception("could not find %s in args %s" % (name, loop.inputargs))
def test_signext_int32(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_signext(i1, 4)
i11 = int_signext(i1, 4)
""", additional_args=['v10[i64|2]'])
@@ -247,13 +224,13 @@
overwrite_funcs = {
'getvector_of_box': i1inv103204,
})
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v11[i32|2] = vec_int_signext(v10[i64|2], 4)
""", False, additional_args=['v10[i64|2]'])
self.assert_equal(loop2, loop3)
def test_cast_float_to_int(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
f10 = raw_load(p0, i1, descr=double)
f11 = raw_load(p0, i2, descr=double)
f12 = raw_load(p0, i3, descr=double)
@@ -301,11 +278,11 @@
overwrite_funcs={
'_prevent_signext': void
})
- loop3 = self.parse("""
- v10[f64|2] = vec_raw_load(p0, i1, 2, descr=double)
- v11[f64|2] = vec_raw_load(p0, i3, 2, descr=double)
- v12[f64|2] = vec_raw_load(p0, i5, 2, descr=double)
- v13[f64|2] = vec_raw_load(p0, i7, 2, descr=double)
+ loop3 = self.parse_trace("""
+ v10[f64|2] = vec_raw_load_f(p0, i1, 2, descr=double)
+ v11[f64|2] = vec_raw_load_f(p0, i3, 2, descr=double)
+ v12[f64|2] = vec_raw_load_f(p0, i5, 2, descr=double)
+ v13[f64|2] = vec_raw_load_f(p0, i7, 2, descr=double)
v14[i32|2] = vec_cast_float_to_int(v10[f64|2])
v15[i32|2] = vec_cast_float_to_int(v11[f64|2])
v16[i32|2] = vec_cast_float_to_int(v12[f64|2])
@@ -322,7 +299,7 @@
self.assert_equal(loop2, loop3)
def test_cast_float_to_single_float(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
f10 = raw_load(p0, i1, descr=double)
f11 = raw_load(p0, i2, descr=double)
f12 = raw_load(p0, i3, descr=double)
@@ -342,9 +319,9 @@
pack2 = self.pack(loop1, 4, 8, F64, I32_2)
pack3 = self.pack(loop1, 8, 12, I32, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3])
- loop3 = self.parse("""
- v44[f64|2] = vec_raw_load(p0, i1, 2, descr=double)
- v45[f64|2] = vec_raw_load(p0, i3, 2, descr=double)
+ loop3 = self.parse_trace("""
+ v44[f64|2] = vec_raw_load_f(p0, i1, 2, descr=double)
+ v45[f64|2] = vec_raw_load_f(p0, i3, 2, descr=double)
v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2])
v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2])
v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2)
@@ -353,7 +330,7 @@
self.assert_equal(loop2, loop3)
def test_all(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = raw_load(p0, i1, descr=long)
i11 = raw_load(p0, i2, descr=long)
#
@@ -367,9 +344,9 @@
pack2 = self.pack(loop1, 2, 4, I64, I64)
pack3 = self.pack(loop1, 4, 6, I64, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v9[i64|2] = vec_int_expand(255,2)
- v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long)
+ v10[i64|2] = vec_raw_load_i(p0, i1, 2, descr=long)
v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
guard_true(v11[i64|2]) []
""", False)
@@ -377,7 +354,7 @@
def test_split_load_store(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = raw_load(p0, i1, descr=float)
i11 = raw_load(p0, i2, descr=float)
i12 = raw_load(p0, i3, descr=float)
@@ -388,8 +365,8 @@
pack1 = self.pack(loop1, 0, 4, None, I32)
pack2 = self.pack(loop1, 4, 6, I32_2, None)
loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
- loop3 = self.parse("""
- v1[i32|4] = vec_raw_load(p0, i1, 4, descr=float)
+ loop3 = self.parse_trace("""
+ v1[i32|4] = vec_raw_load_i(p0, i1, 4, descr=float)
i10 = vec_int_unpack(v1[i32|4], 0, 1)
raw_store(p0, i3, i10, descr=float)
i11 = vec_int_unpack(v1[i32|4], 1, 1)
@@ -400,13 +377,13 @@
self.assert_equal(loop2, loop3)
def test_split_arith(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_and(255, i1)
i11 = int_and(255, i1)
""")
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v1[i64|2] = vec_int_expand(255,2)
v2[i64|2] = vec_int_expand(i1,2)
v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
@@ -414,13 +391,13 @@
self.assert_equal(loop2, loop3)
def test_split_arith(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_and(255, i1)
i11 = int_and(255, i1)
""")
pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v1[i64|2] = vec_int_expand(255, 2)
v2[i64|2] = vec_int_expand(i1, 2)
v3[i64|2] = vec_int_and(v1[i64|2], v2[i64|2])
@@ -428,7 +405,7 @@
self.assert_equal(loop2, loop3)
def test_no_vec_impl(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_and(255, i1)
i11 = int_and(255, i2)
i12 = uint_floordiv(i10,1)
@@ -439,7 +416,7 @@
pack1 = self.pack(loop1, 0, 2, I64, I64)
pack4 = self.pack(loop1, 4, 6, I64, I64)
loop2 = self.schedule(loop1, [pack1,pack4], prepend_invariant=True)
- loop3 = self.parse("""
+ loop3 = self.parse_trace("""
v1[i64|2] = vec_int_expand(255,2)
v2[i64|2] = vec_box(2)
v3[i64|2] = vec_int_pack(v2[i64|2], i1, 0, 1)
@@ -457,7 +434,7 @@
self.assert_equal(loop2, loop3)
def test_split_cast(self):
- trace = self.parse("""
+ trace = self.parse_trace("""
f10 = cast_int_to_float(i1)
f11 = cast_int_to_float(i2)
f12 = cast_int_to_float(i3)
@@ -470,7 +447,7 @@
assert len(packs) == 2
def test_combine_packset_nearly_empty_pack(self):
- trace = self.parse("""
+ trace = self.parse_trace("""
i10 = int_add(i1, i3)
i11 = int_add(i2, i3)
""")
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -11,9 +11,9 @@
import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
-from rpython.jit.metainterp.optimizeopt.vectorize import (VectorizingOptimizer, MemoryRef,
+from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer, MemoryRef,
isomorphic, Pair, NotAVectorizeableLoop, NotAProfitableLoop, GuardStrengthenOpt,
- CostModel)
+ CostModel, VectorLoop)
from rpython.jit.metainterp.optimize import InvalidLoop
from rpython.jit.metainterp import compile
from rpython.jit.metainterp.resoperation import rop, ResOperation
@@ -41,25 +41,6 @@
jitdriver_sd = FakeJitDriverStaticData()
- def parse_loop(self, ops, add_label=True):
- loop = self.parse(ops, postprocess=self.postprocess)
- token = JitCellToken()
- pre = []
- tt = TargetToken(token)
- if add_label:
- pre = [ResOperation(rop.LABEL, loop.inputargs, None, descr=tt)]
- else:
- for i,op in enumerate(loop.operations):
- if op.getopnum() == rop.LABEL:
- op.setdescr(tt)
- loop.operations = pre + filter(lambda op: op.getopnum() != rop.DEBUG_MERGE_POINT, loop.operations)
- if loop.operations[-1].getopnum() == rop.JUMP:
- loop.operations[-1].setdescr(token)
- for op in loop.operations:
- if op.getopnum() == rop.GUARD_EARLY_EXIT and op.getdescr() is None:
- op.setdescr(compile.ResumeAtLoopHeaderDescr())
- return loop
-
def assert_vectorize(self, loop, expected_loop, call_pure_results=None):
self._do_optimize_loop(loop, call_pure_results, export_state=True)
self.assert_equal(loop, expected_loop)
@@ -67,7 +48,7 @@
def vectoroptimizer(self, loop):
metainterp_sd = FakeMetaInterpStaticData(self.cpu)
jitdriver_sd = FakeJitDriverStaticData()
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
label_index = loop.find_first_index(rop.LABEL)
opt.orig_label_args = loop.operations[label_index].getarglist()[:]
return opt
@@ -89,48 +70,48 @@
guard.setdescr(compile.ResumeAtLoopHeaderDescr())
loop.operations.insert(idx+1, guard)
self.show_dot_graph(DependencyGraph(opt.loop), "original_" + self.test_name)
- opt.analyse_index_calculations()
- if opt.dependency_graph is not None:
+ graph = opt.analyse_index_calculations()
+ if graph is not None:
cycle = opt.dependency_graph.cycles()
if cycle is not None:
print "CYCLE found %s" % cycle
self.show_dot_graph(opt.dependency_graph, "early_exit_" + self.test_name)
assert cycle is None
- opt.schedule(False)
+ loop.operations = opt.schedule(False)
opt.unroll_loop_iterations(loop, unroll_factor)
opt.loop.operations = opt.get_newoperations()
self.debug_print_operations(opt.loop)
opt.clear_newoperations()
- opt.dependency_graph = DependencyGraph(loop)
- self.last_graph = opt.dependency_graph
+ graph = DependencyGraph(loop)
+ self.last_graph = graph
self.show_dot_graph(self.last_graph, self.test_name)
- return opt
+ return opt, graph
def init_packset(self, loop, unroll_factor = -1):
- opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
- opt.find_adjacent_memory_refs()
+ opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
+ opt.find_adjacent_memory_refs(graph)
return opt
def extend_packset(self, loop, unroll_factor = -1):
- opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
- opt.find_adjacent_memory_refs()
+ opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
+ opt.find_adjacent_memory_refs(graph)
opt.extend_packset()
return opt
def combine_packset(self, loop, unroll_factor = -1):
- opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
- opt.find_adjacent_memory_refs()
+ opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
+ opt.find_adjacent_memory_refs(graph)
opt.extend_packset()
opt.combine_packset()
return opt
def schedule(self, loop, unroll_factor = -1, with_guard_opt=False):
- opt = self.vectoroptimizer_unrolled(loop, unroll_factor)
+ opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
opt.costmodel = FakeCostModel()
- opt.find_adjacent_memory_refs()
+ opt.find_adjacent_memory_refs(graph)
opt.extend_packset()
opt.combine_packset()
- opt.schedule(True)
+ opt.schedule(graph, True)
if with_guard_opt:
gso = GuardStrengthenOpt(opt.dependency_graph.index_vars, opt.has_two_labels)
gso.propagate_all_forward(opt.loop)
@@ -204,8 +185,7 @@
class BaseTestVectorize(VecTestHelper):
- def test_vectorize_skip_impossible_1(self):
- """ this trace does not contain a raw load / raw store from an array
"""
+ def test_vectorize_skip(self):
ops = """
[p0,i0]
i1 = int_add(i0,1)
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py
b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -20,7 +20,7 @@
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
-from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleData,
+from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum,
getunpackopnum, PackType, determine_input_output_types)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
@@ -31,9 +31,10 @@
from rpython.rlib.jit import Counters
from rpython.rtyper.lltypesystem import lltype, rffi
-class TraceLoop(object):
+class VectorLoop(object):
def __init__(self, label, oplist, jump):
self.label = label
+ self.inputargs = label.getarglist()
self.prefix = []
self.prefix_label = None
assert self.label.getopnum() == rop.LABEL
@@ -41,7 +42,7 @@
self.jump = jump
assert self.jump.getopnum() == rop.JUMP
- def all_operations(self):
+ def operation_list(self):
return [self.label] + self.operations + [self.jump]
def optimize_vector(metainterp_sd, jitdriver_sd, warmstate, loop_info, loop_ops):
@@ -52,10 +53,10 @@
# the original loop (output of optimize_unroll)
info = LoopVersionInfo(loop_info)
version = info.snapshot(loop_ops, info.label_op)
- loop = TraceLoop(loop_info.label_op, loop_ops[:-1], loop_ops[-1])
+ loop = VectorLoop(loop_info.label_op, loop_ops[:-1], loop_ops[-1])
try:
debug_start("vec-opt-loop")
- metainterp_sd.logger_noopt.log_loop([], loop.all_operations(), -2, None, None, "pre vectorize")
+ metainterp_sd.logger_noopt.log_loop([], loop.operation_list(), -2, None, None, "pre vectorize")
metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
#
start = time.clock()
@@ -67,7 +68,7 @@
end = time.clock()
#
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
- metainterp_sd.logger_noopt.log_loop([], loop.all_operations(), -2, None, None, "post vectorize")
+ metainterp_sd.logger_noopt.log_loop([], loop.operation_list(), -2, None, None, "post vectorize")
#
nano = int((end-start)*10.0**9)
debug_print("# vecopt factor: %d opcount: (%d -> %d) took %dns" % \
@@ -142,8 +143,7 @@
def __init__(self, metainterp_sd, jitdriver_sd, cost_threshold):
Optimizer.__init__(self, metainterp_sd, jitdriver_sd)
self.cpu = metainterp_sd.cpu
- self.costmodel = X86_CostModel(cost_threshold, self.cpu.vector_register_size)
- self.dependency_graph = None
+ self.cost_threshold = cost_threshold
self.packset = None
self.unroll_count = 0
self.smallest_type_bytes = 0
@@ -171,9 +171,10 @@
raise NotAVectorizeableLoop()
# find index guards and move to the earliest position
- self.analyse_index_calculations(loop)
- if self.dependency_graph is not None:
- self.schedule(False) # reorder the trace
+ graph = self.analyse_index_calculations(loop)
+ if graph is not None:
+ state = SchedulerState(graph)
+ self.schedule(state) # reorder the trace
# unroll
self.unroll_count = self.get_unroll_count(vsize)
@@ -182,13 +183,15 @@
self.clear_newoperations();
# vectorize
- self.dependency_graph = DependencyGraph(self.loop)
+ graph = DependencyGraph(loop)
self.find_adjacent_memory_refs()
self.extend_packset()
self.combine_packset()
- self.costmodel.reset_savings()
- self.schedule(True)
- if not self.costmodel.profitable():
+ # TODO move cost model to CPU
+ costmodel = X86_CostModel(self.cpu, self.cost_threshold)
+ state = VecScheduleState(graph, self.packset, self.cpu, costmodel)
+ self.schedule(state)
+ if not state.profitable():
raise NotAProfitableLoop()
def emit_unrolled_operation(self, op):
@@ -308,7 +311,7 @@
unroll_count = simd_vec_reg_bytes // byte_count
return unroll_count-1 # it is already unrolled once
- def find_adjacent_memory_refs(self):
+ def find_adjacent_memory_refs(self, graph):
""" The pre pass already builds a hash of memory references and the
operations. Since it is in SSA form there are no array indices.
If there are two array accesses in the unrolled loop
@@ -320,7 +323,6 @@
operations = loop.operations
self.packset = PackSet(self.cpu.vector_register_size)
- graph = self.dependency_graph
memory_refs = graph.memory_refs.items()
# initialize the pack set
for node_a,memref_a in memory_refs:
@@ -447,59 +449,22 @@
if fail:
assert False
- def schedule(self, vector=False, sched_data=None):
+ def schedule(self, state): # TODO vector=False, sched_data=None):
""" Scheduling the trace and emitting vector operations
for packed instructions.
"""
-
- self.clear_newoperations()
- if sched_data is None:
- sched_data = VecScheduleData(self.cpu.vector_register_size,
- self.costmodel, self.orig_label_args)
- self.dependency_graph.prepare_for_scheduling()
- scheduler = Scheduler(self.dependency_graph, sched_data)
- renamer = Renamer()
- #
- if vector:
- self.packset.accumulate_prepare(sched_data, renamer)
- #
- for node in scheduler.schedulable_nodes:
- op = node.getoperation()
- if op.is_label():
- seen = sched_data.seen
- for arg in op.getarglist():
- sched_data.seen[arg] = None
- break
- #
- scheduler.emit_into(self._newoperations, renamer, unpack=vector)
+ state.prepare()
+ scheduler = Scheduler()
+ scheduler.walk_and_emit(state)
#
if not we_are_translated():
- for node in self.dependency_graph.nodes:
+ for node in graph.nodes:
assert node.emitted
- if vector and not self.costmodel.profitable():
+ #
+ if state.profitable():
return
- if vector:
- # add accumulation info to the descriptor
- for version in self.loop.versions:
- # this needs to be done for renamed (accum arguments)
- version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ]
- self.appended_arg_count = len(sched_data.invariant_vector_vars)
- #for guard_node in self.dependency_graph.guards:
- # op = guard_node.getoperation()
- # failargs = op.getfailargs()
- # for i,arg in enumerate(failargs):
- # if arg is None:
- # continue
- # accum = arg.getaccum()
- # if accum:
- # pass
- # #accum.save_to_descr(op.getdescr(),i)
- self.has_two_labels = len(sched_data.invariant_oplist) > 0
- self.loop.operations = self.prepend_invariant_operations(sched_data)
- else:
- self.loop.operations = self._newoperations
-
- self.clear_newoperations()
+ #
+ state.post_schedule()
def prepend_invariant_operations(self, sched_data):
""" Add invariant operations to the trace loop. returns the operation
list
@@ -540,7 +505,7 @@
that guards fail 'early' and relax dependencies. Without this
step vectorization would not be possible!
"""
- self.dependency_graph = graph = DependencyGraph(loop)
+ graph = DependencyGraph(loop)
ee_guard_node = graph.getnode(0)
if ee_guard_node.getopnum() != rop.GUARD_EARLY_EXIT:
raise NotAVectorizeableLoop()
@@ -618,9 +583,9 @@
The main reasons to have this are the frequent unpack instructions,
and the missing ability (by design) to detect non-vectorizable loops.
"""
- def __init__(self, threshold, vec_reg_size):
+ def __init__(self, cpu, threshold):
self.threshold = threshold
- self.vec_reg_size = vec_reg_size
+ self.vec_reg_size = cpu.vector_register_size
self.savings = 0
def reset_savings(self):
@@ -850,11 +815,12 @@
#
return None, -1
- def accumulate_prepare(self, sched_data, renamer):
- vec_reg_size = sched_data.vec_reg_size
+ def accumulate_prepare(self, state):
+ vec_reg_size = state.vec_reg_size
for pack in self.packs:
if not pack.is_accumulating():
continue
+ xxx
accum = pack.accum
# create a new vector box for the parameters
box = pack.input_type.new_vector_box()
@@ -862,27 +828,27 @@
# reset the box to zeros or ones
if accum.operator == Accum.PLUS:
op = ResOperation(rop.VEC_BOX, [ConstInt(size)], box)
- sched_data.invariant_oplist.append(op)
+ state.invariant_oplist.append(op)
result = box.clonebox()
op = ResOperation(rop.VEC_INT_XOR, [box, box], result)
- sched_data.invariant_oplist.append(op)
+ state.invariant_oplist.append(op)
box = result
elif accum.operator == Accum.MULTIPLY:
# multiply is only supported by floats
op = ResOperation(rop.VEC_FLOAT_EXPAND, [ConstFloat(1.0), ConstInt(size)], box)
- sched_data.invariant_oplist.append(op)
+ state.invariant_oplist.append(op)
else:
- raise NotImplementedError("can only handle + and *")
+ raise NotImplementedError("can only handle %s" % accum.operator)
result = box.clonebox()
assert isinstance(result, BoxVector)
result.accum = accum
# pack the scalar value
op = ResOperation(getpackopnum(box.gettype()),
[box, accum.var, ConstInt(0), ConstInt(1)],
result)
- sched_data.invariant_oplist.append(op)
+ state.invariant_oplist.append(op)
# rename the variable with the box
- sched_data.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
- renamer.start_renaming(accum.getoriginalbox(), result)
+ state.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
+ state.renamer.start_renaming(accum.getoriginalbox(), result)
def split_overloaded_packs(self):
newpacks = []
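
The accumulate_prepare() rewrite keeps the same seeding strategy as before,
just driven through the state object: an accumulating pack starts from the
neutral element of its operator (zeros via VEC_INT_XOR for +, a broadcast 1.0
via VEC_FLOAT_EXPAND for *), and the live scalar is packed into lane 0. A
plain-Python illustration of that invariant, with no JIT classes involved:

    def init_accumulator(operator, scalar, lanes):
        if operator == '+':
            vec = [0] * lanes        # the trace zeroes the box with VEC_INT_XOR
        elif operator == '*':
            vec = [1.0] * lanes      # the trace broadcasts 1.0 with VEC_FLOAT_EXPAND
        else:
            raise NotImplementedError("can only handle %s" % operator)
        vec[0] = scalar              # vec_*_pack of the live accumulation variable
        return vec

    assert init_accumulator('+', 5, 2) == [5, 0]
    assert init_accumulator('*', 2.0, 2) == [2.0, 1.0]

Reducing such a vector after the loop gives the same result the scalar
accumulation loop would have produced.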
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -66,6 +66,9 @@
def is_inputarg(self):
return False
+ def returns_vector(self):
+ return False
+
def ResOperation(opnum, args, descr=None):
cls = opclasses[opnum]
op = cls()
@@ -88,6 +91,7 @@
return self._forwarded
+
class AbstractResOp(AbstractResOpOrInputArg):
"""The central ResOperation class, representing one operation."""
@@ -101,8 +105,7 @@
type = 'v'
boolreflex = -1
boolinverse = -1
- vector = -1
- casts = ('\x00', -1, '\x00', -1)
+ vector = -1 # -1 means no vector equivalent, -2 means the operation is itself a vector operation
def getopnum(self):
return self.opnum
@@ -357,6 +360,12 @@
def is_label(self):
return self.getopnum() == rop.LABEL
+ def returns_void(self):
+ return self.type == 'v'
+
+ def returns_vector(self):
+ return self.type != 'v' and self.vector == -2
+
# ===================
# Top of the hierachy
# ===================
@@ -365,6 +374,9 @@
pass
class CastResOp(AbstractResOp):
+ _attrs_ = ('casts')
+ casts = ('\x00', -1, '\x00', -1)
+
def casts_box(self):
return True
@@ -546,8 +558,6 @@
_attrs_ = ('item_type','item_count','item_size','item_signed','accum')
_extended_display = False
- type = 'V'
-
#def __init__(self, item_type=FLOAT, item_count=2, item_size=8, item_signed=False, accum=None):
# assert item_type in (FLOAT, INT)
# self.item_type = item_type
@@ -651,6 +661,13 @@
def reset_value(self):
self.setref_base(lltype.nullptr(llmemory.GCREF.TO))
+class InputArgVector(VectorOp, AbstractInputArg):
+ def __init__(self):
+ pass
+
+ def returns_vector(self):
+ return True
+
# ============
# arity mixins
# ============
@@ -1154,6 +1171,8 @@
mixins.append(RefOp)
else:
assert result_type == 'n'
+ if name.startswith('VEC'):
+ mixins.insert(1,VectorOp)
cls_name = '%s_OP' % name
bases = (get_base_class(tuple(mixins), baseclass),)
@@ -1271,6 +1290,8 @@
cls.vector = _opvector[opnum]
if name in _cast_ops:
cls.casts = _cast_ops[name]
+ if name.startswith('VEC'):
+ cls.vector = -2
setup2()
del _opboolinverse
del _opboolreflex
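
The resoperation.py hunk folds the vector information into class attributes
instead of a separate BoxVector type: vector == -1 still means "no vector
equivalent", the new -2 (set by setup2() for every VEC_* name) marks an
operation that is itself a vector operation, and returns_void()/
returns_vector() derive from that flag plus the result type. A toy model of
just this convention (ToyOp/ToyVecLoad are made-up names, not the real
hierarchy):

    class ToyOp(object):
        type = 'i'       # result type; 'v' would mean the op returns nothing
        vector = -1      # default: scalar op without a vector counterpart

        def returns_void(self):
            return self.type == 'v'

        def returns_vector(self):
            return self.type != 'v' and self.vector == -2

    class ToyVecLoad(ToyOp):
        vector = -2      # what setup2() assigns for names starting with 'VEC'

    assert not ToyOp().returns_vector()
    assert ToyVecLoad().returns_vector()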