Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77391:43ebe5044bd1 Date: 2015-05-19 11:33 +0200 http://bitbucket.org/pypy/pypy/changeset/43ebe5044bd1/
Log: cvtpd2dq packs ints into the lower quadword; that is why it did not work. Starting to rethink the conversion function. diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -324,6 +324,15 @@ return W_TypeObject(w_obj.typedef.name) def call_function(self, tp, w_dtype, *args): + if tp is self.w_float + if isinstance(w_dtype, boxes.W_Float64Box): + return FloatObject(float(w_dtype.value)) + if isinstance(w_dtype, boxes.W_Float32Box): + return FloatObject(float(w_dtype.value)) + if isinstance(w_dtype, boxes.W_Int64Box): + return FloatObject(float(int(w_dtype.value))) + if isinstance(w_dtype, boxes.W_Int32Box): + return FloatObject(float(int(w_dtype.value))) return w_dtype def call_method(self, w_obj, s, *args): @@ -534,9 +543,9 @@ if isinstance(w_res, boxes.W_Float32Box): print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) if isinstance(w_res, boxes.W_Int64Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", float(int(w_res.value)) + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) if isinstance(w_res, boxes.W_Int32Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", float(int(w_res.value)) + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) else: raise NotImplementedError if (not isinstance(w_res, W_NDimArray) and diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -107,6 +107,35 @@ retval = self.interp.eval_graph(self.graph, [i]) return retval + def define_float32_copy(): + return """ + a = astype(|30|, float32) + x1 = a -> 7 + x2 = a -> 8 + x3 = a -> 9 + x4 = a -> 10 + r = x1 + x2 + x3 + x4 + r + """ + def test_float32_copy(self): + result = self.run("float32_copy") + assert int(result) == 7+8+9+10 + self.check_vectorized(1, 1) + + def 
define_int32_copy(): + return """ + a = astype(|30|, int32) + x1 = a -> 7 + x2 = a -> 8 + x3 = a -> 9 + x4 = a -> 10 + x1 + x2 + x3 + x4 + """ + def test_int32_copy(self): + result = self.run("int32_copy") + assert int(result) == 7+8+9+10 + self.check_vectorized(1, 1) + def define_float32_add(): return """ a = astype(|30|, float32) @@ -175,7 +204,8 @@ x2 = b -> 8 x3 = b -> 9 x4 = b -> 10 - x1 + x2 + x3 + x4 + r = x1 + x2 + x3 + x4 + r """ #return """ #a = astype(|30|, int32) @@ -192,25 +222,6 @@ assert int(result) == 7+1+8+1+9+1+10+1 self.check_vectorized(1, 1) - def define_int32_copy(): - return """ - a = astype(|30|, float32) - x1 = a -> 7 - x2 = a -> 8 - x3 = a -> 9 - x4 = a -> 10 - x5 = a -> 11 - x6 = a -> 12 - x7 = a -> 13 - x8 = a -> 14 - x9 = a -> 15 - x1 + x2 + x3 + x4 - """ - def test_int32_copy(self): - result = self.run("int32_copy") - assert int(result) == 7+8+9+10 - self.check_vectorized(1, 1) - def define_pow(): return """ diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2565,11 +2565,13 @@ # is there a better sequence to move them? 
scratch = X86_64_SCRATCH_REG.value #print resloc, "[0] <- int32(", srcloc, "[0])" + #66 48 0f 7e c0 movq %xmm0,%rax print resloc, "[1] <- int32(", srcloc, "[1])" + #self.mc.MOVDQ(scratch, srcloc) #self.mc.PEXTRQ_rxi(scratch, srcloc.value, 0) #self.mc.PINSRD_xri(resloc.value, scratch, 0) - self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1) - self.mc.PINSRD_xri(resloc.value, scratch, 1) + #self.mc.PEXTRQ_rxi(scratch, srcloc.value, 1) + #self.mc.PINSRD_xri(resloc.value, scratch, 1) else: py.test.set_trace() raise NotImplementedError("sign ext missing") diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -814,8 +814,6 @@ continue new_box = tgt_box.clonebox() new_box.item_count += src_box.item_count - if opnum == rop.VEC_FLOAT_PACK: - py.test.set_trace() op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i), ConstInt(src_box.item_count)], new_box) self.preamble_ops.append(op) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit