Hi,
One final patch in the series, this one for vec_sum2s. This builtin
requires some additional code generation for little-endian targets
without -maltivec=be. Here's an example:
  va = {-10,1,2,3};            0x 00000003 00000002 00000001 fffffff6
  vb = {100,101,102,-103};     0x ffffff99 00000066 00000065 00000064
  vc = vec_sum2s (va, vb);     0x ffffff9e 00000000 0000005c 00000000
                                  = {0,92,0,-98};
We need to add -10 + 1 + 101 = 92 and place the result in vc[1], and
add 2 + 3 + (-103) = -98 and place that in vc[3], with zeroes in the
other two elements. To do this, we first use "vsldoi vs,vb,vb,12" to
rotate -103 and 101 into big-endian elements 1 and 3, as required by
the vsum2sws instruction:
0x ffffff99 00000066 00000065 00000064 ffffff99 00000066 00000065 00000064
                              ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^
                      vs = 0x 00000064 ffffff99 00000066 00000065
Executing "vsum2sws vs,va,vs" then gives
vs = 0x 00000000 ffffff9e 00000000 0000005c
which must then be rotated into position with "vsldoi vc,vs,vs,4":
0x 00000000 ffffff9e 00000000 0000005c 00000000 ffffff9e 00000000 0000005c
            ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^
    vc = 0x ffffff9e 00000000 0000005c 00000000
which is the desired result.
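For anyone who wants to sanity-check the sequence without hardware
handy, here is a small scalar model of the three instructions in plain
C. It is purely illustrative and not part of the patch: the helper
names are made up, and vsum2sws saturation is ignored. The arrays use
big-endian word numbering to match the register pictures above.

#include <stdio.h>
#include <stdint.h>

/* vsldoi d,a,a,SH with both inputs the same register rotates the
   four words left by SH/4 positions (big-endian word numbering).  */
static void
vsldoi_rot (int32_t d[4], const int32_t a[4], int sh_bytes)
{
  int32_t t[4];
  for (int i = 0; i < 4; i++)
    t[i] = a[(i + sh_bytes / 4) % 4];
  for (int i = 0; i < 4; i++)
    d[i] = t[i];
}

/* vsum2sws d,a,b: d[1] = a[0]+a[1]+b[1], d[3] = a[2]+a[3]+b[3],
   d[0] = d[2] = 0; saturation is ignored for this small example.  */
static void
vsum2sws_model (int32_t d[4], const int32_t a[4], const int32_t b[4])
{
  int32_t r1 = a[0] + a[1] + b[1];
  int32_t r3 = a[2] + a[3] + b[3];
  d[0] = 0; d[1] = r1; d[2] = 0; d[3] = r3;
}

int
main (void)
{
  /* Big-endian views of va and vb from the example above.  */
  int32_t va[4] = { 3, 2, 1, -10 };
  int32_t vb[4] = { -103, 102, 101, 100 };
  int32_t vs[4], vc[4];

  vsldoi_rot (vs, vb, 12);        /* vsldoi   vs,vb,vb,12 */
  vsum2sws_model (vs, va, vs);    /* vsum2sws vs,va,vs    */
  vsldoi_rot (vc, vs, 4);         /* vsldoi   vc,vs,vs,4  */

  /* Prints -98 0 92 0, i.e. {0,92,0,-98} in little-endian element
     order, matching the expected vec_sum2s result.  */
  printf ("%d %d %d %d\n", vc[0], vc[1], vc[2], vc[3]);
  return 0;
}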
In addition to this change, I noticed a redundant test from one of my
previous patches and simplified it. (BYTES_BIG_ENDIAN implies
VECTOR_ELT_ORDER_BIG, so we don't need to test BYTES_BIG_ENDIAN.)
As usual, new test cases are added to cover the possible cases. These
are simpler this time, since vector signed int is the only legal
argument type for vec_sum2s.
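(For reference, the only form the builtin accepts looks like the
following; this is a minimal compile-only sketch, and the wrapper name
is made up:

#include <altivec.h>

/* vec_sum2s takes and returns vector signed int; no other vector
   type is accepted.  */
vector signed int
wrap_sum2s (vector signed int a, vector signed int b)
{
  return vec_sum2s (a, b);
}
)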
Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions. Is this ok for trunk?
Thanks,
Bill
gcc:
2014-02-04 Bill Schmidt <[email protected]>
* config/rs6000/altivec.md (altivec_vsum2sws): Adjust code
generation for -maltivec=be.
(altivec_vsumsws): Simplify redundant test.
gcc/testsuite:
2014-02-04 Bill Schmidt <[email protected]>
* gcc.dg/vmx/sum2s.c: New.
* gcc.dg/vmx/sum2s-be-order.c: New.
Index: gcc/testsuite/gcc.dg/vmx/sum2s.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/sum2s.c (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s.c (revision 0)
@@ -0,0 +1,13 @@
+#include "harness.h"
+
+static void test()
+{
+  vector signed int vsia = {-10,1,2,3};
+  vector signed int vsib = {100,101,102,-103};
+  vector signed int vsir;
+  vector signed int vsier = {0,92,0,-98};
+
+  vsir = vec_sum2s (vsia, vsib);
+
+  check (vec_all_eq (vsir, vsier), "vsir");
+}
Index: gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  vector signed int vsia = {-10,1,2,3};
+  vector signed int vsib = {100,101,102,-103};
+  vector signed int vsir;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector signed int vsier = {91,0,107,0};
+#else
+  vector signed int vsier = {0,92,0,-98};
+#endif
+
+  vsir = vec_sum2s (vsia, vsib);
+
+  check (vec_all_eq (vsir, vsier), "vsir");
+}
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 207479)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -1592,10 +1610,21 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VSUM2SWS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+ (clobber (match_scratch:V4SI 3 "=v"))]
"TARGET_ALTIVEC"
- "vsum2sws %0,%1,%2"
- [(set_attr "type" "veccomplex")])
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vsum2sws %0,%1,%2";
+ else
+ return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
+}
+ [(set_attr "type" "veccomplex")
+ (set (attr "length")
+ (if_then_else
+ (match_test "VECTOR_ELT_ORDER_BIG")
+ (const_string "4")
+ (const_string "12")))])
(define_insn "altivec_vsumsws"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -1606,7 +1635,7 @@
(clobber (match_scratch:V4SI 3 "=v"))]
"TARGET_ALTIVEC"
{
- if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
+ if (VECTOR_ELT_ORDER_BIG)
return "vsumsws %0,%1,%2";
else
return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvspltw %0,%3,3";
@@ -1614,7 +1643,7 @@
[(set_attr "type" "veccomplex")
(set (attr "length")
(if_then_else
- (match_test "(BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)")
+ (match_test "(VECTOR_ELT_ORDER_BIG)")
(const_string "4")
(const_string "12")))])