Hi,
Since r16-6671 relaxed the constraints on permutes when simplifying
vector constructors, there is an additional case to handle because it
interacts with r16-5561 (which allows nop conversions).
In
vector(8) short unsigned int _4;
short int _5;
vector(4) unsigned int _17;
_3 = *a_14(D);
_2 = {_3, { 0, 0, 0, 0 }};
_4 = VEC_PERM_EXPR <{ 0, 0, 0, 0, 0, 0, 0, 0 }, _2, { 0, 9, 11, 3, 10, 2, 9, 8 }>;
_5 = BIT_FIELD_REF <_4, 16, 32>;
d_16 = (unsigned int) _5;
_17 = {d_16, d_16, d_16, d_16};
we failed to consider that _5 has a signed type and converted directly to
unsigned, skipping the intermediate sign change.
With this patch we do
_3 = *a_14(D);
_5 = BIT_FIELD_REF <_3, 16, 48>;
d_16 = (unsigned int) _5;
_15 = VEC_PERM_EXPR <_3, _3, { 3, 3, 3, 3 }>;
_1 = VIEW_CONVERT_EXPR<vector(4) short int>(_15);
_21 = (vector(4) unsigned int) _1;
where the two conversions can often be done in one instruction.
Bootstrapped and regtested on x86 and power10, aarch64 still running.
Regtested on riscv64.
Regards
Robin
PR tree-optimization/123731
gcc/ChangeLog:
* tree-ssa-forwprop.cc (simplify_vector_constructor): Handle nop
conversion during extraction.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/pr123731.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr123731.c | 28 ++++++++++++++++++++++++++++
gcc/tree-ssa-forwprop.cc | 19 ++++++++++++++++---
2 files changed, 44 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr123731.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr123731.c b/gcc/testsuite/gcc.dg/vect/pr123731.c
new file mode 100644
index 00000000000..81fdb5d53bc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr123731.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+typedef unsigned short A __attribute__ ((vector_size (4 * sizeof (short))));
+typedef short B __attribute__ ((vector_size (8 * sizeof (short))));
+typedef unsigned C __attribute__ ((vector_size (4 * sizeof (int))));
+unsigned long long c;
+
+__attribute__ ((noipa)) void
+foo (A *a)
+{
+ C b[9] = {};
+ unsigned d = __builtin_convertvector (
+ __builtin_shufflevector ((A) {}, *a, 0, 5, 7, 3, 6, 2, 5, 4), B)[2];
+ b[0] += (C) {d, d, d, d};
+ c += b[0][0];
+}
+
+int
+main ()
+{
+ A t = (A) {0, 0, 0, -6};
+ foo (&t);
+ if (sizeof (short) == 2
+ && sizeof (int) == 4
+ && __CHAR_BIT__ == 8
+ && c != -6U)
+ __builtin_abort ();
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index bdc63a7a71b..51de6308a09 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -4137,9 +4137,15 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
return false;
tree mask_type, perm_type, conv_src_type;
perm_type = TREE_TYPE (orig[0]);
- conv_src_type = (nelts == refnelts
- ? perm_type
- : build_vector_type (TREE_TYPE (perm_type), nelts));
+ /* Determine the element type for the conversion source.
+ As orig_elem_type keeps track of the original type, check
+ if we need to perform a sign swap after permuting. */
+ tree conv_elem_type = TREE_TYPE (perm_type);
+ if (conv_code != ERROR_MARK
+ && orig_elem_type[0]
+ && tree_nop_conversion_p (orig_elem_type[0], conv_elem_type))
+ conv_elem_type = orig_elem_type[0];
+ conv_src_type = build_vector_type (conv_elem_type, nelts);
if (conv_code != ERROR_MARK
&& !supportable_convert_operation (conv_code, type, conv_src_type,
&conv_code))
@@ -4257,6 +4263,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
= converted_orig1 ? build_zero_cst (perm_type) : orig[1];
tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
orig[0], orig1_for_perm, op2);
+ /* If we're building a smaller vector, extract the element
+ with the proper type. */
if (nelts != refnelts)
res = gimple_build (&stmts, BIT_FIELD_REF,
conv_code != ERROR_MARK ? conv_src_type : type,
@@ -4264,6 +4272,11 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
TYPE_SIZE (conv_code != ERROR_MARK ? conv_src_type
: type),
bitsize_zero_node);
+ /* Otherwise, we can still have an intermediate sign change. */
+ else if (conv_code != ERROR_MARK
+ && tree_nop_conversion_p (conv_src_type, perm_type))
+ res = gimple_build (&stmts, VIEW_CONVERT_EXPR, conv_src_type, res);
+ /* Finally, apply the conversion. */
if (conv_code != ERROR_MARK)
res = gimple_build (&stmts, conv_code, type, res);
else if (!useless_type_conversion_p (type, TREE_TYPE (res)))
--
2.52.0