On Tue, Nov 14, 2017 at 11:11 PM, Will Schmidt <will_schm...@vnet.ibm.com> wrote: > > Hi, > Add support for gimple folding of vec_cmp_{eq,ge,gt,le,ne} > for the integer data types. > > As part of this change, several define_insn stanzas have been added/updated > in vsx.md that specify the "ne: -> not: + eq: " combinations to allow for the > generation > of the desired vcmpne[bhw] instructions, where we otherwise > would have generated a vcmpeq + vnor combination. The defines > also obsoleted the need for the UNSPEC versions of the same, so this ends up > being just an update to those existing defines. > > Several entries have been added to the switch statement in > builtin_function_type to identify the builtins having unsigned arguments. > > A handful of existing tests required updates to their specified optimization > levels to continue to generate the desired code. builtins-3-p9.c in > particular > has been updated to reflect improved code gen with the higher specified > optimization level. > Testcase coverage is otherwise handled by the already-in-tree > gcc.target/powerpc/fold-vec-cmp-*.c tests. > > Per feedback from the prior version, v2 changes also include: > * Reworked the actual folding to use a VEC_COND_EXPR. For cleanliness, I > moved this to a new fold_build_vec_cmp() helper function, which itself > is based on build_vec_cmp() as found in typeck.c. > * Added an additional fold_compare_helper() function to further factor out > the steps that are common to all of the vector compare operations. > > Testing is currently underway on P6 and newer. OK for trunk?
The folding part looks good to me. Richard. > Thanks, > -Will > > > 2017-11-14 Will Schmidt <will_schm...@vnet.ibm.com> > [gcc] > * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for > folding of vector compares. > (fold_build_vec_cmp): New helper function. > (fold_compare_helper): New helper function. > (builtin_function_type): Add compare builtins to the list of functions > having unsigned arguments. > * config/rs6000/vsx.md (vcmpneb, vcmpneh, vcmpnew): Update to specify > the not+eq combination. > > [testsuite] > * gcc.target/powerpc/builtins-3-p9.c: Add -O1, update > expected codegen checks. > * gcc.target/powerpc/vec-cmp-sel.c: Mark vars as volatile. > * gcc.target/powerpc/vsu/vec-cmpne-0.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-1.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-2.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-3.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-4.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-5.c: Add -O1. > * gcc.target/powerpc/vsu/vec-cmpne-6.c: Add -O1. > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 2c80a2f..0317324 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -16206,10 +16206,40 @@ rs6000_builtin_valid_without_lhs (enum > rs6000_builtins fn_code) > default: > return false; > } > } > > +/* Helper function to handle the gimple folding of a vector compare > + operation. This sets up true/false vectors, and uses the > + VEC_COND_EXPR operation. > + 'code' indicates which comparison is to be made. (EQ, GT, ...). > + 'type' indicates the type of the result. */ > +static tree > +fold_build_vec_cmp (tree_code code, tree type, > + tree arg0, tree arg1) > +{ > + tree cmp_type = build_same_sized_truth_vector_type (type); > + tree zero_vec = build_zero_cst (type); > + tree minus_one_vec = build_minus_one_cst (type); > + tree cmp = fold_build2 (code, cmp_type, arg0, arg1); > + return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec); > +} > + > +/* Helper function to handle the in-between steps for the > + vector compare built-ins. */ > +static void > +fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt) > +{ > + tree arg0 = gimple_call_arg (stmt, 0); > + tree arg1 = gimple_call_arg (stmt, 1); > + tree lhs = gimple_call_lhs (stmt); > + gimple *g = gimple_build_assign (lhs, > + fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1)); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > +} > + > /* Fold a machine-dependent built-in in GIMPLE. (For folding into > a constant, use rs6000_fold_builtin.) */ > > bool > rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > @@ -16701,10 +16731,67 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator > *gsi) > gimple_set_location (g, gimple_location (stmt)); > gsi_replace (gsi, g, true); > return true; > } > > + /* Vector compares; EQ, NE, GE, GT, LE. */ > + case ALTIVEC_BUILTIN_VCMPEQUB: > + case ALTIVEC_BUILTIN_VCMPEQUH: > + case ALTIVEC_BUILTIN_VCMPEQUW: > + case P8V_BUILTIN_VCMPEQUD: > + { > + fold_compare_helper (gsi, EQ_EXPR, stmt); > + return true; > + } > + > + case P9V_BUILTIN_CMPNEB: > + case P9V_BUILTIN_CMPNEH: > + case P9V_BUILTIN_CMPNEW: > + { > + fold_compare_helper (gsi, NE_EXPR, stmt); > + return true; > + } > + > + case VSX_BUILTIN_CMPGE_16QI: > + case VSX_BUILTIN_CMPGE_U16QI: > + case VSX_BUILTIN_CMPGE_8HI: > + case VSX_BUILTIN_CMPGE_U8HI: > + case VSX_BUILTIN_CMPGE_4SI: > + case VSX_BUILTIN_CMPGE_U4SI: > + case VSX_BUILTIN_CMPGE_2DI: > + case VSX_BUILTIN_CMPGE_U2DI: > + { > + fold_compare_helper (gsi, GE_EXPR, stmt); > + return true; > + } > + > + case ALTIVEC_BUILTIN_VCMPGTSB: > + case ALTIVEC_BUILTIN_VCMPGTUB: > + case ALTIVEC_BUILTIN_VCMPGTSH: > + case ALTIVEC_BUILTIN_VCMPGTUH: > + case ALTIVEC_BUILTIN_VCMPGTSW: > + case ALTIVEC_BUILTIN_VCMPGTUW: > + case P8V_BUILTIN_VCMPGTUD: > + case P8V_BUILTIN_VCMPGTSD: > + { > + fold_compare_helper (gsi, GT_EXPR, stmt); > + return true; > + } > + > + case VSX_BUILTIN_CMPLE_16QI: > + case VSX_BUILTIN_CMPLE_U16QI: > + case VSX_BUILTIN_CMPLE_8HI: > + case VSX_BUILTIN_CMPLE_U8HI: > + case VSX_BUILTIN_CMPLE_4SI: > + case VSX_BUILTIN_CMPLE_U4SI: > + case VSX_BUILTIN_CMPLE_2DI: > + case VSX_BUILTIN_CMPLE_U2DI: > + { > + fold_compare_helper (gsi, LE_EXPR, stmt); > + return true; > + } > + > default: > if (TARGET_DEBUG_BUILTIN) > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", > fn_code, fn_name1, fn_name2); > break; > @@ -18260,10 +18347,27 @@ builtin_function_type (machine_mode mode_ret, > machine_mode mode_arg0, > case MISC_BUILTIN_UNPACK_TD: > case MISC_BUILTIN_UNPACK_V1TI: > h.uns_p[0] = 1; > break; > > + /* unsigned arguments, bool return (compares). */ > + case ALTIVEC_BUILTIN_VCMPEQUB: > + case ALTIVEC_BUILTIN_VCMPEQUH: > + case ALTIVEC_BUILTIN_VCMPEQUW: > + case P8V_BUILTIN_VCMPEQUD: > + case VSX_BUILTIN_CMPGE_U16QI: > + case VSX_BUILTIN_CMPGE_U8HI: > + case VSX_BUILTIN_CMPGE_U4SI: > + case VSX_BUILTIN_CMPGE_U2DI: > + case ALTIVEC_BUILTIN_VCMPGTUB: > + case ALTIVEC_BUILTIN_VCMPGTUH: > + case ALTIVEC_BUILTIN_VCMPGTUW: > + case P8V_BUILTIN_VCMPGTUD: > + h.uns_p[1] = 1; > + h.uns_p[2] = 1; > + break; > + > /* unsigned arguments for 128-bit pack instructions. */ > case MISC_BUILTIN_PACK_TD: > case MISC_BUILTIN_PACK_V1TI: > h.uns_p[1] = 1; > h.uns_p[2] = 1; > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 335e5c1..0c00774 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -4596,16 +4596,16 @@ > emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56))); > emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp)); > DONE; > }) > > -;; Vector Compare Not Equal Byte > +;; Vector Compare Not Equal Byte (specified/not+eq:) > (define_insn "vcmpneb" > [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") > - (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v") > - (match_operand:V16QI 2 "altivec_register_operand" "v")] > - UNSPEC_VCMPNEB))] > + (not:V16QI > + (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v") > + (match_operand:V16QI 2 "altivec_register_operand" > "v"))))] > "TARGET_P9_VECTOR" > "vcmpneb %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > ;; Vector Compare Not Equal or Zero Byte > @@ -4617,16 +4617,16 @@ > UNSPEC_VCMPNEZB))] > "TARGET_P9_VECTOR" > "vcmpnezb %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > -;; Vector Compare Not Equal Half Word > +;; Vector Compare Not Equal Half Word (specified/not+eq:) > (define_insn "vcmpneh" > [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") > - (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") > - (match_operand:V8HI 2 "altivec_register_operand" "v")] > - UNSPEC_VCMPNEH))] > + (not:V8HI > + (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v") > + (match_operand:V8HI 2 "altivec_register_operand" "v"))))] > "TARGET_P9_VECTOR" > "vcmpneh %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > ;; Vector Compare Not Equal or Zero Half Word > @@ -4637,17 +4637,16 @@ > UNSPEC_VCMPNEZH))] > "TARGET_P9_VECTOR" > "vcmpnezh %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > -;; Vector Compare Not Equal Word > +;; Vector Compare Not Equal Word (specified/not+eq:) > (define_insn "vcmpnew" > [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") > - (unspec:V4SI > - [(match_operand:V4SI 1 "altivec_register_operand" "v") > - (match_operand:V4SI 2 "altivec_register_operand" "v")] > - UNSPEC_VCMPNEH))] > + (not:V4SI > + (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v") > + (match_operand:V4SI 2 "altivec_register_operand" "v"))))] > "TARGET_P9_VECTOR" > "vcmpnew %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > ;; Vector Compare Not Equal or Zero Word > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c > b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c > index 46a31ae..9dc53da 100644 > --- a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c > @@ -1,8 +1,8 @@ > /* { dg-do compile } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool char > test_ne_char (vector bool char x, vector bool char y) > @@ -51,21 +51,22 @@ test_vull_bperm_vull_vuc (vector unsigned long long x, > > test_ne_char 1 vcmpneb > test_ne_short 1 vcmpneh > test_ne_int 1 vcmpnew > test_ne_long 1 vcmpequd, 1 xxlnor inst > - test_nabs_long_long 1 xxspltib, 1 vsubudm, 1 vminsd > test_neg_long_long 1 vnegd > test_vull_bperm_vull_vuc 1 vbpermd > - > + test_nabs_long_long (-O0) 1 xxspltib, 1 vsubudm, 1 vminsd > + test_nabs_long_long (-O1) 1 vnegd, vminsd > +*/ > > /* { dg-final { scan-assembler-times "vcmpneb" 1 } } */ > /* { dg-final { scan-assembler-times "vcmpneh" 1 } } */ > /* { dg-final { scan-assembler-times "vcmpnew" 1 } } */ > /* { dg-final { scan-assembler-times "vcmpequd" 1 } } */ > /* { dg-final { scan-assembler-times "xxlnor" 1 } } */ > -/* { dg-final { scan-assembler-times "xxspltib" 1 } } */ > -/* { dg-final { scan-assembler-times "vsubudm" 1 } } */ > +/* { dg-final { scan-assembler-times "xxspltib" 0 } } */ > +/* { dg-final { scan-assembler-times "vsubudm" 0 } } */ > /* { dg-final { scan-assembler-times "vminsd" 1 } } */ > -/* { dg-final { scan-assembler-times "vnegd" 1 } } */ > +/* { dg-final { scan-assembler-times "vnegd" 2 } } */ > /* { dg-final { scan-assembler-times "vbpermd" 1 } } */ > > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c > b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c > index 6f3c093..f74a117 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c > +++ b/gcc/testsuite/gcc.target/powerpc/vec-cmp-sel.c > @@ -10,12 +10,13 @@ > into > c != {0,...,0} ? b : a */ > > #include <altivec.h> > > +volatile vector signed long long x = { 25399, -12900 }; > +volatile vector signed long long y = { 12178, -9987 }; > + > vector signed long long foo () { > - vector signed long long x = { 25399, -12900 }; > - vector signed long long y = { 12178, -9987 }; > vector bool long long b = vec_cmpge (x, y); > vector signed long long z = vec_sel (y, x, b); > return z; > } > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c > index 8e036e3..5c09c70 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-0.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool char > fetch_data (vector bool char *arg1_p, vector bool char *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c > index e510a44..a74f739 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-1.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool char > fetch_data (vector signed char *arg1_p, vector signed char *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c > index 0ea5aa7..f7f1e0d 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-2.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool char > fetch_data (vector unsigned char *arg1_p, vector unsigned char *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c > index 6bb5ebe..8ec94bd 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-3.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool short > fetch_data (vector signed short *arg1_p, vector signed short *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c > index a8d3f17..2f47697 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-4.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool short > fetch_data (vector unsigned short *arg1_p, vector unsigned short *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c > index dae3e22..1167085 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-5.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool int > fetch_data (vector signed int *arg1_p, vector signed int *arg2_p) > diff --git a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c > b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c > index 550a353..031a48f 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-cmpne-6.c > @@ -1,9 +1,9 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power9" } } */ > /* { dg-require-effective-target powerpc_p9vector_ok } */ > -/* { dg-options "-mcpu=power9" } */ > +/* { dg-options "-mcpu=power9 -O1" } */ > > #include <altivec.h> > > vector bool int > fetch_data (vector unsigned int *arg1_p, vector unsigned int *arg2_p) > >