On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer <seu...@linux.vnet.ibm.com> wrote: > This patch adds support for the signed and unsigned int versions of the > vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI > OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1). There are > many of the builtins that are missing and this is the first of a series > of patches to add them. > > There aren't instructions for the int versions of vec_adde so the > output code is built from other built-ins that do have instructions > which in this case is just two vec_adds. > > The new test cases are executable tests which verify that the generated > code produces expected values. C macros were used so that the same > test case could be used for both the signed and unsigned versions. An > extra executable test case is also included to ensure that the modified > support for the __int128 versions of vec_adde is not broken. The same > test case could not be used for both int and __int128 because of some > differences in loading and storing the vectors. > > Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no > regressions. Is this ok for trunk? > > [gcc] > > 2016-04-06 Bill Seurer <seu...@linux.vnet.ibm.com> > > * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a > special case builtin. > * config/rs6000/rs6000-c.c (altivec_overloaded_builtins, > altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE > from altivec_overloaded_builtins structure. Add support for it to > altivec_resolve_overloaded_builtin function. > * config/rs6000/rs6000.c (altivec_init_builtins): Add definition > for __builtin_vec_adde. > > [gcc/testsuite] > > 2016-04-06 Bill Seurer <seu...@linux.vnet.ibm.com> > > * gcc.target/powerpc/vec-adde.c: New test. > * gcc.target/powerpc/vec-adde-int128.c: New test. 
> > Index: gcc/config/rs6000/rs6000-builtin.def > =================================================================== > --- gcc/config/rs6000/rs6000-builtin.def (revision 234745) > +++ gcc/config/rs6000/rs6000-builtin.def (working copy) > @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO > before we get to the point about classifying the builtin type. */ > > /* 3 argument Altivec overloaded builtins. */ > -BU_ALTIVEC_OVERLOAD_3 (ADDE, "adde") > BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec") > BU_ALTIVEC_OVERLOAD_3 (MADD, "madd") > BU_ALTIVEC_OVERLOAD_3 (MADDS, "madds") > @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p") > BU_ALTIVEC_OVERLOAD_P (VCMPGE_P, "vcmpge_p") > > /* Overloaded Altivec builtins that are handled as special cases. */ > +BU_ALTIVEC_OVERLOAD_X (ADDE, "adde") > BU_ALTIVEC_OVERLOAD_X (CTF, "ctf") > BU_ALTIVEC_OVERLOAD_X (CTS, "cts") > BU_ALTIVEC_OVERLOAD_X (CTU, "ctu") > Index: gcc/config/rs6000/rs6000-c.c > =================================================================== > --- gcc/config/rs6000/rs6000-c.c (revision 234745) > +++ gcc/config/rs6000/rs6000-c.c (working copy) > @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa > RS6000_BTI_unsigned_V1TI, 0 }, > { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ, > RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, > - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, > - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, > - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, > - { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM, > - RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, > { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ, > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, > RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, > @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores"); > warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use > \ > assignment for unaligned loads and 
stores"); > > + if (fcode == ALTIVEC_BUILTIN_VEC_ADDE) > + { > + /* vec_adde needs to be special cased because there is no instruction > + for the {un}signed int version */
End the comment sentence with a period followed by two spaces before the closing `*/`, per the GNU coding conventions. > + if (nargs != 3) > + { > + error ("vec_adde only accepts 3 arguments"); > + return error_mark_node; > + } > + > + tree arg0 = (*arglist)[0]; > + tree arg0_type = TREE_TYPE (arg0); > + tree arg1 = (*arglist)[1]; > + tree arg1_type = TREE_TYPE (arg1); > + tree arg2 = (*arglist)[2]; > + tree arg2_type = TREE_TYPE (arg2); > + > + /* All 3 arguments must be vectors of (signed or unsigned) (int or > + __int128) and the types must match */ Same. > + if ((arg0_type != arg1_type) || (arg1_type != arg2_type)) > + goto bad; > + if (TREE_CODE (arg0_type) != VECTOR_TYPE) > + goto bad; > + > + switch (TYPE_MODE (TREE_TYPE (arg0_type))) > + { > + /* for {un}signed ints, > + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) > */ Same. > + case SImode: > + { > + vec<tree, va_gc> *params = make_tree_vector(); > + vec_safe_push (params, arg0); > + vec_safe_push (params, arg1); > + tree call = altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); > + params = make_tree_vector(); > + vec_safe_push (params, call); > + vec_safe_push (params, arg2); > + return altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD], params); > + } > + /* for {un}signed __int128s use the vaddeuqm instruction directly */ Same. > + case TImode: > + return altivec_resolve_overloaded_builtin > + (loc, rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM], > arglist); > + > + /* Types other than {un}signed int and {un}signed __int128 > + are errors */ Same. > + default: > + goto bad; > + } > + } > + > /* For now treat vec_splats and vec_promote as the same. 
*/ > if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS > || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE) > Index: gcc/config/rs6000/rs6000.c > =================================================================== > --- gcc/config/rs6000/rs6000.c (revision 234745) > +++ gcc/config/rs6000/rs6000.c (working copy) > @@ -15582,6 +15582,10 @@ altivec_init_builtins (void) > = build_function_type_list (opaque_V4SI_type_node, > opaque_V4SI_type_node, opaque_V4SI_type_node, > integer_type_node, NULL_TREE); > + tree opaque_ftype_opaque_opaque_opaque > + = build_function_type_list (opaque_V4SI_type_node, > + opaque_V4SI_type_node, opaque_V4SI_type_node, > + opaque_V4SI_type_node, NULL_TREE); > tree int_ftype_int_opaque_opaque > = build_function_type_list (integer_type_node, > integer_type_node, opaque_V4SI_type_node, > @@ -15818,6 +15822,8 @@ altivec_init_builtins (void) > def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, > ALTIVEC_BUILTIN_VEC_CTS); > def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, > ALTIVEC_BUILTIN_VEC_CTU); > > + def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, > ALTIVEC_BUILTIN_VEC_ADDE); > + > /* Cell builtins. */ > def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, > ALTIVEC_BUILTIN_LVLX); > def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, > ALTIVEC_BUILTIN_LVLXL); > Index: gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vec-adde-int128.c (working copy) > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O3" } */ > + > +/* Test that the vec_adde builtin works as expected */ Same. 
> + > +#include "altivec.h" > + > +#define N 4096 > + > +void abort (); > + > +#define define_test_functions(STYPE, NAMESUFFIX) \ > +\ > +STYPE result_##NAMESUFFIX[N]; \ > +STYPE addend1_##NAMESUFFIX[N]; \ > +STYPE addend2_##NAMESUFFIX[N]; \ > +STYPE carry_##NAMESUFFIX[N]; \ > +STYPE expected_##NAMESUFFIX[N]; \ > +\ > +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ > +{ \ > + int i; \ > + vector STYPE v1, v2, v3, tmp; \ > + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ > + /* result=addend1+addend2+carry */ \ > + v1 = (vector STYPE) { addend1_##NAMESUFFIX[i] }; \ > + v2 = (vector STYPE) { addend2_##NAMESUFFIX[i] }; \ > + v3 = (vector STYPE) { carry_##NAMESUFFIX[i] }; \ > +\ > + tmp = vec_adde (v1, v2, v3); \ > + result_##NAMESUFFIX[i] = tmp[0]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void init_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + result_##NAMESUFFIX[i] = 0; \ > + addend1_##NAMESUFFIX[i] = 1; \ > + addend2_##NAMESUFFIX[i] = 2; \ > + carry_##NAMESUFFIX[i] = (i%2==0)? 
1: 0; \ > + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ > + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ > + abort(); \ > + } \ > +} > + > + > +#define execute_test_functions(STYPE, NAMESUFFIX) \ > +{ \ > + init_##NAMESUFFIX (); \ > + vector_tests_##NAMESUFFIX (); \ > + verify_results_##NAMESUFFIX (); \ > +} > + > + > +define_test_functions(signed __int128, si128); > +define_test_functions(unsigned __int128, ui128); > + > +int main () > +{ > + execute_test_functions(signed __int128, si128); > + execute_test_functions(unsigned __int128, ui128); > + > + return 0; > +} > + > + > Index: gcc/testsuite/gcc.target/powerpc/vec-adde.c > =================================================================== > --- gcc/testsuite/gcc.target/powerpc/vec-adde.c (revision 0) > +++ gcc/testsuite/gcc.target/powerpc/vec-adde.c (working copy) > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O3" } */ > + > +/* Test that the vec_adde builtin works as expected */ Same. 
> + > +#include "altivec.h" > + > +#define N 4096 > + > +void abort (); > + > +#define define_test_functions(STYPE, NAMESUFFIX) \ > +\ > +STYPE result_##NAMESUFFIX[N]; \ > +STYPE addend1_##NAMESUFFIX[N]; \ > +STYPE addend2_##NAMESUFFIX[N]; \ > +STYPE carry_##NAMESUFFIX[N]; \ > +STYPE expected_##NAMESUFFIX[N]; \ > +\ > +__attribute__((noinline)) void vector_tests_##NAMESUFFIX () \ > +{ \ > + int i; \ > + vector STYPE v1, v2, v3, tmp; \ > + for (i = 0; i < N; i+=16/sizeof(STYPE)) { \ > + /* result=addend1+addend2+carry */ \ > + v1 = vec_vsx_ld (0, &addend1_##NAMESUFFIX[i]); \ > + v2 = vec_vsx_ld (0, &addend2_##NAMESUFFIX[i]); \ > + v3 = vec_vsx_ld (0, &carry_##NAMESUFFIX[i]); \ > +\ > + tmp = vec_adde (v1, v2, v3); \ > + vec_vsx_st (tmp, 0, &result_##NAMESUFFIX[i]); \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void init_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + result_##NAMESUFFIX[i] = 0; \ > + addend1_##NAMESUFFIX[i] = 1; \ > + addend2_##NAMESUFFIX[i] = 2; \ > + carry_##NAMESUFFIX[i] = (i%2==0)? 1: 0; \ > + expected_##NAMESUFFIX[i] = addend1_##NAMESUFFIX[i] + \ > + addend2_##NAMESUFFIX[i] + carry_##NAMESUFFIX[i]; \ > + } \ > +} \ > +\ > +__attribute__((noinline)) void verify_results_##NAMESUFFIX () \ > +{ \ > + int i; \ > + for (i = 0; i < N; ++i) { \ > + if (result_##NAMESUFFIX[i] != expected_##NAMESUFFIX[i]) \ > + abort(); \ > + } \ > +} > + > + > +#define execute_test_functions(STYPE, NAMESUFFIX) \ > +{ \ > + init_##NAMESUFFIX (); \ > + vector_tests_##NAMESUFFIX (); \ > + verify_results_##NAMESUFFIX (); \ > +} > + > + > +define_test_functions(signed int, si); > +define_test_functions(unsigned int, ui); > + > +int main () > +{ > + execute_test_functions(signed int, si); > + execute_test_functions(unsigned int, ui); > + > + return 0; > +} > + > + > -- > > -Bill Seurer >