Hi, ISA 3.0 adds the vbpermd instruction, related to the vbpermq instruction added in ISA 2.07. This patch adds support for that instruction, and also ensures that vec_bperm provides access to the three supported interfaces mandated by the ELFv2 ABI:
vector unsigned char vec_bperm (vector unsigned char, vector unsigned char); vector unsigned long long vec_bperm (vector unsigned __int128, vector unsigned char); vector unsigned long long vec_bperm (vector unsigned long long, vector unsigned char); The first two forms correspond to vbpermq, and the third corresponds to vbpermd. Prior to this patch, vec_bperm was an alias for __builtin_vec_vbpermq, which corresponds to the first two forms above, except that it returns vector unsigned long long for the first case. We need to keep __builtin_vec_vbpermq as it is a published interface, but vec_bperm needs to use the correct return value for the first form, and be broadened to include the third form. Thus vec_bperm is now an alias for __builtin_vec_vbperm_api, which is a new interface covering all three forms. The change in return value for the first form is not expected to cause difficulties, as this is a rarely used interface and any incompatibility can be solved with a cast. The previous version was a violation of the published API. We may want to make note of this in the release notes. Bootstrapped and tested on powerpc64-unknown-linux-gnu and on powerpc64le-unknown-linux-gnu with no regressions. Is this ok for trunk? Thanks, Bill [gcc] 2016-01-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/altivec.h (vec_bperm): Change #define. * config/rs6000/altivec.md (UNSPEC_VBPERMD): New enum constant. (altivec_vbpermq2): New define_insn. (altivec_vbpermd): Likewise. * config/rs6000/rs6000-builtin.def (VBPERMQ2): New monomorphic function interface. (VBPERMD): Likewise. (VBPERM): New polymorphic function interface. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins_table): Add entries for P9V_BUILTIN_VEC_VBPERM. * doc/extend.texi: Add interfaces for vec_bperm. [gcc/testsuite] 2016-01-17 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.target/powerpc/p8vector-builtin-8.c: Add new form for vec_bperm. * gcc.target/powerpc/p9-vbpermd.c: New file. 
Index: gcc/config/rs6000/altivec.h =================================================================== --- gcc/config/rs6000/altivec.h (revision 244498) +++ gcc/config/rs6000/altivec.h (working copy) @@ -347,7 +347,7 @@ #define vec_vaddudm __builtin_vec_vaddudm #define vec_vadduqm __builtin_vec_vadduqm #define vec_vbpermq __builtin_vec_vbpermq -#define vec_bperm __builtin_vec_vbpermq +#define vec_bperm __builtin_vec_vbperm_api #define vec_vclz __builtin_vec_vclz #define vec_cntlz __builtin_vec_vclz #define vec_vclzb __builtin_vec_vclzb Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 244498) +++ gcc/config/rs6000/altivec.md (working copy) @@ -150,6 +150,7 @@ UNSPEC_VSUBEUQM UNSPEC_VSUBECUQ UNSPEC_VBPERMQ + UNSPEC_VBPERMD UNSPEC_BCDADD UNSPEC_BCDSUB UNSPEC_BCD_OVERFLOW @@ -3632,6 +3633,27 @@ [(set_attr "length" "4") (set_attr "type" "vecsimple")]) +; One of the vector API interfaces requires returning vector unsigned char. 
+(define_insn "altivec_vbpermq2" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMQ))] + "TARGET_P8_VECTOR" + "vbpermq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vbpermd" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VBPERMD))] + "TARGET_P9_VECTOR" + "vbpermd %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + ;; Decimal Integer operations (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 244498) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -1802,6 +1802,7 @@ BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) BU_P8V_AV_2 (VBPERMQ, "vbpermq", CONST, altivec_vbpermq) +BU_P8V_AV_2 (VBPERMQ2, "vbpermq2", CONST, altivec_vbpermq2) BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) @@ -1918,6 +1919,7 @@ BU_P9V_OVERLOAD_2 (VSRV, "vsrv") BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3) BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3) BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3) +BU_P9V_AV_2 (VBPERMD, "vbpermd", CONST, altivec_vbpermd) /* ISA 3.0 vector overloaded 2 argument functions. 
*/ BU_P9V_OVERLOAD_2 (VADU, "vadu") @@ -1924,6 +1926,7 @@ BU_P9V_OVERLOAD_2 (VADU, "vadu") BU_P9V_OVERLOAD_2 (VADUB, "vadub") BU_P9V_OVERLOAD_2 (VADUH, "vaduh") BU_P9V_OVERLOAD_2 (VADUW, "vaduw") +BU_P9V_OVERLOAD_2 (VBPERM, "vbperm_api") /* 1 argument vsx scalar functions added in ISA 3.0 (power9). */ BU_P9V_64BIT_VSX_1 (VSEEDP, "scalar_extract_exp", CONST, xsxexpdp) Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 244498) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -4180,6 +4180,16 @@ const struct altivec_builtin_types altivec_overloa RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { P9V_BUILTIN_VEC_VBPERM, P9V_BUILTIN_VBPERMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VBPERM, P8V_BUILTIN_VBPERMQ, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VBPERM, P8V_BUILTIN_VBPERMQ2, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, RS6000_BTI_V2DI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ, Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 244498) +++ gcc/doc/extend.texi (working copy) @@ -17828,6 +17828,10 @@ vector unsigned long long vec_vaddudm (vector unsi vector long long vec_vbpermq (vector signed char, vector signed char); vector long long vec_vbpermq (vector unsigned char, vector unsigned char); +vector unsigned char vec_bperm (vector unsigned char, vector unsigned char); +vector unsigned long long vec_bperm (vector unsigned __int128, + vector unsigned char); + vector long long vec_cntlz (vector long long); vector unsigned long long vec_cntlz (vector unsigned long long); vector int vec_cntlz 
(vector int); @@ -17996,6 +18000,9 @@ If the ISA 3.0 instruction set additions (@option{ are available: @smallexample +vector unsigned long long vec_bperm (vector unsigned long long, + vector unsigned char); + vector bool char vec_cmpne (vector bool char, vector bool char); vector bool short vec_cmpne (vector bool short, vector bool short); vector bool int vec_cmpne (vector bool int, vector bool int); Index: gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (revision 244498) +++ gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (working copy) @@ -38,6 +38,7 @@ void foo (vector unsigned char *vucr, *vuxr++ = vec_adde (vuxa, vuxb, vuxc); *vsxr++ = vec_addec (vsxa, vsxb, vsxc); *vuxr++ = vec_addec (vuxa, vuxb, vuxc); + *vucr++ = vec_bperm (vuca, vucb); *vulr++ = vec_bperm (vuxa, vucb); *vbcr++ = vec_eqv (vbca, vbcb); *vbir++ = vec_eqv (vbia, vbib); @@ -64,7 +65,7 @@ void foo (vector unsigned char *vucr, /* { dg-final { scan-assembler-times "vaddcuq" 2 } } */ /* { dg-final { scan-assembler-times "vaddeuqm" 2 } } */ /* { dg-final { scan-assembler-times "vaddecuq" 2 } } */ -/* { dg-final { scan-assembler-times "vbpermq" 1 } } */ +/* { dg-final { scan-assembler-times "vbpermq" 2 } } */ /* { dg-final { scan-assembler-times "xxleqv" 4 } } */ /* { dg-final { scan-assembler-times "vgbbd" 1 } } */ /* { dg-final { scan-assembler-times "xxlnand" 4 } } */ Index: gcc/testsuite/gcc.target/powerpc/p9-vbpermd.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-vbpermd.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-vbpermd.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile { target { powerpc64*-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options 
"-mcpu=power9 -O2" } */ + +/* Verify P9 vector bit-permute doubleword instruction. */ + +#include <altivec.h> + +vector unsigned long long +test_vbpermd (vector unsigned long long a, vector unsigned char b) +{ + return vec_bperm (a, b); +} + +/* { dg-final { scan-assembler "vbpermd" } } */