This patch adds built-in function support for the Power9 vslv and vsrv instructions.
I have bootstrapped and tested this patch against the trunk on powerpc64le-unknown-linux-gnu with no regressions. Is this ok for the trunk? I have not yet tested against the gcc-6 branch as this patch depends on infrastructure that has not yet been backported to gcc-6. Once the necessary infrastructure is available, is this ok for backporting to gcc-6 following bootstrap and regression testing? Thanks, Kelvin gcc/ChangeLog: 2016-05-27 Kelvin Nilsen <kel...@gcc.gnu.org> * config/rs6000/altivec.h (vec_slv): New macro. (vec_srv): New macro. * config/rs6000/altivec.md (UNSPEC_VSLV): New value. (UNSPEC_VSRV): New value. (vslv): New insn. (vsrv): New insn. * config/rs6000/rs6000-builtin.def (vslv): New builtin definition. (vsrv): New builtin definition. * config/rs6000/rs6000-c.c (P9V_BUILTIN_VSLV): Macro expansion to define argument types for new builtin. (P9V_BUILTIN_VSRV): Macro expansion to define argument types for new builtin. * doc/extend.texi: Document the new vec_slv and vec_srv built-in functions. gcc/testsuite/ChangeLog: 2016-05-27 Kelvin Nilsen <kel...@gcc.gnu.org> * gcc.target/powerpc/vslv-0.c: New test. * gcc.target/powerpc/vslv-1.c: New test. * gcc.target/powerpc/vsrv-0.c: New test. * gcc.target/powerpc/vsrv-1.c: New test. Index: gcc/config/rs6000/altivec.h =================================================================== --- gcc/config/rs6000/altivec.h (revision 236796) +++ gcc/config/rs6000/altivec.h (working copy) @@ -400,6 +400,9 @@ #ifdef _ARCH_PPC64 #define vec_vprtybq __builtin_vec_vprtybq #endif + +#define vec_slv __builtin_vec_vslv +#define vec_srv __builtin_vec_vsrv #endif /* Predicates. 
Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 236796) +++ gcc/config/rs6000/altivec.md (working copy) @@ -114,6 +114,8 @@ UNSPEC_STVLXL UNSPEC_STVRX UNSPEC_STVRXL + UNSPEC_VSLV + UNSPEC_VSRV UNSPEC_VMULWHUB UNSPEC_VMULWLUB UNSPEC_VMULWHSB @@ -1631,6 +1633,24 @@ "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "vslv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSLV))] + "TARGET_P9_VECTOR" + "vslv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vsrv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSRV))] + "TARGET_P9_VECTOR" + "vsrv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsl<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 236796) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -1749,6 +1749,14 @@ BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm") BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq") BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") +/* ISA 3.0 vector 2-argument functions. */ +BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) +BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv) + +/* ISA 3.0 vector overloaded 2-argument functions. */ +BU_P9V_OVERLOAD_2 (VSLV, "vslv") +BU_P9V_OVERLOAD_2 (VSRV, "vsrv") + /* 2 argument extended divide functions added in ISA 2.06. 
*/ BU_P7_MISC_2 (DIVWE, "divwe", CONST, dive_si) Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 236796) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -4488,6 +4488,13 @@ const struct altivec_builtin_types altivec_overloa { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + { P9V_BUILTIN_VEC_VSLV, P9V_BUILTIN_VSLV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VSRV, P9V_BUILTIN_VSRV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + /* Crypto builtins. */ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 236796) +++ gcc/doc/extend.texi (working copy) @@ -14686,8 +14686,8 @@ The @code{__builtin_divde}, @code{__builtin_divdeo 64-bit environment support ISA 2.06 or later. 
The following built-in functions are available for the PowerPC family -of processors, starting with ISA 3.0 or later (@option{-mcpu=power9} -or @option{-mmodulo}): +of processors, starting with ISA 3.0 or later (@option{-mcpu=power9}) +or with @option{-mmodulo}: @smallexample long long __builtin_darn (void); long long __builtin_darn_raw (void); @@ -17341,6 +17341,40 @@ vector __int128_t vec_vprtybq (vector __int128_t); vector __uint128_t vec_vprtybd (vector __uint128_t); @end smallexample +The following built-in vector functions are available for the PowerPC family +of processors, starting with ISA 3.0 or later (@option{-mcpu=power9}) +or with @option{-mpower9-vector}: +@smallexample +__vector unsigned char +vec_slv (__vector unsigned char src, __vector unsigned char shift_distance); +__vector unsigned char +vec_srv (__vector unsigned char src, __vector unsigned char shift_distance); +@end smallexample + +The @code{vec_slv} and @code{vec_srv} functions operate in parallel on +all of the bytes of their @code{src} and @code{shift_distance} +arguments. The behavior of @code{vec_slv} is as if +there existed a temporary array of 17 unsigned characters +@code{slv_array} within which elements 0 through 15 are the same as +the entries in the @code{src} array and element 16 equals 0. The +result returned from the @code{vec_slv} function is a +@code{__vector} of 16 unsigned characters within which element +@code{i} is computed using the C expression +@code{0xff & (*((unsigned short *)(slv_array + i)) << (0x07 & +shift_distance[i]))}, +with this resulting value coerced to the @code{unsigned char} type. +The behavior of @code{vec_srv} is as if +there existed a temporary array of 17 unsigned characters +@code{srv_array} within which element 0 equals zero and +elements 1 through 16 equal the elements 0 through 15 of +the @code{src} array. 
The +result returned from the @code{vec_srv} function is a +@code{__vector} of 16 unsigned characters within which element +@code{i} is computed using the C expression +@code{0xff & (*((unsigned short *)(srv_array + i)) >> +(0x07 & shift_distance[i]))}, +with this resulting value coerced to the @code{unsigned char} type. + If the cryptographic instructions are enabled (@option{-mcrypto} or @option{-mcpu=power8}), the following builtins are enabled. Index: gcc/testsuite/gcc.target/powerpc/vslv-0.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vslv-0.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vslv-0.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-mcpu=power9" } */ + +#include <altivec.h> + +__vector unsigned char +doCharShiftLeft (__vector unsigned char *p, __vector unsigned char *q) +{ + __vector unsigned char result, input, shift_distance; + result = __builtin_vec_vslv (input, shift_distance); + return result; +} + +/* { dg-final { scan-assembler "vslv" } } */ Index: gcc/testsuite/gcc.target/powerpc/vslv-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vslv-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vslv-1.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-mcpu=power9" } */ + +#include <altivec.h> + +__vector unsigned char +doCharShiftLeft (__vector unsigned char *p, __vector unsigned char *q) +{ + __vector unsigned char result, input, shift_distance; + result = vec_slv (input, shift_distance); + return result; +} + +/* { dg-final { scan-assembler "vslv" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsrv-0.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsrv-0.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsrv-0.c (working copy) @@ -0,0 +1,14 @@ +/* { 
dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-mcpu=power9" } */ + +#include <altivec.h> + +__vector unsigned char +doCharShiftLeft (__vector unsigned char *p, __vector unsigned char *q) +{ + __vector unsigned char result, input, shift_distance; + result = __builtin_vec_vsrv (input, shift_distance); + return result; +} + +/* { dg-final { scan-assembler "vsrv" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsrv-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsrv-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsrv-1.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-options "-mcpu=power9" } */ + +#include <altivec.h> + +__vector unsigned char +doCharShiftLeft (__vector unsigned char *p, __vector unsigned char *q) +{ + __vector unsigned char result, input, shift_distance; + result = vec_srv (input, shift_distance); + return result; +} + +/* { dg-final { scan-assembler "vsrv" } } */