Hi all, This patch implements some saturating math *laneq_s* intrinsics. The implementation is fairly straightforward: just use more general mode iterators, add the appropriate builtins, etc.
Some execution tests are added with some scan-assembly parts to make sure we generate the correct lane number for both big and little endian versions of the lanewise intrinsics.
Tested aarch64-none-elf, aarch64_be-none-elf and bootstrapped on aarch64-linux.
Ok for trunk? 2014-08-04 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * config/aarch64/aarch64-simd.md (aarch64_sqdmulh_laneq<mode>): Use VSDQ_HSI mode iterator. (aarch64_sqrdmulh_laneq<mode>): Likewise. (aarch64_sq<r>dmulh_laneq<mode>_internal): New define_insn. * config/aarch64/aarch64-simd-builtins.def (sqdmulh_laneq): Use BUILTIN_VSDQ_HSI macro. (sqrdmulh_laneq): Likewise. * config/aarch64/arm_neon.h (vqdmlalh_laneq_s16): New intrinsic. (vqdmlals_laneq_s32): Likewise. (vqdmlslh_laneq_s16): Likewise. (vqdmlsls_laneq_s32): Likewise. (vqdmulhh_laneq_s16): Likewise. (vqdmulhs_laneq_s32): Likewise. (vqrdmulhh_laneq_s16): Likewise. (vqrdmulhs_laneq_s32): Likewise. 2014-08-04 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c: New test. * gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c: Likewise. * gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c: Likewise. * gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c: Likewise. * gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c: Likewise. * gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c: Likewise. * gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c: Likewise. * gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c: Likewise.
commit 53142c1282ab6a902ed8c5c1afc5089657c4437a Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com> Date: Tue Jun 17 13:33:57 2014 +0100 [AArch64] Implement some saturating math *laneq_s* intrinsics diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 268432c..a33b151 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -192,9 +192,9 @@ BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */ BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) - BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VSDQ_HSI (TERNOP, sqdmulh_laneq, 0) BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) - BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_VSDQ_HSI (TERNOP, sqrdmulh_laneq, 0) BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 6300b9b..39faf2f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2793,8 +2793,8 @@ ) (define_expand "aarch64_sqdmulh_laneq<mode>" - [(match_operand:VDQHS 0 "register_operand" "") - (match_operand:VDQHS 1 "register_operand" "") + [(match_operand:VSDQ_HSI 0 "register_operand" "") + (match_operand:VSDQ_HSI 1 "register_operand" "") (match_operand:<VCONQ> 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "")] "TARGET_SIMD" @@ -2810,8 +2810,8 @@ ) (define_expand "aarch64_sqrdmulh_laneq<mode>" - [(match_operand:VDQHS 0 "register_operand" "") - (match_operand:VDQHS 1 "register_operand" "") + [(match_operand:VSDQ_HSI 0 "register_operand" "") + (match_operand:VSDQ_HSI 1 "register_operand" "") (match_operand:<VCONQ> 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "")] "TARGET_SIMD" @@ -2890,6 +2890,21 @@ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) +(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal" + [(set 
(match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "w") + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] +) + ;; vqdml[sa]l (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 66968e8..3e26345 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -19426,6 +19426,12 @@ vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d); +} + __extension__ static __inline int64_t __attribute__ ((__always_inline__)) vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c) { @@ -19439,6 +19445,12 @@ vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) {__builtin_aarch64_sqdmlal_lanesi (__a[0], __b, __c, __d)}; } +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d); +} + /* vqdmlsl */ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -19553,6 +19565,12 @@ vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_laneq_s16 (int32_t 
__a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d); +} + __extension__ static __inline int64_t __attribute__ ((__always_inline__)) vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c) { @@ -19565,6 +19583,12 @@ vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) return (int64x1_t) {__builtin_aarch64_sqdmlsl_lanesi (__a[0], __b, __c, __d)}; } +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d); +} + /* vqdmulh */ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) @@ -19603,6 +19627,12 @@ vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c); +} + __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) { @@ -19615,6 +19645,12 @@ vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c); +} + /* vqdmull */ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -19919,6 +19955,12 @@ vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return 
__builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c); +} + __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) { @@ -19931,6 +19973,12 @@ vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); +} + /* vqrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c new file mode 100644 index 0000000..7178e28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c @@ -0,0 +1,35 @@ +/* Test the vqdmlalh_laneq_s16 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int32_t arg1; + int16_t arg2; + int16x8_t arg3; + int32_t actual; + int32_t expected; + + arg1 = 0x80000000; + arg2 = -24497; + arg3 = vcombine_s16 (vcreate_s16 (0x008a80007fff7fffULL), + vcreate_s16 (0xfffffa797fff8000ULL)); + + actual = vqdmlalh_laneq_s16 (arg1, arg2, arg3, 7); + expected = -2147434654; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c new file mode 100644 index 0000000..7c508e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c @@ -0,0 +1,35 @@ +/* Test the vqdmlals_laneq_s32 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int64_t arg1; + int32_t arg2; + int32x4_t arg3; + int64_t actual; + int64_t expected; + + arg1 = -9223182289494545592LL; + arg2 = 32768; + arg3 = vcombine_s32 (vcreate_s32 (0xffff7fff8000ffffULL), + vcreate_s32 (0x80000000ffff0000ULL)); + + actual = vqdmlals_laneq_s32 (arg1, arg2, arg3, 3); + expected = -9223323026982900920LL; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c new file mode 100644 index 0000000..46201e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c @@ -0,0 +1,35 @@ +/* Test the vqdmlslh_laneq_s16 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int32_t arg1; + int16_t arg2; + int16x8_t arg3; + int32_t actual; + int32_t expected; + + arg1 = -2147450881; + arg2 = 32767; + arg3 = vcombine_s16 (vcreate_s16 (0x359d7fff00007fffULL), + vcreate_s16 (0xe678ffff00008000ULL)); + + actual = vqdmlslh_laneq_s16 (arg1, arg2, arg3, 4); + expected = -32769; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[4\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c new file mode 100644 index 0000000..8644ac6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c @@ -0,0 +1,35 @@ +/* Test the vqdmlsls_laneq_s32 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int64_t arg1; + int32_t arg2; + int32x4_t arg3; + int64_t actual; + int64_t expected; + + arg1 = 140733193453567LL; + arg2 = 25544; + arg3 = vcombine_s32 (vcreate_s32 (0x417b8000ffff8397LL), + vcreate_s32 (0x7fffffff58488000LL)); + + + actual = vqdmlsls_laneq_s32 (arg1, arg2, arg3, 3); + expected = 31022548895631LL; + + if (expected != actual) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c new file mode 100644 index 0000000..0e72254 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c @@ -0,0 +1,33 @@ +/* Test the vqdmulhh_laneq_s16 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int16_t arg1; + int16x8_t arg2; + int16_t actual; + int16_t expected; + + arg1 = 268; + arg2 = vcombine_s16 (vcreate_s16 (0xffffffff00000000ULL), + vcreate_s16 (0x0000800018410000ULL)); + + actual = vqdmulhh_laneq_s16 (arg1, arg2, 7); + expected = 0; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c new file mode 100644 index 0000000..0c75c5a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c @@ -0,0 +1,33 @@ +/* Test the vqdmulhs_laneq_s32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int32_t arg1; + int32x4_t arg2; + int32_t actual; + int32_t expected; + + arg1 = 0x80000000; + arg2 = vcombine_s32 (vcreate_s32 (0x950dffffc4f40000ULL), + vcreate_s32 (0x7fff8000274a8000ULL)); + + actual = vqdmulhs_laneq_s32 (arg1, arg2, 3); + expected = -2147450880; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c new file mode 100644 index 0000000..3c2a074 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c @@ -0,0 +1,33 @@ +/* Test the vqrdmulhh_laneq_s16 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int16_t arg1; + int16x8_t arg2; + int16_t actual; + int16_t expected; + + arg1 = 0; + arg2 = vcombine_s16 (vcreate_s16 (0x7fffffffa7908000ULL), + vcreate_s16 (0x8000d2607fff0000ULL)); + + actual = vqrdmulhh_laneq_s16 (arg1, arg2, 7); + expected = 0; + + if (expected != actual) + abort (); + + return 0; +} + + +/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c new file mode 100644 index 0000000..7e66213 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c @@ -0,0 +1,32 @@ +/* Test the vqrdmulhs_laneq_s32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + int32_t arg1; + int32x4_t arg2; + int32_t actual; + int32_t expected; + + arg1 = 32768; + arg2 = vcombine_s32 (vcreate_s32 (0x8000ffffffffcd5bULL), + vcreate_s32 (0x7fffffffffffffffULL)); + + actual = vqrdmulhs_laneq_s32 (arg1, arg2, 3); + expected = 32768; + + if (expected != actual) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */