Andrew Carlotti <andrew.carlo...@arm.com> writes: > This lowers vcombine intrinsics to a GIMPLE vector constructor, which enables > better optimisation during GIMPLE passes. > > gcc/ > > * config/aarch64/aarch64-builtins.cc > (aarch64_general_gimple_fold_builtin): Add combine. > > gcc/testsuite/ > > * gcc.target/aarch64/advsimd-intrinsics/combine.c: > New test. > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc > b/gcc/config/aarch64/aarch64-builtins.cc > index > 5753988a9964967c27a03aca5fddb9025fd8ed6e..a25756cfed5fab3a98ebf3e2ee29a5e117cbd2aa > 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -2857,6 +2857,28 @@ aarch64_general_gimple_fold_builtin (unsigned int > fcode, gcall *stmt, > gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); > break; > > + BUILTIN_VDC (BINOP, combine, 0, AUTO_FP) > + BUILTIN_VD_I (BINOPU, combine, 0, NONE) > + BUILTIN_VDC_P (BINOPP, combine, 0, NONE) > + { > + tree first_part, second_part; > + if (BYTES_BIG_ENDIAN) > + { > + second_part = args[0]; > + first_part = args[1]; > + } > + else > + { > + first_part = args[0]; > + second_part = args[1]; > + } > + tree ret_type = TREE_TYPE (gimple_call_lhs (stmt));
Just repeating what we discussed off-list for the record: this needs to be gimple_call_return_type. LGTM with that change. Thanks, Richard > + tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part, > + NULL_TREE, second_part); > + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor); > + } > + break; > + > /*lower store and load neon builtins to gimple. */ > BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD) > BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD) > diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..d08faf7a4a160a1e83428ed9b270731bbf7b8c8a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile { target { aarch64*-*-* } } } */ > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ > + > +#include <arm_neon.h> > + > +/* > +** foo: > +** umov w0, v1\.s\[1\] > +** ret > +*/ > + > +int32_t foo (int32x2_t a, int32x2_t b) > +{ > + int32x4_t c = vcombine_s32(a, b); > + return vgetq_lane_s32(c, 3); > +} > +