Kyrylo Tkachov <[email protected]> writes:
> Hi all,
>
> The MD pattern for the XAR instruction in SVE2 is currently expressed with
> non-canonical RTL by using a ROTATERT code with a constant rotate amount.
> Fix it by using the left ROTATE code. This necessitates adjusting the rotate
> amount during expand.
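>
> As a minimal scalar illustration of the adjustment (sketch only; the helper
> names below are made up and stand in for the per-element vector rotates):
> for an N-bit element and 0 < r < N, a right-rotate by r is the same
> operation as a left-rotate by N - r.
>
> #include <stdint.h>
>
> static inline uint32_t rotr32 (uint32_t x, unsigned r)
> { return (x >> r) | (x << (32 - r)); }
>
> static inline uint32_t rotl32 (uint32_t x, unsigned r)
> { return (x << r) | (x >> (32 - r)); }
>
> /* For 0 < r < 32: rotr32 (x, r) == rotl32 (x, 32 - r).  */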
>
> Additionally, as the SVE2 XAR instruction is unpredicated and can handle all
> element sizes from .b to .d, it is a good fit for implementing the XOR+ROTATE
> operation for Advanced SIMD modes where the TARGET_SHA3 XAR instruction
> cannot be used (it only handles V2DImode operands). Therefore let's extend
> the accepted modes of the SVE2 pattern to include the Advanced SIMD integer
> modes.
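>
> For instance, a XOR+ROTATE idiom on a 4x32-bit Advanced SIMD vector like the
> one below (taken from the new xar_neon_modes.c test; shown here purely as an
> illustration) can now be matched by the SVE2 XAR pattern when +sha3 is not
> available:
>
> typedef unsigned int __attribute__ ((vector_size (16))) v4si;
>
> v4si
> xar_v4si (v4si a, v4si b) {
>   v4si c = a ^ b;
>   return (c << 9) ^ (c >> 23);  /* rotate right by 23 -> xar z0.s, ..., #23 */
> }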
>
> This causes some tests for the svxar* intrinsics to fail because they now
> simplify to a plain EOR when the rotate amount equals the width of the
> element. This simplification is desirable (EOR instructions have throughput
> better than or equal to that of XAR, and they are non-destructive of their
> input) so the tests are adjusted.
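>
> For example (sketch only; svxar_n_s16 is the ACLE intrinsic exercised by the
> adjusted tests, and the wrapper name is just for illustration), rotating each
> 16-bit lane by 16 is the identity, so this now compiles to a single
> non-destructive EOR instead of XAR when built for an SVE2-enabled target:
>
> #include <arm_sve.h>
>
> svint16_t
> xar_by_lane_width (svint16_t x, svint16_t y)
> {
>   return svxar_n_s16 (x, y, 16);
> }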
>
> For V2DImode XAR operations we should prefer the Advanced SIMD version when
> it is available (TARGET_SHA3) because it is non-destructive, so restrict the
> SVE2 pattern accordingly. Tests are added to confirm this.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for mainline?
> Thanks,
> Kyrill
>
> Signed-off-by: Kyrylo Tkachov <[email protected]>
>
> gcc/
>
> * config/aarch64/iterators.md (SVE_ASIMD_FULL_I): New mode iterator.
> * config/aarch64/aarch64-sve2.md (@aarch64_sve2_xar<mode>):
> Use SVE_ASIMD_FULL_I modes. Use ROTATE code for the rotate step.
> Adjust output logic.
> * config/aarch64/aarch64-sve-builtins-sve2.cc (svxar_impl): Define.
> (svxar): Use the above.
>
> gcc/testsuite/
>
> * gcc.target/aarch64/xar_neon_modes.c: New test.
> * gcc.target/aarch64/xar_v2di_nonsve.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_s16.c: Scan for EOR rather than
> XAR.
> * gcc.target/aarch64/sve2/acle/asm/xar_s32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_s64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_s8.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_u16.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_u32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_u64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/xar_u8.c: Likewise.
Looks great to me. Just one very minor nit:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> index ddd6e466ee3..62c17281ec7 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> @@ -90,6 +90,23 @@ public:
> }
> };
>
> +class svxar_impl : public function_base
> +{
> +public:
> + rtx
> + expand (function_expander &e) const override
> + {
> + /* aarch64_sve2_xar represents this operation with a left-rotate RTX.
> + Convert the right-rotate amount from the intrinsic to fit this. */
> + machine_mode mode = e.vector_mode (0);
> + HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
> + - INTVAL (e.args[2]);
> + e.args[2]
> + = aarch64_simd_gen_const_vector_dup (mode, rot);
The split line seems unnecessary.
OK with that change as far as I'm concerned.
Thanks,
Richard
> + return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
> + }
> +};
> +
> class svcdot_impl : public function_base
> {
> public:
> @@ -773,6 +790,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
> FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
> FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
> FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
> -FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
> +FUNCTION (svxar, svxar_impl,)
>
> } /* end namespace aarch64_sve */
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index 5f2697c3179..8047f405a17 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -1266,18 +1266,28 @@
> ;; - XAR
> ;; -------------------------------------------------------------------------
>
> +;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
> +;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
> +;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
> +;; version should be preferred when available as it is non-destructive on its
> +;; input.
> (define_insn "@aarch64_sve2_xar<mode>"
> - [(set (match_operand:SVE_FULL_I 0 "register_operand")
> - (rotatert:SVE_FULL_I
> - (xor:SVE_FULL_I
> - (match_operand:SVE_FULL_I 1 "register_operand")
> - (match_operand:SVE_FULL_I 2 "register_operand"))
> - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
> - "TARGET_SVE2"
> - {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
> [ w , %0 , w ; * ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
> [ ?&w , w , w ; yes ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
> + [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
> + (rotate:SVE_ASIMD_FULL_I
> + (xor:SVE_ASIMD_FULL_I
> + (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
> + (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
> + (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
> + "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
> + {
> + operands[3]
> + = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
> + - INTVAL (unwrap_const_vec_duplicate (operands[3])));
> + if (which_alternative == 0)
> + return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
> + return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>,
> %Z2.<Vetype>, #%3";
> }
> + [(set_attr "movprfx" "*,yes")]
> )
>
> ;; -------------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 0bc98315bb6..8269b0cdcd9 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -446,6 +446,9 @@
> ;; All fully-packed SVE integer vector modes.
> (define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])
>
> +;; All fully-packed SVE integer and Advanced SIMD integer modes.
> +(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
> +
> ;; All fully-packed SVE floating-point vector modes.
> (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> index 34351d52718..f69ba3f7b06 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s16_untied, svint16_t,
>
> /*
> ** xar_16_s16_tied1:
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
>
> /*
> ** xar_16_s16_tied2:
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
> /*
> ** xar_16_s16_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.h, z0\.h, z2\.h, #16
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> index 366a6172807..540f7b875ec 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s32_untied, svint32_t,
>
> /*
> ** xar_32_s32_tied1:
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
>
> /*
> ** xar_32_s32_tied2:
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** (
> +** eor z0\.d, z0\.d, z1\.d
> +** |
> +** eor z0\.d, z1\.d, z0\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
> /*
> ** xar_32_s32_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.s, z0\.s, z2\.s, #32
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> index dedda2ed044..9491dbdb848 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s64_untied, svint64_t,
>
> /*
> ** xar_64_s64_tied1:
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
>
> /*
> ** xar_64_s64_tied2:
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
> /*
> ** xar_64_s64_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.d, z0\.d, z2\.d, #64
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> index 904352b93da..e62e5bca5ba 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s8_untied, svint8_t,
>
> /*
> ** xar_8_s8_tied1:
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
>
> /*
> ** xar_8_s8_tied2:
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
> /*
> ** xar_8_s8_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.b, z0\.b, z2\.b, #8
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> index c7b9665aeed..6269145bc6d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u16_untied, svuint16_t,
>
> /*
> ** xar_16_u16_tied1:
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
>
> /*
> ** xar_16_u16_tied2:
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
> /*
> ** xar_16_u16_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.h, z0\.h, z2\.h, #16
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.h, z0\.h, z1\.h, #16
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> index 115ead7701c..99efd14e1ed 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u32_untied, svuint32_t,
>
> /*
> ** xar_32_u32_tied1:
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
>
> /*
> ** xar_32_u32_tied2:
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
> /*
> ** xar_32_u32_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.s, z0\.s, z2\.s, #32
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.s, z0\.s, z1\.s, #32
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> index 1d0d90e90d6..5c770ffdadb 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u64_untied, svuint64_t,
>
> /*
> ** xar_64_u64_tied1:
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
>
> /*
> ** xar_64_u64_tied2:
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
> /*
> ** xar_64_u64_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.d, z0\.d, z2\.d, #64
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.d, z0\.d, z1\.d, #64
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> index 3b6161729cb..5ae5323a08a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u8_untied, svuint8_t,
>
> /*
> ** xar_8_u8_tied1:
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
>
> /*
> ** xar_8_u8_tied2:
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** (
> +** eor z0\.d, z1\.d, z0\.d
> +** |
> +** eor z0\.d, z0\.d, z1\.d
> +** )
> ** ret
> */
> TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
> /*
> ** xar_8_u8_untied:
> ** (
> -** movprfx z0, z1
> -** xar z0\.b, z0\.b, z2\.b, #8
> +** eor z0\.d, z1\.d, z2\.d
> ** |
> -** movprfx z0, z2
> -** xar z0\.b, z0\.b, z1\.b, #8
> +** eor z0\.d, z2\.d, z1\.d
> ** )
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
> new file mode 100644
> index 00000000000..750fbcfc48a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#pragma GCC target "+sve2+nosha3"
> +
> +typedef char __attribute__ ((vector_size (16))) v16qi;
> +typedef unsigned short __attribute__ ((vector_size (16))) v8hi;
> +typedef unsigned int __attribute__ ((vector_size (16))) v4si;
> +typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
> +
> +v16qi
> +xar_v16qi (v16qi a, v16qi b) {
> + v16qi c = a ^ b;
> + return (c << 2) ^ (c >> 6);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.b, z[0-9]+.b, z[0-9]+.b, #6} } } */
> +
> +v8hi
> +xar_v8hi (v8hi a, v8hi b) {
> + v8hi c = a ^ b;
> + return (c << 13) ^ (c >> 3);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.h, z[0-9]+.h, z[0-9]+.h, #3} } } */
> +
> +v4si
> +xar_v4si (v4si a, v4si b) {
> + v4si c = a ^ b;
> + return (c << 9) ^ (c >> 23);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.s, z[0-9]+.s, z[0-9]+.s, #23} } } */
> +
> +/* When +sha3 for Advanced SIMD is not available we should still use the
> + SVE2 form of XAR. */
> +v2di
> +xar_v2di (v2di a, v2di b) {
> + v2di c = a ^ b;
> + return (c << 22) ^ (c >> 42);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.d, z[0-9]+.d, z[0-9]+.d, #42} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
> new file mode 100644
> index 00000000000..b0f1a97222b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#pragma GCC target "+sve2+sha3"
> +
> +typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
> +
> +/* Both +sve2 and +sha3 have V2DImode XAR instructions, but we should
> + prefer the Advanced SIMD one when both are available. */
> +v2di
> +xar_v2di (v2di a, v2di b) {
> + v2di c = a ^ b;
> + return (c << 22) ^ (c >> 42);
> +}
> +/* { dg-final { scan-assembler {\txar\tv0.2d, v[0-9]+.2d, v[0-9]+.2d, 42} } } */
> +