<[email protected]> writes:
> @@ -4899,7 +4876,9 @@
> if (CONST_INT_P (operands[2]))
> {
> amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
> - if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
> + if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)
> + && !aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> + <optab>_optab == ashl_optab))
> amount = force_reg (<MODE>mode, amount);
> }
> else
I think the problem here is that the old code is testing the wrong thing.
It should instead be testing "amount", as in:
amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
amount = force_reg (<MODE>mode, amount);
> @@ -4923,6 +4902,13 @@
> UNSPEC_PRED_X))]
> "TARGET_SVE"
> {
> + if (aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
> + <optab>_optab == ashl_optab))
<CODE> == ASHIFT would be more direct than <optab>_optab == ashl_optab.
Thanks,
Richard
> + {
> + emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
> + operands[2]));
> + DONE;
> + }
> operands[3] = aarch64_ptrue_reg (<VPRED>mode);
> }
> )
> @@ -4952,27 +4938,27 @@
> ""
> )
>
> -;; Unpredicated shift operations by a constant (post-RA only).
> +;; Unpredicated shift operations by a constant.
> ;; These are generated by splitting a predicated instruction whose
> ;; predicate is unused.
> -(define_insn "*post_ra_v_ashl<mode>3"
> +(define_insn "aarch64_vashl<mode>3_const"
> [(set (match_operand:SVE_I 0 "register_operand")
> (ashift:SVE_I
> (match_operand:SVE_I 1 "register_operand")
> (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
> - "TARGET_SVE && reload_completed"
> + "TARGET_SVE"
> {@ [ cons: =0 , 1 , 2 ]
> [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
> [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
> }
> )
>
> -(define_insn "*post_ra_v_<optab><mode>3"
> +(define_insn "aarch64_v<optab><mode>3_const"
> [(set (match_operand:SVE_I 0 "register_operand" "=w")
> (SHIFTRT:SVE_I
> (match_operand:SVE_I 1 "register_operand" "w")
> (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
> - "TARGET_SVE && reload_completed"
> + "TARGET_SVE"
> "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
> )
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index 871cf0bd2e8..62524f36de6 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -1932,40 +1932,27 @@
> (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
> [(set (match_operand:SVE_FULL_I 0 "register_operand")
> (plus:SVE_FULL_I
> - (unspec:SVE_FULL_I
> - [(match_dup 4)
> - (SHIFTRT:SVE_FULL_I
> - (match_operand:SVE_FULL_I 2 "register_operand")
> - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> - UNSPEC_PRED_X)
> - (match_operand:SVE_FULL_I 1 "register_operand")))]
> + (SHIFTRT:SVE_FULL_I
> + (match_operand:SVE_FULL_I 2 "register_operand")
> + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
> + (match_operand:SVE_FULL_I 1 "register_operand")))]
> "TARGET_SVE2"
> - {
> - operands[4] = CONSTM1_RTX (<VPRED>mode);
> - }
> )
>
> ;; Pattern-match SSRA and USRA as a predicated operation whose predicate
> ;; isn't needed.
> -(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
> +(define_insn "*aarch64_sve2_sra<mode>"
> [(set (match_operand:SVE_FULL_I 0 "register_operand")
> (plus:SVE_FULL_I
> - (unspec:SVE_FULL_I
> - [(match_operand 4)
> - (SHIFTRT:SVE_FULL_I
> - (match_operand:SVE_FULL_I 2 "register_operand")
> - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
> - UNSPEC_PRED_X)
> + (SHIFTRT:SVE_FULL_I
> + (match_operand:SVE_FULL_I 2 "register_operand")
> + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
> (match_operand:SVE_FULL_I 1 "register_operand")))]
> "TARGET_SVE2"
> {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
> [ w , 0 , w ; * ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
> [ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
> }
> - "&& !CONSTANT_P (operands[4])"
> - {
> - operands[4] = CONSTM1_RTX (<VPRED>mode);
> - }
> )
>
> ;; SRSRA and URSRA.
> @@ -2715,17 +2702,14 @@
> ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
> (define_insn "*bitmask_shift_plus<mode>"
> [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
> - (unspec:SVE_FULL_HSDI
> - [(match_operand:<VPRED> 1)
> - (lshiftrt:SVE_FULL_HSDI
> - (plus:SVE_FULL_HSDI
> - (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
> - (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
> - (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_shift_imm_vec_exact_top" ""))]
> - UNSPEC_PRED_X))]
> + (lshiftrt:SVE_FULL_HSDI
> + (plus:SVE_FULL_HSDI
> + (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
> + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
> + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_shift_imm_vec_exact_top" "")))]
> "TARGET_SVE2"
> - "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
> + "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
> )
>
> ;; -------------------------------------------------------------------------