On Thu, Oct 5, 2023 at 11:22 AM Tamar Christina <[email protected]> wrote:
>
> Hi All,
>
> copysign (x, -1) is effectively fneg (abs (x)), which on AArch64 can be
> done most efficiently by ORing in the sign bit.
>
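For reference, the identity being used, as a minimal C sketch assuming
IEEE-754 representation (copysign_neg1 is a made-up name, not part of the
patch):

  #include <stdint.h>
  #include <string.h>

  static double copysign_neg1 (double x)
  {
    uint64_t bits;
    memcpy (&bits, &x, sizeof bits);  /* View the double as raw bits.  */
    bits |= UINT64_C (1) << 63;       /* OR in the sign bit.  */
    memcpy (&x, &bits, sizeof bits);
    return x;                         /* == fneg (abs (x)).  */
  }
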
> The middle-end now optimizes fneg (abs (x)) into copysign as the
> canonical form, so this patch optimizes the expansion accordingly.
>
> If the target has an inclusive OR that takes an immediate, then the
> transformed instruction is both shorter and faster.  For targets that
> don't, the immediate has to be constructed separately, but this still
> ends up being faster because the immediate construction is not on the
> critical path.
>
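For a concrete picture, the difference for a V4SF copysign (x, -1) might
look roughly like this (illustrative sketch only, not output quoted from
the patch):

    movi    v1.4s, 0x80, lsl 24     // sign-bit mask, off the critical path
    orr     v0.16b, v0.16b, v1.16b  // copysign (x, -1)

instead of the two dependent instructions

    fabs    v0.4s, v0.4s
    fneg    v0.4s, v0.4s
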
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Note that this is part of another patch series; the additional testcases
> are mutually dependent on the match.pd patch.  As such, the tests are
> added there instead of here.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * config/aarch64/aarch64.md (copysign<GPF:mode>3): Handle
> copysign (x, -1).
> * config/aarch64/aarch64-simd.md (copysign<mode>3): Likewise.
> * config/aarch64/aarch64-sve.md (copysign<mode>3): Likewise.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e..a78e77dcc3473445108b06c50f9c28a8369f3e3f 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -754,15 +754,33 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
> (define_expand "copysign<mode>3"
> [(match_operand:VHSDF 0 "register_operand")
> (match_operand:VHSDF 1 "register_operand")
> - (match_operand:VHSDF 2 "register_operand")]
> + (match_operand:VHSDF 2 "nonmemory_operand")]
> "TARGET_SIMD"
> {
> - rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> + machine_mode int_mode = <V_INT_EQUIV>mode;
> + rtx v_bitmask = gen_reg_rtx (int_mode);
> int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
>
> emit_move_insn (v_bitmask,
> aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> HOST_WIDE_INT_M1U << bits));
> +
> + /* copysign (x, -1) should instead be expanded as orr with the sign
> + bit. */
> + if (!REG_P (operands[2]))
> + {
> + auto r0
> + = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> + if (-1 == real_to_integer (r0))
I think this should be REAL_VALUE_NEGATIVE (r0) instead; only the sign
needs to be copied here, right?
Also, it seems like you should double-check that this is a vec_duplicate
of a constant and that the constant is a CONST_DOUBLE.
> + {
> + emit_insn (gen_ior<v_int_equiv>3 (
> + lowpart_subreg (int_mode, operands[0], <MODE>mode),
> + lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
> + DONE;
> + }
> + }
> +
> + operands[2] = force_reg (<MODE>mode, operands[2]);
> emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
> operands[2], operands[1]));
> DONE;
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 5a652d8536a0ef9461f40da7b22834e683e73ceb..071400c820a5b106ddf9dc9faebb117975d74ea0 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6387,7 +6387,7 @@ (define_insn "*<optab><mode>3"
> (define_expand "copysign<mode>3"
> [(match_operand:SVE_FULL_F 0 "register_operand")
> (match_operand:SVE_FULL_F 1 "register_operand")
> - (match_operand:SVE_FULL_F 2 "register_operand")]
> + (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
> "TARGET_SVE"
> {
> rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
> @@ -6398,11 +6398,26 @@ (define_expand "copysign<mode>3"
> rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
> rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
>
> - emit_insn (gen_and<v_int_equiv>3
> - (sign, arg2,
> - aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> - HOST_WIDE_INT_M1U
> - << bits)));
> + rtx v_sign_bitmask
> + = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> + HOST_WIDE_INT_M1U << bits);
> +
> + /* copysign (x, -1) should instead be expanded as orr with the sign
> + bit. */
> + if (!REG_P (operands[2]))
> + {
> + auto r0
> + = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> + if (-1 == real_to_integer (r0))
Likewise.
> + {
> + emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
> + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> + DONE;
> + }
> + }
> +
> + operands[2] = force_reg (<MODE>mode, operands[2]);
> + emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
> emit_insn (gen_and<v_int_equiv>3
> (mant, arg1,
> aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 24349ecdbbab875f21975f116732a9e53762d4c1..d6c581ad81615b4feb095391cbcf4f5b78fa72f1 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6940,12 +6940,25 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
> (define_expand "copysign<GPF:mode>3"
> [(match_operand:GPF 0 "register_operand")
> (match_operand:GPF 1 "register_operand")
> - (match_operand:GPF 2 "register_operand")]
> + (match_operand:GPF 2 "nonmemory_operand")]
> "TARGET_SIMD"
> {
> - rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> + machine_mode int_mode = <V_INT_EQUIV>mode;
> + rtx bitmask = gen_reg_rtx (int_mode);
> emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
> << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
> + /* copysign (x, -1) should instead be expanded as orr with the sign
> + bit. */
> + auto r0 = CONST_DOUBLE_REAL_VALUE (operands[2]);
> + if (-1 == real_to_integer (r0))
Likewise.
Thanks,
Andrew
> + {
> + emit_insn (gen_ior<v_int_equiv>3 (
> + lowpart_subreg (int_mode, operands[0], <MODE>mode),
> + lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
> + DONE;
> + }
> +
> + operands[2] = force_reg (<MODE>mode, operands[2]);
> emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
> bitmask));
> DONE;
>