On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao....@intel.com> wrote: > > Hi: > As a follow-up to [1], the patch removes all scalar-mode copysign-related > post_reload splitters/define_insns and expands copysign directly into the sequence below using > paradoxical subregs.
[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html > > op3 = op1 & ~mask; > op4 = op2 & mask; > dest = op3 | op4; > > It can sometimes generate better code just like avx512dq-abs-copysign-1.c > shows. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > gcc/ChangeLog: > > * config/i386/i386-expand.c (ix86_expand_copysign): Expand > right into ANDNOT + AND + IOR, using paradoxical subregs. > (ix86_split_copysign_const): Remove. > (ix86_split_copysign_var): Ditto. > * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto. > (ix86_split_copysign_var): Ditto. > * config/i386/i386.md (@copysign<mode>3_const): Ditto. > (@copysign<mode>3_var): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase. > * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase. > --- > gcc/config/i386/i386-expand.c | 152 +++--------------- > gcc/config/i386/i386-protos.h | 2 - > gcc/config/i386/i386.md | 44 ----- > .../gcc.target/i386/avx512dq-abs-copysign-1.c | 4 +- > .../gcc.target/i386/avx512vl-abs-copysign-1.c | 4 +- > 5 files changed, 30 insertions(+), 176 deletions(-) > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index badbacc19d8..a0262a8f47d 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -2115,13 +2115,9 @@ void > ix86_expand_copysign (rtx operands[]) > { > machine_mode mode, vmode; > - rtx dest, op0, op1, mask; > + rtx dest, op0, op1, mask, op2, op3; > > - dest = operands[0]; > - op0 = operands[1]; > - op1 = operands[2]; > - > - mode = GET_MODE (dest); > + mode = GET_MODE (operands[0]); > > if (mode == SFmode) > vmode = V4SFmode; > @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[]) > else > gcc_unreachable (); > > - mask = ix86_build_signbit_mask (vmode, 0, 0); > - > - if (CONST_DOUBLE_P (op0)) > + if (rtx_equal_p (operands[1], operands[2])) > { > - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) > - op0 = 
simplify_unary_operation (ABS, mode, op0, mode); > - > - if (mode == SFmode || mode == DFmode) > - { > - if (op0 == CONST0_RTX (mode)) > - op0 = CONST0_RTX (vmode); > - else > - { > - rtx v = ix86_build_const_vector (vmode, false, op0); > - > - op0 = force_reg (vmode, v); > - } > - } > - else if (op0 != CONST0_RTX (mode)) > - op0 = force_reg (mode, op0); > - > - emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask)); > - } > - else > - { > - rtx nmask = ix86_build_signbit_mask (vmode, 0, 1); > - > - emit_insn (gen_copysign3_var > - (mode, dest, NULL_RTX, op0, op1, nmask, mask)); > - } > -} > - > -/* Deconstruct a copysign operation into bit masks. Operand 0 is known to > - be a constant, and so has already been expanded into a vector constant. > */ > - > -void > -ix86_split_copysign_const (rtx operands[]) > -{ > - machine_mode mode, vmode; > - rtx dest, op0, mask, x; > - > - dest = operands[0]; > - op0 = operands[1]; > - mask = operands[3]; > - > - mode = GET_MODE (dest); > - vmode = GET_MODE (mask); > - > - dest = lowpart_subreg (vmode, dest, mode); > - x = gen_rtx_AND (vmode, dest, mask); > - emit_insn (gen_rtx_SET (dest, x)); > - > - if (op0 != CONST0_RTX (vmode)) > - { > - x = gen_rtx_IOR (vmode, dest, op0); > - emit_insn (gen_rtx_SET (dest, x)); > - } > -} > - > -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, > - so we have to do two masks. */ > - > -void > -ix86_split_copysign_var (rtx operands[]) > -{ > - machine_mode mode, vmode; > - rtx dest, scratch, op0, op1, mask, nmask, x; > - > - dest = operands[0]; > - scratch = operands[1]; > - op0 = operands[2]; > - op1 = operands[3]; > - nmask = operands[4]; > - mask = operands[5]; > - > - mode = GET_MODE (dest); > - vmode = GET_MODE (mask); > - > - if (rtx_equal_p (op0, op1)) > - { > - /* Shouldn't happen often (it's useless, obviously), but when it does > - we'd generate incorrect code if we continue below. 
*/ > - emit_move_insn (dest, op0); > + emit_move_insn (operands[0], operands[1]); > return; > } > > - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ > - { > - gcc_assert (REGNO (op1) == REGNO (scratch)); > - > - x = gen_rtx_AND (vmode, scratch, mask); > - emit_insn (gen_rtx_SET (scratch, x)); > + dest = lowpart_subreg (vmode, operands[0], mode); > + op1 = lowpart_subreg (vmode, operands[2], mode); > + mask = ix86_build_signbit_mask (vmode, 0, 0); > > - dest = mask; > - op0 = lowpart_subreg (vmode, op0, mode); > - x = gen_rtx_NOT (vmode, dest); > - x = gen_rtx_AND (vmode, x, op0); > - emit_insn (gen_rtx_SET (dest, x)); > - } > - else > + if (CONST_DOUBLE_P (operands[1])) > { > - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ > - { > - x = gen_rtx_AND (vmode, scratch, mask); > - } > - else /* alternative 2,4 */ > + op0 = simplify_unary_operation (ABS, mode, operands[1], mode); > + /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. > */ > + if (op0 == CONST0_RTX (mode)) > { > - gcc_assert (REGNO (mask) == REGNO (scratch)); > - op1 = lowpart_subreg (vmode, op1, mode); > - x = gen_rtx_AND (vmode, scratch, op1); > + emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1)); > + return; > } > - emit_insn (gen_rtx_SET (scratch, x)); > > - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ > - { > - dest = lowpart_subreg (vmode, op0, mode); > - x = gen_rtx_AND (vmode, dest, nmask); > - } > - else /* alternative 3,4 */ > - { > - gcc_assert (REGNO (nmask) == REGNO (dest)); > - dest = nmask; > - op0 = lowpart_subreg (vmode, op0, mode); > - x = gen_rtx_AND (vmode, dest, op0); > - } > - emit_insn (gen_rtx_SET (dest, x)); > + if (GET_MODE_SIZE (mode) < 16) > + op0 = ix86_build_const_vector (vmode, false, op0); > + op0 = force_reg (vmode, op0); > } > - > - x = gen_rtx_IOR (vmode, dest, scratch); > - emit_insn (gen_rtx_SET (dest, x)); > + else > + op0 = lowpart_subreg (vmode, operands[1], mode); > + > + op2 = gen_reg_rtx (vmode); 
> + op3 = gen_reg_rtx (vmode); > + emit_move_insn (op2, gen_rtx_AND (vmode, > + gen_rtx_NOT (vmode, mask), > + op0)); > + emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1)); > + emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3)); > } > > /* Expand an xorsign operation. */ > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index 72644e33a92..dcae34b915e 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum > rtx_code, machine_mode, > extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode, > rtx[]); > extern void ix86_expand_copysign (rtx []); > -extern void ix86_split_copysign_const (rtx []); > -extern void ix86_split_copysign_var (rtx []); > extern void ix86_expand_xorsign (rtx []); > extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]); > extern bool ix86_match_ccmode (rtx, machine_mode); > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 6b4ceb2bce3..ba0058dad81 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3" > || (TARGET_SSE && (<MODE>mode == TFmode))" > "ix86_expand_copysign (operands); DONE;") > > -(define_insn_and_split "@copysign<mode>3_const" > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv") > - (unspec:SSEMODEF > - [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC") > - (match_operand:SSEMODEF 2 "register_operand" "0") > - (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")] > - UNSPEC_COPYSIGN))] > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > - || (TARGET_SSE && (<MODE>mode == TFmode))" > - "#" > - "&& reload_completed" > - [(const_int 0)] > - "ix86_split_copysign_const (operands); DONE;") > - > -(define_insn "@copysign<mode>3_var" > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv") > - (unspec:SSEMODEF > - 
[(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv") > - (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv") > - (match_operand:<ssevecmodef> 4 > - "nonimmediate_operand" "X,Yvm,Yvm,0,0") > - (match_operand:<ssevecmodef> 5 > - "nonimmediate_operand" "0,Yvm,1,Yvm,1")] > - UNSPEC_COPYSIGN)) > - (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))] > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > - || (TARGET_SSE && (<MODE>mode == TFmode))" > - "#") > - > -(define_split > - [(set (match_operand:SSEMODEF 0 "register_operand") > - (unspec:SSEMODEF > - [(match_operand:SSEMODEF 2 "register_operand") > - (match_operand:SSEMODEF 3 "register_operand") > - (match_operand:<ssevecmodef> 4) > - (match_operand:<ssevecmodef> 5)] > - UNSPEC_COPYSIGN)) > - (clobber (match_scratch:<ssevecmodef> 1))] > - "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > - || (TARGET_SSE && (<MODE>mode == TFmode))) > - && reload_completed" > - [(const_int 0)] > - "ix86_split_copysign_var (operands); DONE;") > - > (define_expand "xorsign<mode>3" > [(match_operand:MODEF 0 "register_operand") > (match_operand:MODEF 1 "register_operand") > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > index cb542d09058..0107df7741a 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > @@ -64,8 +64,8 @@ f6 (double x) > } > > /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */ > -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */ > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */ > /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */ > /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */ > -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */ > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */ > /* { dg-final { scan-assembler 
"vxorpd\[^\n\r\]*xmm18" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > index b375c5fad80..b27335b9d99 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > @@ -64,8 +64,8 @@ f6 (double x) > } > > /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */ > -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */ > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */ > /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */ > /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */ > -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */ > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */ > /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */ > -- > 2.27.0 > -- BR, Hongtao