On Mon, Jul 17, 2023 at 8:44 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> Ping.
>
> On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Similar to what we did for CMPXCHG, but extended to all
> > ix86_comparison_int_operator codes, since CMPCCXADD sets EFLAGS exactly
> > the same way as CMP.
> >
> > When the operand order in the CMP insn is the same as that in CMPCCXADD,
> > the CMP insn can be eliminated directly.
> >
> > When the operand order is swapped in the CMP insn, only optimize
> > cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead
> > after the jcc/setcc, and adjust the condition code of the jcc/setcc accordingly.
> >
> > gcc/ChangeLog:
> >
> >         PR target/110591
> >         * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern
> >         to explicitly set FLAGS_REG like *cmp<mode>_1, and add 3
> >         extra define_peephole2 patterns after the pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr110591.c: New test.
> >         * gcc.target/i386/pr110591-2.c: New test.

LGTM.

Thanks,
Uros.

> > ---
> >  gcc/config/i386/sync.md                    | 160 ++++++++++++++++++++-
> >  gcc/testsuite/gcc.target/i386/pr110591-2.c |  90 ++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr110591.c   |  66 +++++++++
> >  3 files changed, 315 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c
> >
> > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> > index e1fa1504deb..e84226cf895 100644
> > --- a/gcc/config/i386/sync.md
> > +++ b/gcc/config/i386/sync.md
> > @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>"
> >           UNSPECV_CMPCCXADD))
> >     (set (match_dup 1)
> >         (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > -   (clobber (reg:CC FLAGS_REG))]
> > +   (set (reg:CC FLAGS_REG)
> > +       (compare:CC (match_dup 1)
> > +                   (match_dup 2)))]
> >    "TARGET_CMPCCXADD && TARGET_64BIT"
> >  {
> >    char buf[128];
> > @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>"
> >    output_asm_insn (buf, operands);
> >    return "";
> >  })
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[5])
> > +   && rtx_equal_p (operands[1], operands[6])"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (match_dup 7)
> > +       (match_op_dup 8
> > +         [(match_dup 9) (const_int 0)]))])
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> > +   (set (match_operand:QI 7 "nonimmediate_operand")
> > +       (match_operator:QI 8 "ix86_comparison_int_operator"
> > +         [(reg FLAGS_REG) (const_int 0)]))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[6])
> > +   && rtx_equal_p (operands[1], operands[5])
> > +   && peep2_regno_dead_p (4, FLAGS_REG)"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (match_dup 7)
> > +       (match_op_dup 8
> > +         [(match_dup 9) (const_int 0)]))]
> > +{
> > +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
> > +  if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
> > +     {
> > +       operands[8] = shallow_copy_rtx (operands[8]);
> > +       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
> > +       PUT_CODE (operands[8], ccode);
> > +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> > +                                                 operands[6],
> > +                                                 operands[5]),
> > +                                  FLAGS_REG);
> > +     }
> > +})
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> > +   (set (pc)
> > +       (if_then_else (match_operator 7 "ix86_comparison_int_operator"
> > +         [(reg FLAGS_REG) (const_int 0)])
> > +         (label_ref (match_operand 8))
> > +         (pc)))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[6])
> > +   && rtx_equal_p (operands[1], operands[5])
> > +   && peep2_regno_dead_p (4, FLAGS_REG)"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] 
> > UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (pc)
> > +       (if_then_else
> > +        (match_op_dup 7
> > +         [(match_dup 9) (const_int 0)])
> > +         (label_ref (match_dup 8))
> > +         (pc)))]
> > +{
> > +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG);
> > +  if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7]))
> > +     {
> > +       operands[7] = shallow_copy_rtx (operands[7]);
> > +       enum rtx_code ccode = swap_condition (GET_CODE (operands[7]));
> > +       PUT_CODE (operands[7], ccode);
> > +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> > +                                                 operands[6],
> > +                                                 operands[5]),
> > +                                  FLAGS_REG);
> > +     }
> > +})
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c 
> > b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> > new file mode 100644
> > index 00000000000..92ffdb97d62
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> > @@ -0,0 +1,90 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } 
> > */
> > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> > +
> > +#include <immintrin.h>
> > +
> > +int foo_jg (int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v)
> > +    return 100;
> > +  return 200;
> > +}
> > +
> > +int foo_jl (int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v)
> > +    return 300;
> > +  return 100;
> > +}
> > +
> > +int foo_je(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v)
> > +    return 123;
> > +  return 134;
> > +}
> > +
> > +int foo_jne(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v)
> > +    return 111;
> > +  return 12;
> > +}
> > +
> > +int foo_jge(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v)
> > +    return 413;
> > +  return 23;
> > +}
> > +
> > +int foo_jle(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v)
> > +    return 3141;
> > +  return 341;
> > +}
> > +
> > +int fooq_jg (long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v)
> > +    return 123;
> > +  return 3;
> > +}
> > +
> > +int fooq_jl (long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v)
> > +    return 313;
> > +  return 5;
> > +}
> > +
> > +int fooq_je(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v)
> > +    return 1313;
> > +  return 13;
> > +}
> > +
> > +int fooq_jne(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v)
> > +    return 1314;
> > +  return 132;
> > +}
> > +
> > +int fooq_jge(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v)
> > +    return 14314;
> > +  return 434;
> > +}
> > +
> > +int fooq_jle(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v)
> > +    return 14414;
> > +  return 43;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c 
> > b/gcc/testsuite/gcc.target/i386/pr110591.c
> > new file mode 100644
> > index 00000000000..32a515b429e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110591.c
> > @@ -0,0 +1,66 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-mcmpccxadd -O2" } */
> > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> > +
> > +#include <immintrin.h>
> > +
> > +_Bool foo_setg (int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
> > +}
> > +
> > +_Bool foo_setl (int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
> > +}
> > +
> > +_Bool foo_sete(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
> > +}
> > +
> > +_Bool foo_setne(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
> > +}
> > +
> > +_Bool foo_setge(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
> > +}
> > +
> > +_Bool foo_setle(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
> > +}
> > +
> > +_Bool fooq_setg (long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
> > +}
> > +
> > +_Bool fooq_setl (long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
> > +}
> > +
> > +_Bool fooq_sete(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
> > +}
> > +
> > +_Bool fooq_setne(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
> > +}
> > +
> > +_Bool fooq_setge(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
> > +}
> > +
> > +_Bool fooq_setle(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
>
>
> --
> BR,
> Hongtao

Reply via email to