On Thu, Jun 6, 2024 at 4:49 PM Kong, Lingling <lingling.k...@intel.com> wrote: > > Enable ZU for IMUL (opcodes 0x69 and 0x6B) and SETcc. > > gcc/ChangeLog: > > * config/i386/i386-opts.h (enum apx_features):Add apx_zu. > * config/i386/i386.h (TARGET_APX_ZU): Define. > * config/i386/i386.md (*imulhi<mode>zu): New define_insn. > (*setcc_<mode>_zu): Ditto. > * config/i386/i386.opt: Add enum value for zu. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-zu-1.c: New test. > * gcc.target/i386/apx-zu-2.c: Ditto. > > Bootstrapped & regtested on x86-64-pc-linux-gnu with binutils 2.42 branch. > OK for trunk? Ok. > > --- > gcc/config/i386/i386-opts.h | 3 +- > gcc/config/i386/i386.h | 1 + > gcc/config/i386/i386.md | 25 ++++++++++++++-- > gcc/config/i386/i386.opt | 3 ++ > gcc/testsuite/gcc.target/i386/apx-zu-1.c | 38 ++++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/apx-zu-2.c | 19 ++++++++++++ > 6 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 > gcc/testsuite/gcc.target/i386/apx-zu-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/apx-zu-2.c > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index > 5fcc4927978..c7ec0d9fd39 100644 > --- a/gcc/config/i386/i386-opts.h > +++ b/gcc/config/i386/i386-opts.h > @@ -142,8 +142,9 @@ enum apx_features { > apx_ppx = 1 << 3, > apx_nf = 1 << 4, > apx_ccmp = 1 << 5, > + apx_zu = 1 << 6, > apx_all = apx_egpr | apx_push2pop2 | apx_ndd > - | apx_ppx | apx_nf | apx_ccmp, > + | apx_ppx | apx_nf | apx_ccmp | apx_zu, > }; > > #endif > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index > 7051c6c13e4..dc1a1f44320 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -57,6 +57,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see #define TARGET_APX_PPX (ix86_apx_features & apx_ppx) #define > TARGET_APX_NF (ix86_apx_features & apx_nf) > #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp) > +#define TARGET_APX_ZU (ix86_apx_features & apx_zu) > > #include "config/vxworks-dummy.h" > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > ffcf63e1cba..a2765f65754 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -9967,6 +9967,19 @@ > (const_string "direct"))) > (set_attr "mode" "<MODE>")]) > > +(define_insn "*imulhi<mode>zu" > + [(set (match_operand:SWI48x 0 "register_operand" "=r,r") > + (zero_extend:SWI48x > + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm") > + (match_operand:HI 2 "immediate_operand" "K,n")))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_APX_ZU" > + "@ > + imulzu{w}\t{%2, %1, %w0|%w0, %1, %2} > + imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}" > + [(set_attr "type" "imul") > + (set_attr "mode" "HI")]) > + > (define_insn "*mulsi3_1_zext" > [(set (match_operand:DI 0 "register_operand" "=r,r,r") > (zero_extend:DI > @@ -18354,11 +18367,19 @@ > ;; For all sCOND expanders, also expand the compare or test insn that ;; > generates cc0. Generate an equality comparison if `seq' or `sne'. > > +(define_insn "*setcc_<mode>_zu" > + [(set (match_operand:SWI248 0 "register_operand" "=r") > + (match_operator:SWI248 1 "ix86_comparison_operator" > + [(reg FLAGS_REG) (const_int 0)]))] > + "TARGET_APX_ZU" > + "setzu%C1\t%b0" > + [(set_attr "type" "setcc")]) > + > (define_insn_and_split "*setcc_di_1" > [(set (match_operand:DI 0 "register_operand" "=q") > (match_operator:DI 1 "ix86_comparison_operator" > [(reg FLAGS_REG) (const_int 0)]))] > - "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL" > + "!TARGET_APX_ZU && TARGET_64BIT && !TARGET_PARTIAL_REG_STALL" > "#" > "&& reload_completed" > [(set (match_dup 2) (match_dup 1)) > @@ -18391,7 +18412,7 @@ > [(set (match_operand:SWI24 0 "register_operand" "=q") > (match_operator:SWI24 1 "ix86_comparison_operator" > [(reg FLAGS_REG) (const_int 0)]))] > - "!TARGET_PARTIAL_REG_STALL > + "!TARGET_APX_ZU && !TARGET_PARTIAL_REG_STALL > && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))" > "#" > "&& reload_completed" > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index > 7017cc87cec..353fffb2343 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1342,6 +1342,9 @@ Enum(apx_features) String(nf) Value(apx_nf) Set(6) > EnumValue > Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7) > > +EnumValue > +Enum(apx_features) String(zu) Value(apx_zu) Set(8) > + > EnumValue > Enum(apx_features) String(all) Value(apx_all) Set(1) > > diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-1.c > b/gcc/testsuite/gcc.target/i386/apx-zu-1.c > new file mode 100644 > index 00000000000..927a87673a7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-zu-1.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mapxf -march=x86-64 -O2" } */ > +/* { dg-final { scan-assembler-not "setle"} } */ > +/* { dg-final { scan-assembler-not "setge"} } */ > +/* { dg-final { scan-assembler-not "sete"} } */ > +/* { dg-final { scan-assembler-not "xor"} } */ > +/* { dg-final { scan-assembler-times "setzune" 1} } */ > +/* { dg-final { scan-assembler-times "setzule" 1} } */ > +/* { dg-final { scan-assembler-times "setzue" 1} } */ > +/* { dg-final { scan-assembler-times "setzuge" 1} } */ > +/* { dg-final { scan-assembler "imulzu"} } */ > +long long foo0 (int a) { > + return a == 0 ? 0 : 1; > +} > + > +long foo1 (int a, int b) > +{ > + return a > b ? 0 : 1; > +} > + > +int foo2 (int a, int b) > +{ > + return a != b ? 0 : 1; > +} > + > +short foo3 (int a, int b) > +{ > + return a < b ? 0 : 1; > +} > + > +unsigned long > +f1(unsigned short x) > +{ > + unsigned short a; > + a = x * 1000; > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-2.c > b/gcc/testsuite/gcc.target/i386/apx-zu-2.c > new file mode 100644 > index 00000000000..3ee04495d98 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-zu-2.c > @@ -0,0 +1,19 @@ > +/* { dg-do run { target { ! ia32 } } } */ > +/* { dg-require-effective-target apxf } */ > +/* { dg-options "-mapxf -march=x86-64 -O2" } */ > +#include "apx-zu-1.c" > + > +int main(void) > +{ > + if (foo0 (0)) > + __builtin_abort (); > + if (foo1 (3, 2)) > + __builtin_abort (); > + if (foo2 (3, 2)) > + __builtin_abort (); > + if (foo3 (2, 3)) > + __builtin_abort (); > + if (f1 (2) != 2000) > + __builtin_abort (); > + return 0; > +} > -- > 2.31.1 >
-- BR, Hongtao