On Fri, Oct 14, 2022 at 1:38 AM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > gcc/ChangeLog: > > * common/config/i386/cpuinfo.h (get_available_features): > Detect PREFETCHI. > * common/config/i386/i386-common.cc > (OPTION_MASK_ISA2_PREFETCHI_SET, > OPTION_MASK_ISA2_PREFETCHI_UNSET): New. > (ix86_handle_option): Handle -mprefetchi. > * common/config/i386/i386-cpuinfo.h (enum processor_features): > Add FEATURE_PREFETCHI. > * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for > prefetchi. > * config.gcc: Add prfchiintrin.h. > * config/i386/cpuid.h (bit_PREFETCHI): New. > * config/i386/i386-c.cc (ix86_target_macros_internal): Define > __PREFETCHI__. > * config/i386/i386-isa.def (PREFETCHI): Add DEF_PTA(PREFETCHI). > * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p): > Handle prefetchi. > * config/i386/i386.md (prefetch): Add handler for prefetchi > (*prefetch_i): New define_insn. > * config/i386/i386.opt: Add option -mprefetchi. > * config/i386/immintrin.h: Include prfchiintrin.h. > * config/i386/predicates.md (local_func_symbolic_operand): > New predicates. > * config/i386/xmmintrin.h (enum _mm_hint): New enum for prefetchi. > (_mm_prefetch): Handle the highest bit of enum. > * doc/extend.texi: Document prefetchi. > * doc/invoke.texi: Document -mprefetchi. > * doc/sourcebuild.texi: Document target prefetchi. > * config/i386/prfchiintrin.h: New file. > > gcc/testsuite/ChangeLog: > > * g++.dg/other/i386-2.C: Add -mprefetchi. > * g++.dg/other/i386-3.C: Ditto. > * gcc.misc-tests/i386-pf-3dnow-1.c: Add scan-assembler-not for > prefetchit0/t1. > * gcc.misc-tests/i386-pf-athlon-1.c: Ditto. > * gcc.misc-tests/i386-pf-sse-1.c: Ditto. > * gcc.target/i386/avx-1.c: Add -mprefetchi. > * gcc.target/i386/avx-2.c: Ditto. > * gcc.target/i386/funcspec-56.inc: Add new target attribute. > * gcc.target/i386/prefetchi-1.c: Rewrite testcase. > * gcc.target/i386/prefetchi-2.c: New test. > * gcc.target/i386/prefetchi-3.c: Ditto. 
> * gcc.target/i386/sse-12.c: Add -mprefetchi. > * gcc.target/i386/sse-13.c: Ditto. > * gcc.target/i386/sse-14.c: Ditto. > * gcc.target/i386/sse-22.c: Add prefetchi. > * gcc.target/i386/sse-23.c: Ditto. > > Co-authored-by: Hongtao Liu <hongtao....@intel.com> > --- > gcc/common/config/i386/cpuinfo.h | 2 + > gcc/common/config/i386/i386-common.cc | 15 ++++ > gcc/common/config/i386/i386-cpuinfo.h | 1 + > gcc/common/config/i386/i386-isas.h | 1 + > gcc/config.gcc | 2 +- > gcc/config/i386/cpuid.h | 1 + > gcc/config/i386/i386-c.cc | 2 + > gcc/config/i386/i386-isa.def | 1 + > gcc/config/i386/i386-options.cc | 4 +- > gcc/config/i386/i386.md | 90 +++++++++++++------ > gcc/config/i386/i386.opt | 4 + > gcc/config/i386/immintrin.h | 2 + > gcc/config/i386/predicates.md | 15 ++++ > gcc/config/i386/prfchiintrin.h | 39 ++++++++ > gcc/config/i386/xmmintrin.h | 6 +- > gcc/doc/extend.texi | 5 ++ > gcc/doc/invoke.texi | 10 ++- > gcc/doc/sourcebuild.texi | 3 + > gcc/testsuite/g++.dg/other/i386-2.C | 2 +- > gcc/testsuite/g++.dg/other/i386-3.C | 2 +- > .../gcc.misc-tests/i386-pf-3dnow-1.c | 2 + > .../gcc.misc-tests/i386-pf-athlon-1.c | 2 + > gcc/testsuite/gcc.misc-tests/i386-pf-sse-1.c | 2 + > gcc/testsuite/gcc.target/i386/avx-1.c | 2 +- > gcc/testsuite/gcc.target/i386/avx-2.c | 2 +- > gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + > gcc/testsuite/gcc.target/i386/prefetchi-1.c | 36 ++++++-- > gcc/testsuite/gcc.target/i386/prefetchi-2.c | 26 ++++++ > gcc/testsuite/gcc.target/i386/prefetchi-3.c | 15 ++++ > gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- > gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- > gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- > gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- > gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- > 34 files changed, 259 insertions(+), 49 deletions(-) > create mode 100644 gcc/config/i386/prfchiintrin.h > create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c > > diff --git 
a/gcc/common/config/i386/cpuinfo.h > b/gcc/common/config/i386/cpuinfo.h > index 118f3a42abd..551e0483330 100644 > --- a/gcc/common/config/i386/cpuinfo.h > +++ b/gcc/common/config/i386/cpuinfo.h > @@ -797,6 +797,8 @@ get_available_features (struct __processor_model > *cpu_model, > set_feature (FEATURE_HRESET); > if (eax & bit_CMPCCXADD) > set_feature(FEATURE_CMPCCXADD); > + if (edx & bit_PREFETCHI) > + set_feature (FEATURE_PREFETCHI); > if (avx_usable) > { > if (eax & bit_AVXVNNI) > diff --git a/gcc/common/config/i386/i386-common.cc > b/gcc/common/config/i386/i386-common.cc > index f3d00ce4bc9..77ff07a3797 100644 > --- a/gcc/common/config/i386/i386-common.cc > +++ b/gcc/common/config/i386/i386-common.cc > @@ -112,6 +112,7 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT > #define OPTION_MASK_ISA2_CMPCCXADD_SET OPTION_MASK_ISA2_CMPCCXADD > #define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16 > +#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI > > /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same > as -msse4.2. */ > @@ -287,6 +288,7 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT > #define OPTION_MASK_ISA2_CMPCCXADD_UNSET OPTION_MASK_ISA2_CMPCCXADD > #define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16 > +#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI > > /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same > as -mno-sse4.1. 
*/ > @@ -1211,6 +1213,19 @@ ix86_handle_option (struct gcc_options *opts, > } > return true; > > + case OPT_mprefetchi: > + if (value) > + { > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET; > + } > + else > + { > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET; > + opts->x_ix86_isa_flags2_explicit |= > OPTION_MASK_ISA2_PREFETCHI_UNSET; > + } > + return true; > + > case OPT_mfma: > if (value) > { > diff --git a/gcc/common/config/i386/i386-cpuinfo.h > b/gcc/common/config/i386/i386-cpuinfo.h > index f9d5b7238ea..3fe69178841 100644 > --- a/gcc/common/config/i386/i386-cpuinfo.h > +++ b/gcc/common/config/i386/i386-cpuinfo.h > @@ -246,6 +246,7 @@ enum processor_features > FEATURE_AVXNECONVERT, > FEATURE_CMPCCXADD, > FEATURE_AMX_FP16, > + FEATURE_PREFETCHI, > CPU_FEATURE_MAX > }; > > diff --git a/gcc/common/config/i386/i386-isas.h > b/gcc/common/config/i386/i386-isas.h > index 7c4a71413b5..8648ea6903c 100644 > --- a/gcc/common/config/i386/i386-isas.h > +++ b/gcc/common/config/i386/i386-isas.h > @@ -182,4 +182,5 @@ ISA_NAMES_TABLE_START > P_NONE, "-mavxneconvert") > ISA_NAMES_TABLE_ENTRY("cmpccxadd", FEATURE_CMPCCXADD, P_NONE, > "-mcmpccxadd") > ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16") > + ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, > "-mprefetchi") > ISA_NAMES_TABLE_END > diff --git a/gcc/config.gcc b/gcc/config.gcc > index 8a8712d1466..ceea7726bfd 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -423,7 +423,7 @@ i[34567]86-*-* | x86_64-*-*) > hresetintrin.h keylockerintrin.h avxvnniintrin.h > mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h > avxifmaintrin.h avxvnniint8intrin.h > avxneconvertintrin.h > - cmpccxaddintrin.h amxfp16intrin.h" > + cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h" > ;; > ia64-*-*) > extra_headers=ia64intrin.h > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h > index 
229c15c5950..92583261883 100644 > --- a/gcc/config/i386/cpuid.h > +++ b/gcc/config/i386/cpuid.h > @@ -54,6 +54,7 @@ > #define bit_AVXVNNIINT8 (1 << 4) > #define bit_AVXNECONVERT (1 << 5) > #define bit_CMPXCHG8B (1 << 8) > +#define bit_PREFETCHI (1 << 14) > #define bit_CMOV (1 << 15) > #define bit_MMX (1 << 23) > #define bit_FXSAVE (1 << 24) > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc > index 3020b5f267a..74239002ed6 100644 > --- a/gcc/config/i386/i386-c.cc > +++ b/gcc/config/i386/i386-c.cc > @@ -650,6 +650,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__CMPCCXADD__"); > if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16) > def_or_undef (parse_in, "__AMX_FP16__"); > + if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI) > + def_or_undef (parse_in, "__PREFETCHI__"); > if (TARGET_IAMCU) > { > def_or_undef (parse_in, "__iamcu"); > diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def > index 55b25763957..f234dcc37d7 100644 > --- a/gcc/config/i386/i386-isa.def > +++ b/gcc/config/i386/i386-isa.def > @@ -114,3 +114,4 @@ DEF_PTA(AVXVNNIINT8) > DEF_PTA(AVXNECONVERT) > DEF_PTA(CMPCCXADD) > DEF_PTA(AMX_FP16) > +DEF_PTA(PREFETCHI) > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index bf37c77589e..3f98b09e5cf 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -232,7 +232,8 @@ static struct ix86_target_opts isa2_opts[] = > { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }, > { "-mavxneconvert", OPTION_MASK_ISA2_AVXNECONVERT }, > { "-mcmpccxadd", OPTION_MASK_ISA2_CMPCCXADD }, > - { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 } > + { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }, > + { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI } > }; > static struct ix86_target_opts isa_opts[] = > { > @@ -1084,6 +1085,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > IX86_ATTR_ISA ("avxneconvert", OPT_mavxneconvert), > 
IX86_ATTR_ISA ("cmpccxadd", OPT_mcmpccxadd), > IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16), > + IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi), > > /* enum options */ > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index c65cf14b9f4..fb75f57483b 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -23637,47 +23637,65 @@ > (match_operand:SI 1 "const_int_operand") > (match_operand:SI 2 "const_int_operand") > (match_operand:SI 3 "const_int_operand"))] > - "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || > TARGET_PREFETCHWT1" > + "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1 > + || TARGET_PREFETCHI" > { > - if (INTVAL (operands[3]) == 0) > - { > - warning (0, "instruction prefetch is not supported; using data > prefetch"); > - operands[3] = const1_rtx; > - } > bool write = operands[1] != const0_rtx; > int locality = INTVAL (operands[2]); > + bool data = operands[3] != const0_rtx; > > gcc_assert (IN_RANGE (locality, 0, 3)); > > - /* Use 3dNOW prefetch in case we are asking for write prefetch not > - supported by SSE counterpart (non-SSE2 athlon machines) or the > - SSE prefetch is not available (K6 machines). Otherwise use SSE > - prefetch as it allows specifying of locality. */ > - > - if (write) > + if (data) > { > - if (TARGET_PREFETCHWT1) > - operands[2] = GEN_INT (MAX (locality, 2)); > - else if (TARGET_PRFCHW) > - operands[2] = GEN_INT (3); > - else if (TARGET_3DNOW && !TARGET_SSE2) > - operands[2] = GEN_INT (3); > - else if (TARGET_PREFETCH_SSE) > - operands[1] = const0_rtx; > + /* Use 3dNOW prefetch in case we are asking for write prefetch not > + supported by SSE counterpart (non-SSE2 athlon machines) or the > + SSE prefetch is not available (K6 machines). Otherwise use SSE > + prefetch as it allows specifying of locality. 
*/ > + > + if (write) > + { > + if (TARGET_PREFETCHWT1) > + operands[2] = GEN_INT (MAX (locality, 2)); > + else if (TARGET_PRFCHW) > + operands[2] = GEN_INT (3); > + else if (TARGET_3DNOW && !TARGET_SSE2) > + operands[2] = GEN_INT (3); > + else if (TARGET_PREFETCH_SSE) > + operands[1] = const0_rtx; > + else > + { > + gcc_assert (TARGET_3DNOW); > + operands[2] = GEN_INT (3); > + } > + } > else > { > - gcc_assert (TARGET_3DNOW); > - operands[2] = GEN_INT (3); > + if (TARGET_PREFETCH_SSE) > + ; > + else > + { > + gcc_assert (TARGET_3DNOW); > + operands[2] = GEN_INT (3); > + } > } > } > else > { > - if (TARGET_PREFETCH_SSE) > + /* GOT/PLT_PIC should not be available for instruction prefetch. > + It must be real instruction address. */ > + if (TARGET_PREFETCHI && TARGET_64BIT > + && local_func_symbolic_operand (operands[0], GET_MODE (operands[0]))) > ; > else > { > - gcc_assert (TARGET_3DNOW); > - operands[2] = GEN_INT (3); > + /* Ignore the hint. */ > + warning (0, "instruction prefetch applies when in 64-bit mode" > + " with RIP-relative addressing and" > + " option %<-mprefetchi%>;" > + " they stay NOPs otherwise"); > + emit_insn (gen_nop ()); > + DONE; > } > } > }) > @@ -23733,6 +23751,28 @@ > (symbol_ref "memory_address_length (operands[0], false)")) > (set_attr "memory" "none")]) > > +(define_insn "*prefetch_i" > + [(prefetch (match_operand 0 "local_func_symbolic_operand" "p") > + (const_int 0) > + (match_operand:SI 1 "const_int_operand") > + (const_int 0))] > + "TARGET_PREFETCHI" > +{ > + static const char * const patterns[2] = { > + "prefetchit1\t%a0", "prefetchit0\t%a0" > + }; > + > + int locality = INTVAL (operands[1]); > + gcc_assert (IN_RANGE (locality, 2, 3)); > + > + return patterns[locality - 2]; > +} > + [(set_attr "type" "sse") > + (set_attr "atom_sse_attr" "prefetch") > + (set (attr "length_address") > + (symbol_ref "memory_address_length (operands[0], false)")) > + (set_attr "memory" "none")]) > + > (define_expand "stack_protect_set" > [(match_operand 
0 "memory_operand") > (match_operand 1 "memory_operand")] > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index eaa43946341..1d91103cd54 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1238,3 +1238,7 @@ CMPCCXADD build-in functions and code generation. > mamx-fp16 > Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save > Support AMX-FP16 built-in functions and code generation. > + > +mprefetchi > +Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save > +Support PREFETCHI built-in functions and code generation. > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h > index d8415863f52..ac6402653e0 100644 > --- a/gcc/config/i386/immintrin.h > +++ b/gcc/config/i386/immintrin.h > @@ -134,6 +134,8 @@ > > #include <amxbf16intrin.h> > > +#include <prfchiintrin.h> > + > #include <prfchwintrin.h> > > #include <keylockerintrin.h> > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index c4141a96735..2a3f07224cc 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -610,6 +610,21 @@ > return false; > }) > > +(define_predicate "local_func_symbolic_operand" > + (match_operand 0 "local_symbolic_operand") > +{ > + if (GET_CODE (op) == CONST > + && GET_CODE (XEXP (op, 0)) == PLUS > + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) > + op = XEXP (XEXP (op, 0), 0); > + > + if (GET_CODE (op) == SYMBOL_REF > + && !SYMBOL_REF_FUNCTION_P (op)) > + return false; > + > + return true; > +})
Will it return true for any memory address? I think we should support code labels and check SYMBOL_REF_LOCAL_P. -- H.J.