On Thu, Nov 7, 2024 at 10:29 AM MayShao-oc <[email protected]> wrote:
>
> Hi all:
> For Zhaoxin, I see no improvement when pass_align_tight_loops is enabled,
> and there is a performance drop in some cases.
> This patch adds a new tunable to bypass pass_align_tight_loops on Zhaoxin.
>
> Bootstrapped X86_64.
> Ok for trunk?
> BR
> Mayshao
> gcc/ChangeLog:
>
> * config/i386/i386-features.cc (TARGET_ALIGN_TIGHT_LOOPS):
> Default to true for all processors except Zhaoxin.
> * config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New macro.
> * config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS):
> New tune.
> ---
> gcc/config/i386/i386-features.cc | 4 +++-
> gcc/config/i386/i386.h | 3 +++
> gcc/config/i386/x86-tune.def | 4 ++++
> 3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/i386/i386-features.cc
> b/gcc/config/i386/i386-features.cc
> index e2e85212a4f..d9fd92964fe 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -3620,7 +3620,9 @@ public:
> /* opt_pass methods: */
> bool gate (function *) final override
> {
> - return optimize && optimize_function_for_speed_p (cfun);
> + return TARGET_ALIGN_TIGHT_LOOPS
> + && optimize
> + && optimize_function_for_speed_p (cfun);
> }
>
> unsigned int execute (function *) final override
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 2dcd8803a08..7f9010246c2 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
> #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
> #define TARGET_SSE_MOVCC_USE_BLENDV \
> ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
> +#define TARGET_ALIGN_TIGHT_LOOPS \
> + ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
> +
>
> /* Feature tests against the various architecture variations. */
> enum ix86_arch_indices {
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 6ebb2fd3414..bd4fa8b3eee 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
> DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
> "sse_movcc_use_blendv", ~m_CORE_ATOM)
>
> +/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned. */
> +DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
> + ~(m_ZHAOXIN))
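Please also exclude m_CASCADELAKE and m_SKYLAKE_AVX512 here, i.e. make this line
~(m_ZHAOXIN | m_CASCADELAKE | m_SKYLAKE_AVX512))
Also, could you move it into the following section and rename the section header as shown?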
/*****************************************************************************/
-/* Branch predictor tuning */
+/* Branch predictor and The Front-end tuning */
/*****************************************************************************/
> +
>
> /*****************************************************************************/
> /* AVX instruction selection tuning (some of SSE flags affects AVX, too)
> */
>
> /*****************************************************************************/
> --
> 2.27.0
>
--
BR,
Hongtao