Hi, For functions with different target attributes, the current logic refuses to inline the callee when any arch or tune setting is mismatched. Relax the condition to honor just prefer_vector_width_type and the other flags that may cause safety issues, so the caller can get more optimization opportunities.
Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,} Ok for trunk? gcc/ChangeLog: * config/i386/i386.cc (ix86_can_inline_p): Do not check arch or tune directly, just check prefer_vector_width_type and make sure not to inline if they mismatch. gcc/testsuite/ChangeLog: * gcc.target/i386/inline-target-attr.c: New test. --- gcc/config/i386/i386.cc | 11 +++++---- .../gcc.target/i386/inline-target-attr.c | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/inline-target-attr.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 0761965344b..1d86384ac06 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -605,11 +605,12 @@ ix86_can_inline_p (tree caller, tree callee) != (callee_opts->x_target_flags & ~always_inline_safe_mask)) ret = false; - /* See if arch, tune, etc. are the same. */ - else if (caller_opts->arch != callee_opts->arch) - ret = false; - - else if (!always_inline && caller_opts->tune != callee_opts->tune) + /* Do not inline when specified prefer-vector-width mismatched between + callee and caller. 
*/ + else if ((callee_opts->x_prefer_vector_width_type != PVW_NONE + && caller_opts->x_prefer_vector_width_type != PVW_NONE) + && callee_opts->x_prefer_vector_width_type + != caller_opts->x_prefer_vector_width_type) ret = false; else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath diff --git a/gcc/testsuite/gcc.target/i386/inline-target-attr.c b/gcc/testsuite/gcc.target/i386/inline-target-attr.c new file mode 100644 index 00000000000..995502165f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/inline-target-attr.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "call\[ \t\]callee" } } */ + +__attribute__((target("arch=skylake"))) +int callee (int n) +{ + int sum = 0; + for (int i = 0; i < n; i++) + { + if (i % 2 == 0) + sum +=i; + else + sum += (i - 1); + } + return sum + n; +} + +__attribute__((target("arch=icelake-server"))) +int caller (int n) +{ + return callee (n) + n; +} + -- 2.31.1