https://gcc.gnu.org/g:ac306de7d5100d3682eae2270995a9abbe19db38
commit r15-984-gac306de7d5100d3682eae2270995a9abbe19db38 Author: liuhongt <hongtao....@intel.com> Date: Fri May 31 14:38:07 2024 +0800 Add some preference for floating point rtl ifcvt when sse4.1 is not available Without TARGET_SSE4_1, movdfcc/movsfcc takes 3 instructions (pand, pandn and por), so the converted sequence could fail the cost comparison. Increasing the branch cost could hurt performance for other modes, so specifically add some preference for floating point ifcvt. gcc/ChangeLog: PR target/115299 * config/i386/i386.cc (ix86_noce_conversion_profitable_p): Add some preference for floating point ifcvt when SSE4.1 is not available. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115299.c: New test. * gcc.target/i386/pr86722.c: Adjust testcase. Diff: --- gcc/config/i386/i386.cc | 17 +++++++++++++++++ gcc/testsuite/gcc.target/i386/pr115299.c | 10 ++++++++++ gcc/testsuite/gcc.target/i386/pr86722.c | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 1a0206ab573..271da127a89 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -24879,6 +24879,23 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) return false; } } + + /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por) + for movdfcc/movsfcc, and could possibly fail cost comparison. + Increase branch cost will hurt performance for other modes, so + specially add some preference for floating point ifcvt. 
*/ + if (!TARGET_SSE4_1 && if_info->x + && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT + && if_info->speed_p) + { + unsigned cost = seq_cost (seq, true); + + if (cost <= if_info->original_cost) + return true; + + return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2)); + } + return default_noce_conversion_profitable_p (seq, if_info); } diff --git a/gcc/testsuite/gcc.target/i386/pr115299.c b/gcc/testsuite/gcc.target/i386/pr115299.c new file mode 100644 index 00000000000..53c5899136a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115299.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-sse4.1 -msse2" } */ + +void f(double*d,double*e){ + for(;d<e;++d) + *d=(*d<.5)?.7:0; +} + +/* { dg-final { scan-assembler {(?n)(?:cmpnltsd|cmpltsd)} } } */ +/* { dg-final { scan-assembler {(?n)(?:andnpd|andpd)} } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr86722.c b/gcc/testsuite/gcc.target/i386/pr86722.c index 4de2ca1a6c0..e266a1e56c2 100644 --- a/gcc/testsuite/gcc.target/i386/pr86722.c +++ b/gcc/testsuite/gcc.target/i386/pr86722.c @@ -6,5 +6,5 @@ void f(double*d,double*e){ *d=(*d<.5)?.7:0; } -/* { dg-final { scan-assembler-not "andnpd" } } */ +/* { dg-final { scan-assembler-times {(?n)(?:andnpd|andpd)} 1 } } */ /* { dg-final { scan-assembler-not "orpd" } } */