Hi uros:
This patch fixes an inefficient vector constructor.
Currently, in ix86_expand_vector_init_concat, vectors are initialized
two elements at a time, which can miss optimization opportunities such as
PR92295.
Bootstrap and i386 regression tests are OK.
Ok for trunk?
Changelog
gcc/
PR target/92295
* config/i386/i386-expand.c (ix86_expand_vector_init_concat):
Enhance ix86_expand_vector_init_concat.
gcc/testsuite
* gcc.target/i386/pr92295.c: New test.
--
BR,
Hongtao
From 408fb093993f9df4da42d8daf2e6996f087c4618 Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Thu, 31 Oct 2019 15:14:00 +
Subject: [PATCH] Enhance ix86_expand_vector_init_concat.
Changelog
gcc/
PR target/92295
* config/i386/i386-expand.c (ix86_expand_vector_init_concat):
Enhance ix86_expand_vector_init_concat.
gcc/testsuite
* gcc.target/i386/pr92295.c: New test.
---
gcc/config/i386/i386-expand.c | 130 ++--
gcc/testsuite/gcc.target/i386/pr92295.c | 13 +++
2 files changed, 65 insertions(+), 78 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr92295.c
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 6d3d14c37dd..be040a1bc3e 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13654,8 +13654,8 @@ static void
ix86_expand_vector_init_concat (machine_mode mode,
rtx target, rtx *ops, int n)
{
- machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
- rtx first[16], second[8], third[4];
+ machine_mode half_mode = VOIDmode;
+ rtx half[2];
rtvec v;
int i, j;
@@ -13665,55 +13665,55 @@ ix86_expand_vector_init_concat (machine_mode mode,
switch (mode)
{
case E_V16SImode:
- cmode = V8SImode;
+ half_mode = V8SImode;
break;
case E_V16SFmode:
- cmode = V8SFmode;
+ half_mode = V8SFmode;
break;
case E_V8DImode:
- cmode = V4DImode;
+ half_mode = V4DImode;
break;
case E_V8DFmode:
- cmode = V4DFmode;
+ half_mode = V4DFmode;
break;
case E_V8SImode:
- cmode = V4SImode;
+ half_mode = V4SImode;
break;
case E_V8SFmode:
- cmode = V4SFmode;
+ half_mode = V4SFmode;
break;
case E_V4DImode:
- cmode = V2DImode;
+ half_mode = V2DImode;
break;
case E_V4DFmode:
- cmode = V2DFmode;
+ half_mode = V2DFmode;
break;
case E_V4SImode:
- cmode = V2SImode;
+ half_mode = V2SImode;
break;
case E_V4SFmode:
- cmode = V2SFmode;
+ half_mode = V2SFmode;
break;
case E_V2DImode:
- cmode = DImode;
+ half_mode = DImode;
break;
case E_V2SImode:
- cmode = SImode;
+ half_mode = SImode;
break;
case E_V2DFmode:
- cmode = DFmode;
+ half_mode = DFmode;
break;
case E_V2SFmode:
- cmode = SFmode;
+ half_mode = SFmode;
break;
default:
gcc_unreachable ();
}
- if (!register_operand (ops[1], cmode))
- ops[1] = force_reg (cmode, ops[1]);
- if (!register_operand (ops[0], cmode))
- ops[0] = force_reg (cmode, ops[0]);
+ if (!register_operand (ops[1], half_mode))
+ ops[1] = force_reg (half_mode, ops[1]);
+ if (!register_operand (ops[0], half_mode))
+ ops[0] = force_reg (half_mode, ops[0]);
emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
ops[1])));
break;
@@ -13722,16 +13722,16 @@ ix86_expand_vector_init_concat (machine_mode mode,
switch (mode)
{
case E_V4DImode:
- cmode = V2DImode;
+ half_mode = V2DImode;
break;
case E_V4DFmode:
- cmode = V2DFmode;
+ half_mode = V2DFmode;
break;
case E_V4SImode:
- cmode = V2SImode;
+ half_mode = V2SImode;
break;
case E_V4SFmode:
- cmode = V2SFmode;
+ half_mode = V2SFmode;
break;
default:
gcc_unreachable ();
@@ -13742,20 +13742,16 @@ ix86_expand_vector_init_concat (machine_mode mode,
switch (mode)
{
case E_V8DImode:
- cmode = V2DImode;
- hmode = V4DImode;
+ half_mode = V4DImode;
break;
case E_V8DFmode:
- cmode = V2DFmode;
- hmode = V4DFmode;
+ half_mode = V4DFmode;
break;
case E_V8SImode:
- cmode = V2SImode;
- hmode = V4SImode;
+ half_mode = V4SImode;
break;
case E_V8SFmode:
- cmode = V2SFmode;
- hmode = V4SFmode;
+ half_mode = V4SFmode;
break;
default:
gcc_unreachable ();
@@ -13766,14 +13762,10 @@ ix86_expand_vector_init_concat (machine_mode mode,
switch (mode)
{
case E_V16SImode:
- cmode = V2SImode;
- hmode = V4SImode;
- gmode = V8SImode;
+ half_mode = V8SImode;
break;
case E_V16SFmode:
- cmode = V2SFmode;
- hmode = V4SFmode;
- gmode = V8SFmode;
+ half_mode = V8SFmode;
break;
default:
gcc_unreachable ();
@@ -13783,50 +13775,32 @@ ix86_expand_vector_init_concat (machine_mode mode,
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
- j = (n >> 1) - 1;
- for (; i > 0; i -= 2, j--)
- {
- first[j] = gen_reg_rtx (cmode);
- v = gen_rtvec (2, ops[i - 1], ops[i]);
- ix86_expand_vecto