Hi!
As discussed in the PR, calling gen_reg_rtx when init_emit is not active
is highly undesirable. The following patch makes sure that when
d->testing_p is set (i.e. when called from within
ix86_vectorize_vec_perm_const_ok) we never call gen_reg_rtx and never try
to emit insns.
Bootstrapped/regtested on x86_64-linux and i686-linux (together with Uros'
patch to assert that gen_reg_rtx is not called when init_emit is not
active) with RTL checking, further tested with
GCC_TEST_RUN_EXPENSIVE=1 make -j16 -k check
RUNTESTFLAGS='--target_board=unix\{-msse2,-msse3,-mssse3,-msse4,-mavx,-mavx2,-mavx512f\}
dg-torture.exp=*vshuf*'
(on AVX HW, so the -mavx2 and -mavx512f tests failed execution as expected,
but at least didn't fail compilation, with the exception of
gcc.dg/torture/vshuf-v8sf.c, which ICEs with -mavx2 -DEXPENSIVE both with
and without this patch - I will look at it eventually).
Ok for trunk (and the attached patch for the 4.8 branch, where Uros has
tested it)?
2014-02-20 Jakub Jelinek <[email protected]>
PR target/57896
* config/i386/i386.c (expand_vec_perm_interleave2): Don't call
gen_reg_rtx if d->testing_p.
(expand_vec_perm_pshufb2, expand_vec_perm_broadcast_1): Return early
if d->testing_p and we will certainly return true.
(expand_vec_perm_even_odd_1): Likewise. Don't call gen_reg_rtx
if d->testing_p.
--- gcc/config/i386/i386.c.jj 2014-02-19 19:11:03.600211257 +0100
+++ gcc/config/i386/i386.c 2014-02-20 12:57:30.857266155 +0100
@@ -43411,8 +43411,11 @@ expand_vec_perm_interleave2 (struct expa
else
dfinal.perm[i] = e;
}
- dremap.target = gen_reg_rtx (dremap.vmode);
- dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
+ if (!d->testing_p)
+ {
+ dremap.target = gen_reg_rtx (dremap.vmode);
+ dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
+ }
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
@@ -43845,6 +43848,9 @@ expand_vec_perm_pshufb2 (struct expand_v
return false;
gcc_assert (!d->one_operand_p);
+ if (d->testing_p)
+ return true;
+
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -44053,6 +44059,8 @@ expand_vec_perm_even_odd_1 (struct expan
switch (d->vmode)
{
case V4DFmode:
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V4DFmode);
t2 = gen_reg_rtx (V4DFmode);
@@ -44072,6 +44080,8 @@ expand_vec_perm_even_odd_1 (struct expan
{
int mask = odd ? 0xdd : 0x88;
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
@@ -44113,6 +44123,8 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
/* We need 2*log2(N)-1 operations to achieve odd/even
with interleave. */
t1 = gen_reg_rtx (V8HImode);
@@ -44134,6 +44146,8 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V16QImode);
t2 = gen_reg_rtx (V16QImode);
t3 = gen_reg_rtx (V16QImode);
@@ -44160,7 +44174,10 @@ expand_vec_perm_even_odd_1 (struct expan
{
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V4DFmode;
- d_copy.target = gen_reg_rtx (V4DFmode);
+ if (d->testing_p)
+ d_copy.target = gen_lowpart (V4DFmode, d->target);
+ else
+ d_copy.target = gen_reg_rtx (V4DFmode);
d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44190,9 @@ expand_vec_perm_even_odd_1 (struct expan
return false;
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V4DImode);
t2 = gen_reg_rtx (V4DImode);
@@ -44193,7 +44213,10 @@ expand_vec_perm_even_odd_1 (struct expan
{
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V8SFmode;
- d_copy.target = gen_reg_rtx (V8SFmode);
+ if (d->testing_p)
+ d_copy.target = gen_lowpart (V8SFmode, d->target);
+ else
+ d_copy.target = gen_reg_rtx (V8SFmode);
d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44229,9 @@ expand_vec_perm_even_odd_1 (struct expan
return false;
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44324,8 @@ expand_vec_perm_broadcast_1 (struct expa
case V16QImode:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
+ if (d->testing_p)
+ return true;
do
{
rtx dest;
Jakub
2014-02-20 Jakub Jelinek <[email protected]>
PR target/57896
* config/i386/i386.c (expand_vec_perm_interleave2): Don't call
gen_reg_rtx if d->testing_p.
(expand_vec_perm_pshufb2, expand_vec_perm_even_odd_1,
expand_vec_perm_broadcast_1): Return early if d->testing_p and
we will certainly return true.
--- gcc/config/i386/i386.c.jj 2014-02-10 15:12:58.000000000 +0100
+++ gcc/config/i386/i386.c 2014-02-20 12:28:37.320252911 +0100
@@ -39414,7 +39414,9 @@ expand_vec_perm_interleave2 (struct expa
else
dfinal.perm[i] = e;
}
- dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+
+ if (!d->testing_p)
+ dfinal.op0 = gen_reg_rtx (dfinal.vmode);
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
dremap.target = dfinal.op0;
@@ -39849,6 +39851,9 @@ expand_vec_perm_pshufb2 (struct expand_v
return false;
gcc_assert (!d->one_operand_p);
+ if (d->testing_p)
+ return true;
+
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -40048,6 +40053,8 @@ expand_vec_perm_even_odd_1 (struct expan
switch (d->vmode)
{
case V4DFmode:
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V4DFmode);
t2 = gen_reg_rtx (V4DFmode);
@@ -40067,6 +40074,8 @@ expand_vec_perm_even_odd_1 (struct expan
{
int mask = odd ? 0xdd : 0x88;
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
@@ -40108,6 +40117,8 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
/* We need 2*log2(N)-1 operations to achieve odd/even
with interleave. */
t1 = gen_reg_rtx (V8HImode);
@@ -40129,6 +40140,8 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V16QImode);
t2 = gen_reg_rtx (V16QImode);
t3 = gen_reg_rtx (V16QImode);
@@ -40161,6 +40174,9 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V4DImode);
t2 = gen_reg_rtx (V4DImode);
@@ -40187,6 +40203,9 @@ expand_vec_perm_even_odd_1 (struct expan
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
@@ -40279,6 +40298,8 @@ expand_vec_perm_broadcast_1 (struct expa
case V16QImode:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
+ if (d->testing_p)
+ return true;
do
{
rtx dest;