Hi!

MEM_REFs which can represent type punning on lhs don't force
non-gimple types to be addressable.  This causes various problems
in the expander, which wasn't prepared to handle that.

This patch tries to fix what I've found and adds a bunch of
testcases.  The original report was with just -O2 on some large testcase
from Eigen, most of the testcases have -fno-tree-sra just because
I've given up on delta when it stopped reducing at 32KB.

The first problem (the one from Eigen) is _mm_store_pd into
a std::complex<double> var, which is a single field and thus
has DCmode TYPE_MODE.  As starting with 4.6 that var is not
TREE_ADDRESSABLE, its DECL_RTL is a CONCAT, and for assignments
to concat expand_assignment was expecting either that
from has COMPLEX_TYPE (and matching mode to the store), or
that it is a real or imaginary subpart store, thus when
trying to store a V2DF mode value it expected it to be
real part store (bitpos == 0) and tried to set a DFmode pseudo
from V2DFmode rtx.
Further testing revealed that it is possible to hit many other
cases with CONCAT destination, it can be a store of just a few bits,
or can overlap parts of both real and imaginary, or be partially
out of bounds.
The patch handles the case from Eigen - bitpos == 0 bitsize ==
GET_MODE_BITSIZE (GET_MODE (to_rtx)) non-COMPLEX_TYPE by setting
each half separately, if it is a store which is not touching
one of the parts by just adjusting bitpos for the imaginary
case and storing just to one of the parts (this is
the bitpos + bitsize < half_bitsize resp. bitpos >= half_bitsize
case) and finally adds a generic slow one for the very unusual
cases with partial overlap of both.

After testing it with the testcases I wrote, I found a bunch of
other ICEs though, and reproduced them even without CONCAT
on the LHS (the testcases below which don't contain any _Complex
keyword).

Bootstrapped/regtested on x86_64-linux and i686-linux,
regtested with a cross compiler on these new testcases also
for powerpc{,64}-linux and s390{,x}-linux.

Ok for trunk and after a while for 4.6?

2011-03-30  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/48335
        * expr.c (expand_assignment): Handle all possibilities
        if TO_RTX is CONCAT.
        * expmed.c (store_bit_field_1): Avoid trying to create
        invalid SUBREGs.
        (store_split_bit_field): If SUBREG_REG (op0) or
        op0 itself has smaller mode than word, return it
        for offset 0 and const0_rtx for out of bounds stores.
        If word is const0_rtx, skip it.

        * gcc.c-torture/compile/pr48335-1.c: New test.
        * gcc.dg/pr48335-1.c: New test.
        * gcc.dg/pr48335-2.c: New test.
        * gcc.dg/pr48335-3.c: New test.
        * gcc.dg/pr48335-4.c: New test.
        * gcc.dg/pr48335-5.c: New test.
        * gcc.dg/pr48335-6.c: New test.
        * gcc.dg/pr48335-7.c: New test.
        * gcc.dg/pr48335-8.c: New test.
        * gcc.target/i386/pr48335-1.c: New test.

--- gcc/expr.c.jj       2011-03-23 17:15:55.000000000 +0100
+++ gcc/expr.c  2011-03-30 11:38:15.000000000 +0200
@@ -4278,16 +4278,47 @@ expand_assignment (tree to, tree from, b
       /* Handle expand_expr of a complex value returning a CONCAT.  */
       else if (GET_CODE (to_rtx) == CONCAT)
        {
-         if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from))))
+         unsigned short mode_bitsize = GET_MODE_BITSIZE (GET_MODE (to_rtx));
+         if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from)))
+             && bitpos == 0
+             && bitsize == mode_bitsize)
+           result = store_expr (from, to_rtx, false, nontemporal);
+         else if (bitsize == mode_bitsize / 2
+                  && (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1)))
+           result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
+                                nontemporal);
+         else if (bitpos + bitsize <= mode_bitsize / 2)
+           result = store_field (XEXP (to_rtx, 0), bitsize, bitpos,
+                                 mode1, from, TREE_TYPE (tem),
+                                 get_alias_set (to), nontemporal);
+         else if (bitpos >= mode_bitsize / 2)
+           result = store_field (XEXP (to_rtx, 1), bitsize,
+                                 bitpos - mode_bitsize / 2, mode1, from,
+                                 TREE_TYPE (tem), get_alias_set (to),
+                                 nontemporal);
+         else if (bitpos == 0 && bitsize == mode_bitsize)
            {
-             gcc_assert (bitpos == 0);
-             result = store_expr (from, to_rtx, false, nontemporal);
+             rtx from_rtx = expand_normal (from);
+             from_rtx = simplify_gen_subreg (GET_MODE (to_rtx), from_rtx,
+                                             TYPE_MODE (TREE_TYPE (from)), 0);
+             emit_move_insn (XEXP (to_rtx, 0),
+                             read_complex_part (from_rtx, false));
+             emit_move_insn (XEXP (to_rtx, 1),
+                             read_complex_part (from_rtx, true));
+             result = NULL;
            }
          else
            {
-             gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1));
-             result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
-                                  nontemporal);
+             rtx temp = assign_stack_temp (GET_MODE (to_rtx),
+                                           GET_MODE_SIZE (GET_MODE (to_rtx)),
+                                           0);
+             write_complex_part (temp, XEXP (to_rtx, 0), false);
+             write_complex_part (temp, XEXP (to_rtx, 1), true);
+             result = store_field (temp, bitsize, bitpos, mode1, from,
+                                   TREE_TYPE (tem), get_alias_set (to),
+                                   nontemporal);
+             emit_move_insn (XEXP (to_rtx, 0), read_complex_part (temp, 
false));
+             emit_move_insn (XEXP (to_rtx, 1), read_complex_part (temp, true));
            }
        }
       else
--- gcc/expmed.c.jj     2011-03-23 17:15:55.000000000 +0100
+++ gcc/expmed.c        2011-03-30 13:37:54.000000000 +0200
@@ -422,7 +422,10 @@ store_bit_field_1 (rtx str_rtx, unsigned
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
-                && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
+                && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))
+      && (MEM_P (op0)
+         || GET_MODE (op0) == fieldmode
+         || validate_subreg (fieldmode, GET_MODE (op0), op0, byte_offset)))
     {
       if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
@@ -479,6 +482,7 @@ store_bit_field_1 (rtx str_rtx, unsigned
       struct expand_operand ops[2];
       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
       rtx arg0 = op0;
+      unsigned HOST_WIDE_INT subreg_off;
 
       if (GET_CODE (arg0) == SUBREG)
        {
@@ -491,15 +495,18 @@ store_bit_field_1 (rtx str_rtx, unsigned
          arg0 = SUBREG_REG (arg0);
        }
 
-      arg0 = gen_rtx_SUBREG (fieldmode, arg0,
-                            (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
-                            + (offset * UNITS_PER_WORD));
-
-      create_fixed_operand (&ops[0], arg0);
-      /* Shrink the source operand to FIELDMODE.  */
-      create_convert_operand_to (&ops[1], value, fieldmode, false);
-      if (maybe_expand_insn (icode, 2, ops))
-       return true;
+      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
+                  + (offset * UNITS_PER_WORD);
+      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
+       {
+         arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
+
+         create_fixed_operand (&ops[0], arg0);
+         /* Shrink the source operand to FIELDMODE.  */
+         create_convert_operand_to (&ops[1], value, fieldmode, false);
+         if (maybe_expand_insn (icode, 2, ops))
+           return true;
+       }
     }
 
   /* Handle fields bigger than a word.  */
@@ -1045,22 +1052,32 @@ store_split_bit_field (rtx op0, unsigned
       if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
-         word = operand_subword_force (SUBREG_REG (op0), word_offset,
-                                       GET_MODE (SUBREG_REG (op0)));
+         enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
+         if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
+           word = word_offset ? const0_rtx : op0;
+         else
+           word = operand_subword_force (SUBREG_REG (op0), word_offset,
+                                         GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
       else if (REG_P (op0))
        {
-         word = operand_subword_force (op0, offset, GET_MODE (op0));
+         enum machine_mode op0_mode = GET_MODE (op0);
+         if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
+           word = offset ? const0_rtx : op0;
+         else
+           word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
       else
        word = op0;
 
       /* OFFSET is in UNITs, and UNIT is in bits.
-         store_fixed_bit_field wants offset in bytes.  */
-      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
-                            thispos, part);
+        store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
+        it is jut an out of bounds access.  Ignore it.  */
+      if (word != const0_rtx)
+       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
+                              thispos, part);
       bitsdone += thissize;
     }
 }
--- gcc/testsuite/gcc.c-torture/compile/pr48335-1.c.jj  2011-03-30 
11:32:24.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/compile/pr48335-1.c     2011-03-30 
11:31:47.000000000 +0200
@@ -0,0 +1,41 @@
+/* PR middle-end/48335 */
+
+struct S { float d; };
+
+void bar (struct S);
+
+void
+f0 (int x)
+{
+  struct S s = {.d = 0.0f };
+  ((char *) &s.d)[0] = x;
+  s.d *= 7.0;
+  bar (s);
+}
+
+void
+f1 (int x)
+{
+  struct S s = {.d = 0.0f };
+  ((char *) &s.d)[1] = x;
+  s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (int x)
+{
+  struct S s = {.d = 0.0f };
+  ((char *) &s.d)[2] = x;
+  s.d *= 7.0;
+  bar (s);
+}
+
+void
+f3 (int x)
+{
+  struct S s = {.d = 0.0f };
+  ((char *) &s.d)[3] = x;
+  s.d *= 7.0;
+  bar (s);
+}
--- gcc/testsuite/gcc.dg/pr48335-1.c.jj 2011-03-30 10:57:29.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-1.c    2011-03-29 18:28:03.000000000 +0200
@@ -0,0 +1,48 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef long long T __attribute__((may_alias));
+
+struct S
+{
+  _Complex float d __attribute__((aligned (8)));
+};
+
+void bar (struct S);
+
+void
+f1 (T x)
+{
+  struct S s;
+  *(T *) &s.d = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (int x)
+{
+  struct S s = { .d = 0.0f };
+  *(char *) &s.d = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f3 (int x)
+{
+  struct S s = { .d = 0.0f };
+  ((char *) &s.d)[2] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f4 (int x, int y)
+{
+  struct S s = { .d = 0.0f };
+  ((char *) &s.d)[y] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
--- gcc/testsuite/gcc.dg/pr48335-2.c.jj 2011-03-30 10:57:29.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-2.c    2011-03-29 18:27:53.000000000 +0200
@@ -0,0 +1,58 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef long long T __attribute__((may_alias, aligned (1)));
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  _Complex float d __attribute__((aligned (8)));
+};
+
+void bar (struct S);
+
+void
+f1 (T x)
+{
+  struct S s;
+  *(T *) ((char *) &s.d + 1) = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (int x)
+{
+  struct S s = { .d = 0.0f };
+  ((U *)((char *) &s.d + 1))[0] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f3 (int x)
+{
+  struct S s = { .d = 0.0f };
+  ((U *)((char *) &s.d + 1))[1] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f4 (int x)
+{
+  struct S s = { .d = 0.0f };
+  ((U *)((char *) &s.d + 1))[2] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f5 (int x)
+{
+  struct S s = { .d = 0.0f };
+  ((U *)((char *) &s.d + 1))[3] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
--- gcc/testsuite/gcc.dg/pr48335-3.c.jj 2011-03-30 11:43:38.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-3.c    2011-03-30 11:42:54.000000000 +0200
@@ -0,0 +1,48 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  double d;
+};
+
+void bar (struct S);
+
+void
+f1 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[0] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[1] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f3 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[2] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f4 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[3] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
--- gcc/testsuite/gcc.dg/pr48335-4.c.jj 2011-03-30 12:59:11.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-4.c    2011-03-30 12:55:51.000000000 +0200
@@ -0,0 +1,39 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  double d;
+};
+
+void bar (struct S);
+
+void
+f1 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[-1] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[-2] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f3 (int x)
+{
+  struct S s = { .d = 0.0 };
+  ((U *)((char *) &s.d + 1))[5] = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
--- gcc/testsuite/gcc.dg/pr48335-5.c.jj 2011-03-30 13:09:30.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-5.c    2011-03-30 13:01:10.000000000 +0200
@@ -0,0 +1,38 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef long long T __attribute__((may_alias));
+
+struct S
+{
+  _Complex float d __attribute__((aligned (8)));
+};
+
+int
+f1 (struct S x)
+{
+  struct S s = x;
+  return *(T *) &s.d;
+}
+
+int
+f2 (struct S x)
+{
+  struct S s = x;
+  return *(char *) &s.d;
+}
+
+int
+f3 (struct S x)
+{
+  struct S s = x;
+  return ((char *) &s.d)[2];
+}
+
+int
+f4 (struct S x, int y)
+{
+  struct S s = x;
+  return ((char *) &s.d)[y];
+}
--- gcc/testsuite/gcc.dg/pr48335-6.c.jj 2011-03-30 13:09:30.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-6.c    2011-03-30 13:04:58.000000000 +0200
@@ -0,0 +1,46 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef long long T __attribute__((may_alias, aligned (1)));
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  _Complex float d __attribute__((aligned (8)));
+};
+
+T
+f1 (struct S x)
+{
+  struct S s = x;
+  return *(T *) ((char *) &s.d + 1);
+}
+
+int
+f2 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[0];
+}
+
+int
+f3 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[1];
+}
+
+int
+f4 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[2];
+}
+
+int
+f5 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[3];
+}
--- gcc/testsuite/gcc.dg/pr48335-7.c.jj 2011-03-30 13:09:30.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-7.c    2011-03-30 13:07:17.000000000 +0200
@@ -0,0 +1,38 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  double d;
+};
+
+int
+f1 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[0];
+}
+
+int
+f2 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[1];
+}
+
+int
+f3 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[2];
+}
+
+int
+f4 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[3];
+}
--- gcc/testsuite/gcc.dg/pr48335-8.c.jj 2011-03-30 13:09:30.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr48335-8.c    2011-03-30 13:08:17.000000000 +0200
@@ -0,0 +1,31 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra" } */
+
+typedef short U __attribute__((may_alias, aligned (1)));
+
+struct S
+{
+  double d;
+};
+
+int
+f1 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[-1];
+}
+
+int
+f2 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[-2];
+}
+
+int
+f3 (struct S x)
+{
+  struct S s = x;
+  return ((U *)((char *) &s.d + 1))[5];
+}
--- gcc/testsuite/gcc.target/i386/pr48335-1.c.jj        2011-03-30 
10:57:16.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/pr48335-1.c   2011-03-29 18:28:12.000000000 
+0200
@@ -0,0 +1,32 @@
+/* PR middle-end/48335 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-sra -msse2" } */
+
+#include <emmintrin.h>
+
+typedef __float128 T __attribute__((may_alias));
+
+struct S
+{
+  _Complex double d __attribute__((aligned (16)));
+};
+
+void bar (struct S);
+
+void
+f1 (T x)
+{
+  struct S s;
+  *(T *) &s.d = x;
+  __real__ s.d *= 7.0;
+  bar (s);
+}
+
+void
+f2 (__m128d x)
+{
+  struct S s;
+  _mm_store_pd ((double *) &s.d, x);
+  __real__ s.d *= 7.0;
+  bar (s);
+}

        Jakub

Reply via email to