Hi! MEM_REFs which can represent type punning on lhs don't force non-gimple types to be addressable. This causes various problems in the expander, which wasn't prepared to handle that.
This patch tries to fix what I've found and adds a bunch of testcases. The original report was with just -O2 on some large testcase from Eigen, most of the testcases have -fno-tree-sra just because I've given up on delta when it stopped reducing at 32KB. The first problem (the one from Eigen) is _mm_store_pd into a std::complex<double> var, which is a single field and thus has DCmode TYPE_MODE. As starting with 4.6 that var is not TREE_ADDRESSABLE, its DECL_RTL is a CONCAT, and for assignments to concat expand_assignment was expecting either that from has COMPLEX_TYPE (and matching mode to the store), or that it is a real or imaginary subpart store, thus when trying to store a V2DF mode value it expected it to be real part store (bitpos == 0) and tried to set a DFmode pseudo from V2DFmode rtx. Further testing revealed that it is possible to hit many other cases with CONCAT destination, it can be a store of just a few bits, or can overlap parts of both real and imaginary, or be partially out of bounds. The patch handles the case from Eigen - bitpos == 0 bitsize == GET_MODE_BITSIZE (GET_MODE (to_rtx)) non-COMPLEX_TYPE by setting each half separately, if it is a store which is not touching one of the parts by just adjusting bitpos for the imaginary case and storing just to one of the parts (this is the bitpos + bitsize < half_bitsize resp. bitpos >= half_bitsize case) and finally adds a generic slow one for the very unusual cases with partial overlap of both. After testing it with the testcases I wrote, I found a bunch of other ICEs though, and reproduced them even without CONCAT on the LHS (the testcases below which don't contain any _Complex keyword). Bootstrapped/regtested on x86_64-linux and i686-linux, regtested with a cross compiler on these new testcases also for powerpc{,64}-linux and s390{,x}-linux. Ok for trunk and after a while for 4.6? 2011-03-30 Jakub Jelinek <ja...@redhat.com> PR middle-end/48335 * expr.c (expand_assignment): Handle all possibilities if TO_RTX is CONCAT. * expmed.c (store_bit_field_1): Avoid trying to create invalid SUBREGs. (store_split_bit_field): If SUBREG_REG (op0) or op0 itself has smaller mode than word, return it for offset 0 and const0_rtx for out of bounds stores. If word is const0_rtx, skip it. * gcc.c-torture/compile/pr48335-1.c: New test. * gcc.dg/pr48335-1.c: New test. * gcc.dg/pr48335-2.c: New test. * gcc.dg/pr48335-3.c: New test. * gcc.dg/pr48335-4.c: New test. * gcc.dg/pr48335-5.c: New test. * gcc.dg/pr48335-6.c: New test. * gcc.dg/pr48335-7.c: New test. * gcc.dg/pr48335-8.c: New test. * gcc.target/i386/pr48335-1.c: New test. --- gcc/expr.c.jj 2011-03-23 17:15:55.000000000 +0100 +++ gcc/expr.c 2011-03-30 11:38:15.000000000 +0200 @@ -4278,16 +4278,47 @@ expand_assignment (tree to, tree from, b /* Handle expand_expr of a complex value returning a CONCAT. */ else if (GET_CODE (to_rtx) == CONCAT) { - if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from)))) + unsigned short mode_bitsize = GET_MODE_BITSIZE (GET_MODE (to_rtx)); + if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from))) + && bitpos == 0 + && bitsize == mode_bitsize) + result = store_expr (from, to_rtx, false, nontemporal); + else if (bitsize == mode_bitsize / 2 + && (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1))) + result = store_expr (from, XEXP (to_rtx, bitpos != 0), false, + nontemporal); + else if (bitpos + bitsize <= mode_bitsize / 2) + result = store_field (XEXP (to_rtx, 0), bitsize, bitpos, + mode1, from, TREE_TYPE (tem), + get_alias_set (to), nontemporal); + else if (bitpos >= mode_bitsize / 2) + result = store_field (XEXP (to_rtx, 1), bitsize, + bitpos - mode_bitsize / 2, mode1, from, + TREE_TYPE (tem), get_alias_set (to), + nontemporal); + else if (bitpos == 0 && bitsize == mode_bitsize) { - gcc_assert (bitpos == 0); - result = store_expr (from, to_rtx, false, nontemporal); + rtx from_rtx = expand_normal (from); + from_rtx = simplify_gen_subreg (GET_MODE (to_rtx), from_rtx, + TYPE_MODE (TREE_TYPE (from)), 0); + emit_move_insn (XEXP (to_rtx, 0), + read_complex_part (from_rtx, false)); + emit_move_insn (XEXP (to_rtx, 1), + read_complex_part (from_rtx, true)); + result = NULL; } else { - gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1)); - result = store_expr (from, XEXP (to_rtx, bitpos != 0), false, - nontemporal); + rtx temp = assign_stack_temp (GET_MODE (to_rtx), + GET_MODE_SIZE (GET_MODE (to_rtx)), + 0); + write_complex_part (temp, XEXP (to_rtx, 0), false); + write_complex_part (temp, XEXP (to_rtx, 1), true); + result = store_field (temp, bitsize, bitpos, mode1, from, + TREE_TYPE (tem), get_alias_set (to), + nontemporal); + emit_move_insn (XEXP (to_rtx, 0), read_complex_part (temp, false)); + emit_move_insn (XEXP (to_rtx, 1), read_complex_part (temp, true)); } } else --- gcc/expmed.c.jj 2011-03-23 17:15:55.000000000 +0100 +++ gcc/expmed.c 2011-03-30 13:37:54.000000000 +0200 @@ -422,7 +422,10 @@ store_bit_field_1 (rtx str_rtx, unsigned && byte_offset % GET_MODE_SIZE (fieldmode) == 0) : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) || (offset * BITS_PER_UNIT % bitsize == 0 - && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) + && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))) + && (MEM_P (op0) + || GET_MODE (op0) == fieldmode + || validate_subreg (fieldmode, GET_MODE (op0), op0, byte_offset))) { if (MEM_P (op0)) op0 = adjust_address (op0, fieldmode, offset); @@ -479,6 +482,7 @@ store_bit_field_1 (rtx str_rtx, unsigned struct expand_operand ops[2]; enum insn_code icode = optab_handler (movstrict_optab, fieldmode); rtx arg0 = op0; + unsigned HOST_WIDE_INT subreg_off; if (GET_CODE (arg0) == SUBREG) { @@ -491,15 +495,18 @@ store_bit_field_1 (rtx str_rtx, unsigned arg0 = SUBREG_REG (arg0); } - arg0 = gen_rtx_SUBREG (fieldmode, arg0, - (bitnum % BITS_PER_WORD) / BITS_PER_UNIT - + (offset * UNITS_PER_WORD)); - - create_fixed_operand (&ops[0], arg0); - /* Shrink the source operand to FIELDMODE. */ - create_convert_operand_to (&ops[1], value, fieldmode, false); - if (maybe_expand_insn (icode, 2, ops)) - return true; + subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT + + (offset * UNITS_PER_WORD); + if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) + { + arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); + + create_fixed_operand (&ops[0], arg0); + /* Shrink the source operand to FIELDMODE. */ + create_convert_operand_to (&ops[1], value, fieldmode, false); + if (maybe_expand_insn (icode, 2, ops)) + return true; + } } /* Handle fields bigger than a word. */ @@ -1045,22 +1052,32 @@ store_split_bit_field (rtx op0, unsigned if (GET_CODE (op0) == SUBREG) { int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; - word = operand_subword_force (SUBREG_REG (op0), word_offset, - GET_MODE (SUBREG_REG (op0))); + enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0)); + if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD) + word = word_offset ? const0_rtx : op0; + else + word = operand_subword_force (SUBREG_REG (op0), word_offset, + GET_MODE (SUBREG_REG (op0))); offset = 0; } else if (REG_P (op0)) { - word = operand_subword_force (op0, offset, GET_MODE (op0)); + enum machine_mode op0_mode = GET_MODE (op0); + if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) + word = offset ? const0_rtx : op0; + else + word = operand_subword_force (op0, offset, GET_MODE (op0)); offset = 0; } else word = op0; /* OFFSET is in UNITs, and UNIT is in bits. - store_fixed_bit_field wants offset in bytes. */ - store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, - thispos, part); + store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx, + it is jut an out of bounds access. Ignore it. */ + if (word != const0_rtx) + store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, + thispos, part); bitsdone += thissize; } } --- gcc/testsuite/gcc.c-torture/compile/pr48335-1.c.jj 2011-03-30 11:32:24.000000000 +0200 +++ gcc/testsuite/gcc.c-torture/compile/pr48335-1.c 2011-03-30 11:31:47.000000000 +0200 @@ -0,0 +1,41 @@ +/* PR middle-end/48335 */ + +struct S { float d; }; + +void bar (struct S); + +void +f0 (int x) +{ + struct S s = {.d = 0.0f }; + ((char *) &s.d)[0] = x; + s.d *= 7.0; + bar (s); +} + +void +f1 (int x) +{ + struct S s = {.d = 0.0f }; + ((char *) &s.d)[1] = x; + s.d *= 7.0; + bar (s); +} + +void +f2 (int x) +{ + struct S s = {.d = 0.0f }; + ((char *) &s.d)[2] = x; + s.d *= 7.0; + bar (s); +} + +void +f3 (int x) +{ + struct S s = {.d = 0.0f }; + ((char *) &s.d)[3] = x; + s.d *= 7.0; + bar (s); +} --- gcc/testsuite/gcc.dg/pr48335-1.c.jj 2011-03-30 10:57:29.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-1.c 2011-03-29 18:28:03.000000000 +0200 @@ -0,0 +1,48 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef long long T __attribute__((may_alias)); + +struct S +{ + _Complex float d __attribute__((aligned (8))); +}; + +void bar (struct S); + +void +f1 (T x) +{ + struct S s; + *(T *) &s.d = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f2 (int x) +{ + struct S s = { .d = 0.0f }; + *(char *) &s.d = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f3 (int x) +{ + struct S s = { .d = 0.0f }; + ((char *) &s.d)[2] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f4 (int x, int y) +{ + struct S s = { .d = 0.0f }; + ((char *) &s.d)[y] = x; + __real__ s.d *= 7.0; + bar (s); +} --- gcc/testsuite/gcc.dg/pr48335-2.c.jj 2011-03-30 10:57:29.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-2.c 2011-03-29 18:27:53.000000000 +0200 @@ -0,0 +1,58 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef long long T __attribute__((may_alias, aligned (1))); +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + _Complex float d __attribute__((aligned (8))); +}; + +void bar (struct S); + +void +f1 (T x) +{ + struct S s; + *(T *) ((char *) &s.d + 1) = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f2 (int x) +{ + struct S s = { .d = 0.0f }; + ((U *)((char *) &s.d + 1))[0] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f3 (int x) +{ + struct S s = { .d = 0.0f }; + ((U *)((char *) &s.d + 1))[1] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f4 (int x) +{ + struct S s = { .d = 0.0f }; + ((U *)((char *) &s.d + 1))[2] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f5 (int x) +{ + struct S s = { .d = 0.0f }; + ((U *)((char *) &s.d + 1))[3] = x; + __real__ s.d *= 7.0; + bar (s); +} --- gcc/testsuite/gcc.dg/pr48335-3.c.jj 2011-03-30 11:43:38.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-3.c 2011-03-30 11:42:54.000000000 +0200 @@ -0,0 +1,48 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + double d; +}; + +void bar (struct S); + +void +f1 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[0] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f2 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[1] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f3 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[2] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f4 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[3] = x; + __real__ s.d *= 7.0; + bar (s); +} --- gcc/testsuite/gcc.dg/pr48335-4.c.jj 2011-03-30 12:59:11.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-4.c 2011-03-30 12:55:51.000000000 +0200 @@ -0,0 +1,39 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + double d; +}; + +void bar (struct S); + +void +f1 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[-1] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f2 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[-2] = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f3 (int x) +{ + struct S s = { .d = 0.0 }; + ((U *)((char *) &s.d + 1))[5] = x; + __real__ s.d *= 7.0; + bar (s); +} --- gcc/testsuite/gcc.dg/pr48335-5.c.jj 2011-03-30 13:09:30.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-5.c 2011-03-30 13:01:10.000000000 +0200 @@ -0,0 +1,38 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef long long T __attribute__((may_alias)); + +struct S +{ + _Complex float d __attribute__((aligned (8))); +}; + +int +f1 (struct S x) +{ + struct S s = x; + return *(T *) &s.d; +} + +int +f2 (struct S x) +{ + struct S s = x; + return *(char *) &s.d; +} + +int +f3 (struct S x) +{ + struct S s = x; + return ((char *) &s.d)[2]; +} + +int +f4 (struct S x, int y) +{ + struct S s = x; + return ((char *) &s.d)[y]; +} --- gcc/testsuite/gcc.dg/pr48335-6.c.jj 2011-03-30 13:09:30.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-6.c 2011-03-30 13:04:58.000000000 +0200 @@ -0,0 +1,46 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef long long T __attribute__((may_alias, aligned (1))); +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + _Complex float d __attribute__((aligned (8))); +}; + +T +f1 (struct S x) +{ + struct S s = x; + return *(T *) ((char *) &s.d + 1); +} + +int +f2 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[0]; +} + +int +f3 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[1]; +} + +int +f4 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[2]; +} + +int +f5 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[3]; +} --- gcc/testsuite/gcc.dg/pr48335-7.c.jj 2011-03-30 13:09:30.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-7.c 2011-03-30 13:07:17.000000000 +0200 @@ -0,0 +1,38 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + double d; +}; + +int +f1 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[0]; +} + +int +f2 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[1]; +} + +int +f3 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[2]; +} + +int +f4 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[3]; +} --- gcc/testsuite/gcc.dg/pr48335-8.c.jj 2011-03-30 13:09:30.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48335-8.c 2011-03-30 13:08:17.000000000 +0200 @@ -0,0 +1,31 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra" } */ + +typedef short U __attribute__((may_alias, aligned (1))); + +struct S +{ + double d; +}; + +int +f1 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[-1]; +} + +int +f2 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[-2]; +} + +int +f3 (struct S x) +{ + struct S s = x; + return ((U *)((char *) &s.d + 1))[5]; +} --- gcc/testsuite/gcc.target/i386/pr48335-1.c.jj 2011-03-30 10:57:16.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/pr48335-1.c 2011-03-29 18:28:12.000000000 +0200 @@ -0,0 +1,32 @@ +/* PR middle-end/48335 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra -msse2" } */ + +#include <emmintrin.h> + +typedef __float128 T __attribute__((may_alias)); + +struct S +{ + _Complex double d __attribute__((aligned (16))); +}; + +void bar (struct S); + +void +f1 (T x) +{ + struct S s; + *(T *) &s.d = x; + __real__ s.d *= 7.0; + bar (s); +} + +void +f2 (__m128d x) +{ + struct S s; + _mm_store_pd ((double *) &s.d, x); + __real__ s.d *= 7.0; + bar (s); +} Jakub