PR 48660 is about an ICE on code like: template<int N> struct val { char a[N]; };
class Base { public: virtual val<2> get2() const = 0; }; class Derived : public virtual Base { public: virtual val<2> get2() const { return foo->get2(); } Base *foo; }; Base* make() { return new Derived; } The thunk for Derived::get2() ends with an assignment to the val<2> RESULT_DECL, followed by a return. The problem is that val<2> has mode BLKmode, but the RESULT_DECL is a HImode register (small structures are returned in registers on ARM). We then ICE while copying a BLKmode rhs into a HImode lhs. At first I thought this was a bug in the thunk expansion; I haven't been able to trigger it elsewhere. But Richard said on irc that this was valid gimple and should work. So this patch instead fixes the implicit FIXME in: The compiler currently can't handle copying a BLKmode value into registers. We could put this code in a more general area (for use by everyone instead of just function call/return), but until this feature is generally usable it is kept here (and in expand_call). */ I think the expand_call part is already out-of-date: I assume it's referring to copy_blkmode_from_reg, which handles moves in the opposite direction -- return register to BLKmode value -- and which is already a separate function. At the moment, copies into RESULT_DECL are handled by the: /* If the rhs is a function call and its value is not an aggregate, call the function before we start to compute the lhs. This is needed for correct code for cases such as val = setjmp (buf) on machines where reference to val requires loading up part of an address in a separate insn. Don't do this if TO is a VAR_DECL or PARM_DECL whose DECL_RTL is REG since it might be a promoted variable where the zero- or sign- extension needs to be done. Handling this in the normal way is safe because no computation is done before the call. The same is true for SSA names. */ if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from) && COMPLETE_TYPE_P (TREE_TYPE (from)) && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL) && REG_P (DECL_RTL (to))) || TREE_CODE (to) == SSA_NAME)) { rtx value; push_temp_slots (); value = expand_normal (from); if (to_rtx == 0) to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE); block of expand_assignment, but I think the REG_P exclusion that applies to VAR_DECLs and PARM_DECLs also applies to RESULT_DECLs. Tested on x86_64-linux-gnu and arm-linux-gnu. OK for trunk? What about release branches? This is a regression from 4.4 (which had the old thunk code). A conservative alternative for the branches might be to leave out the stmt.c part. Richard gcc/ * expr.h (copy_blkmode_to_reg): Declare. * expr.c (copy_blkmode_to_reg): New function. (expand_assignment): Don't expand register RESULT_DECLs before the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a RESULT_DECL register. * stmt.c (expand_return): Move BLKmode-to-register code into copy_blkmode_to_reg. Index: gcc/expr.h =================================================================== --- gcc/expr.h 2011-09-08 11:06:35.314378369 +0100 +++ gcc/expr.h 2011-09-08 13:23:37.976613715 +0100 @@ -321,6 +321,8 @@ extern void emit_group_store (rtx, rtx, /* Copy BLKmode object from a set of registers. */ extern rtx copy_blkmode_from_reg (rtx, rtx, tree); +extern rtx copy_blkmode_to_reg (enum machine_mode, tree); + /* Mark REG as holding a parameter for the next CALL_INSN. */ extern void use_reg (rtx *, rtx); Index: gcc/expr.c =================================================================== --- gcc/expr.c 2011-09-08 11:06:35.314378369 +0100 +++ gcc/expr.c 2011-09-08 13:38:24.871096387 +0100 @@ -2180,6 +2180,112 @@ copy_blkmode_from_reg (rtx tgtblk, rtx s return tgtblk; } +/* Copy BLKmode value SRC into a register of mode MODE. Return the + register if it contains any data, otherwise return null. + + This is used on targets that return BLKmode values in registers. */ + +rtx +copy_blkmode_to_reg (enum machine_mode mode, tree src) +{ + int i, n_regs; + unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes; + unsigned int bitsize; + rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX; + enum machine_mode dst_mode; + + gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode); + + x = expand_normal (src); + + bytes = int_size_in_bytes (TREE_TYPE (src)); + if (bytes == 0) + return NULL_RTX; + + /* If the structure doesn't take up a whole number of words, see + whether the register value should be padded on the left or on + the right. Set PADDING_CORRECTION to the number of padding + bits needed on the left side. + + In most ABIs, the structure will be returned at the least end of + the register, which translates to right padding on little-endian + targets and left padding on big-endian targets. The opposite + holds if the structure is returned at the most significant + end of the register. */ + if (bytes % UNITS_PER_WORD != 0 + && (targetm.calls.return_in_msb (TREE_TYPE (src)) + ? !BYTES_BIG_ENDIAN + : BYTES_BIG_ENDIAN)) + padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) + * BITS_PER_UNIT)); + + n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + dst_words = XALLOCAVEC (rtx, n_regs); + bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD); + + /* Copy the structure BITSIZE bits at a time. */ + for (bitpos = 0, xbitpos = padding_correction; + bitpos < bytes * BITS_PER_UNIT; + bitpos += bitsize, xbitpos += bitsize) + { + /* We need a new destination pseudo each time xbitpos is + on a word boundary and when xbitpos == padding_correction + (the first time through). */ + if (xbitpos % BITS_PER_WORD == 0 + || xbitpos == padding_correction) + { + /* Generate an appropriate register. */ + dst_word = gen_reg_rtx (word_mode); + dst_words[xbitpos / BITS_PER_WORD] = dst_word; + + /* Clear the destination before we move anything into it. */ + emit_move_insn (dst_word, CONST0_RTX (word_mode)); + } + + /* We need a new source operand each time bitpos is on a word + boundary. */ + if (bitpos % BITS_PER_WORD == 0) + src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode); + + /* Use bitpos for the source extraction (left justified) and + xbitpos for the destination store (right justified). */ + store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, + 0, 0, word_mode, + extract_bit_field (src_word, bitsize, + bitpos % BITS_PER_WORD, 1, false, + NULL_RTX, word_mode, word_mode)); + } + + if (mode == BLKmode) + { + /* Find the smallest integer mode large enough to hold the + entire structure. */ + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); + mode != VOIDmode; + mode = GET_MODE_WIDER_MODE (mode)) + /* Have we found a large enough mode? */ + if (GET_MODE_SIZE (mode) >= bytes) + break; + + /* A suitable mode should have been found. */ + gcc_assert (mode != VOIDmode); + } + + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode)) + dst_mode = word_mode; + else + dst_mode = mode; + dst = gen_reg_rtx (dst_mode); + + for (i = 0; i < n_regs; i++) + emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]); + + if (mode != dst_mode) + dst = gen_lowpart (mode, dst); + + return dst; +} + /* Add a USE expression for REG to the (possibly empty) list pointed to by CALL_FUSAGE. REG must denote a hard register. */ @@ -4721,7 +4827,9 @@ expand_assignment (tree to, tree from, b if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from) && COMPLETE_TYPE_P (TREE_TYPE (from)) && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST - && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL) + && ! (((TREE_CODE (to) == VAR_DECL + || TREE_CODE (to) == PARM_DECL + || TREE_CODE (to) == RESULT_DECL) && REG_P (DECL_RTL (to))) || TREE_CODE (to) == SSA_NAME)) { @@ -4767,12 +4875,15 @@ expand_assignment (tree to, tree from, b rtx temp; push_temp_slots (); - temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); + if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode) + temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from); + else + temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); if (GET_CODE (to_rtx) == PARALLEL) emit_group_load (to_rtx, temp, TREE_TYPE (from), int_size_in_bytes (TREE_TYPE (from))); - else + else if (temp) emit_move_insn (to_rtx, temp); preserve_temp_slots (to_rtx); @@ -8950,10 +9061,15 @@ expand_expr_real_1 (tree exp, rtx target return temp; } - /* If the mode of DECL_RTL does not match that of the decl, it - must be a promoted value. We return a SUBREG of the wanted mode, - but mark it so that we know that it was already extended. */ - if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp)) + /* If the mode of DECL_RTL does not match that of the decl, + there are two cases: we are dealing with a BLKmode value + that is returned in a register, or we are dealing with + a promoted value. In the latter case, return a SUBREG + of the wanted mode, but mark it so that we know that it + was already extended. */ + if (REG_P (decl_rtl) + && !(code == RESULT_DECL && DECL_MODE (exp) == BLKmode) + && GET_MODE (decl_rtl) != DECL_MODE (exp)) { enum machine_mode pmode; Index: gcc/stmt.c =================================================================== --- gcc/stmt.c 2011-09-08 11:06:35.314378369 +0100 +++ gcc/stmt.c 2011-09-08 13:23:38.000613685 +0100 @@ -1685,120 +1685,21 @@ expand_return (tree retval) expand_value_return (result_rtl); /* If the result is an aggregate that is being returned in one (or more) - registers, load the registers here. The compiler currently can't handle - copying a BLKmode value into registers. We could put this code in a - more general area (for use by everyone instead of just function - call/return), but until this feature is generally usable it is kept here - (and in expand_call). */ + registers, load the registers here. */ else if (retval_rhs != 0 && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode && REG_P (result_rtl)) { - int i; - unsigned HOST_WIDE_INT bitpos, xbitpos; - unsigned HOST_WIDE_INT padding_correction = 0; - unsigned HOST_WIDE_INT bytes - = int_size_in_bytes (TREE_TYPE (retval_rhs)); - int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - unsigned int bitsize - = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD); - rtx *result_pseudos = XALLOCAVEC (rtx, n_regs); - rtx result_reg, src = NULL_RTX, dst = NULL_RTX; - rtx result_val = expand_normal (retval_rhs); - enum machine_mode tmpmode, result_reg_mode; - - if (bytes == 0) + val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs); + if (val) { - expand_null_return (); - return; - } - - /* If the structure doesn't take up a whole number of words, see - whether the register value should be padded on the left or on - the right. Set PADDING_CORRECTION to the number of padding - bits needed on the left side. - - In most ABIs, the structure will be returned at the least end of - the register, which translates to right padding on little-endian - targets and left padding on big-endian targets. The opposite - holds if the structure is returned at the most significant - end of the register. */ - if (bytes % UNITS_PER_WORD != 0 - && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs)) - ? !BYTES_BIG_ENDIAN - : BYTES_BIG_ENDIAN)) - padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) - * BITS_PER_UNIT)); - - /* Copy the structure BITSIZE bits at a time. */ - for (bitpos = 0, xbitpos = padding_correction; - bitpos < bytes * BITS_PER_UNIT; - bitpos += bitsize, xbitpos += bitsize) - { - /* We need a new destination pseudo each time xbitpos is - on a word boundary and when xbitpos == padding_correction - (the first time through). */ - if (xbitpos % BITS_PER_WORD == 0 - || xbitpos == padding_correction) - { - /* Generate an appropriate register. */ - dst = gen_reg_rtx (word_mode); - result_pseudos[xbitpos / BITS_PER_WORD] = dst; - - /* Clear the destination before we move anything into it. */ - emit_move_insn (dst, CONST0_RTX (GET_MODE (dst))); - } - - /* We need a new source operand each time bitpos is on a word - boundary. */ - if (bitpos % BITS_PER_WORD == 0) - src = operand_subword_force (result_val, - bitpos / BITS_PER_WORD, - BLKmode); - - /* Use bitpos for the source extraction (left justified) and - xbitpos for the destination store (right justified). */ - store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, - 0, 0, word_mode, - extract_bit_field (src, bitsize, - bitpos % BITS_PER_WORD, 1, false, - NULL_RTX, word_mode, word_mode)); - } - - tmpmode = GET_MODE (result_rtl); - if (tmpmode == BLKmode) - { - /* Find the smallest integer mode large enough to hold the - entire structure and use that mode instead of BLKmode - on the USE insn for the return register. */ - for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT); - tmpmode != VOIDmode; - tmpmode = GET_MODE_WIDER_MODE (tmpmode)) - /* Have we found a large enough mode? */ - if (GET_MODE_SIZE (tmpmode) >= bytes) - break; - - /* A suitable mode should have been found. */ - gcc_assert (tmpmode != VOIDmode); - - PUT_MODE (result_rtl, tmpmode); + /* Use the mode of the result value on the return register. */ + PUT_MODE (result_rtl, GET_MODE (val)); + expand_value_return (val); } - - if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode)) - result_reg_mode = word_mode; else - result_reg_mode = tmpmode; - result_reg = gen_reg_rtx (result_reg_mode); - - for (i = 0; i < n_regs; i++) - emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode), - result_pseudos[i]); - - if (tmpmode != result_reg_mode) - result_reg = gen_lowpart (tmpmode, result_reg); - - expand_value_return (result_reg); + expand_null_return (); } else if (retval_rhs != 0 && !VOID_TYPE_P (TREE_TYPE (retval_rhs)) Index: gcc/testsuite/g++.dg/pr48660.C =================================================================== --- /dev/null 2011-09-08 10:02:38.944000001 +0100 +++ gcc/testsuite/g++.dg/pr48660.C 2011-09-08 13:23:38.040613636 +0100 @@ -0,0 +1,22 @@ +template<int N> struct val { char a[N]; }; + +class Base +{ +public: + virtual val<1> get1() const = 0; + virtual val<2> get2() const = 0; + virtual val<3> get3() const = 0; + virtual val<4> get4() const = 0; +}; + +class Derived : public virtual Base +{ +public: + virtual val<1> get1() const { return foo->get1(); } + virtual val<2> get2() const { return foo->get2(); } + virtual val<3> get3() const { return foo->get3(); } + virtual val<4> get4() const { return foo->get4(); } + Base *foo; +}; + +Base* make() { return new Derived; }