PR 48660 is about an ICE on code like:

  template<int N> struct val { char a[N]; };

  class Base
  {
  public:
    virtual val<2> get2() const = 0;
  };

  class Derived : public virtual Base
  {
  public:
    virtual val<2> get2() const { return foo->get2(); }
    Base *foo;
  };

  Base* make() { return new Derived; }

The thunk for Derived::get2() ends with an assignment to the
val<2> RESULT_DECL, followed by a return.  The problem is that
val<2> has mode BLKmode, but the RESULT_DECL is a HImode register
(small structures are returned in registers on ARM).  We then ICE
while copying a BLKmode rhs into a HImode lhs.

At first I thought this was a bug in the thunk expansion; I haven't been
able to trigger it elsewhere.  But Richard said on irc that this was valid
gimple and should work.  So this patch instead fixes the implicit FIXME in:

                                      The compiler currently can't handle
     copying a BLKmode value into registers.  We could put this code in a
     more general area (for use by everyone instead of just function
     call/return), but until this feature is generally usable it is kept here
     (and in expand_call).  */

I think the expand_call part is already out-of-date: I assume it's
referring to copy_blkmode_from_reg, which handles moves in the opposite
direction -- return register to BLKmode value -- and which is already
a separate function.

At the moment, copies into RESULT_DECL are handled by the:

  /* If the rhs is a function call and its value is not an aggregate,
     call the function before we start to compute the lhs.
     This is needed for correct code for cases such as
     val = setjmp (buf) on machines where reference to val
     requires loading up part of an address in a separate insn.

     Don't do this if TO is a VAR_DECL or PARM_DECL whose DECL_RTL is REG
     since it might be a promoted variable where the zero- or sign- extension
     needs to be done.  Handling this in the normal way is safe because no
     computation is done before the call.  The same is true for SSA names.  */
  if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from)
      && COMPLETE_TYPE_P (TREE_TYPE (from))
      && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST
      && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL)
             && REG_P (DECL_RTL (to)))
            || TREE_CODE (to) == SSA_NAME))
    {
      rtx value;

      push_temp_slots ();
      value = expand_normal (from);
      if (to_rtx == 0)
        to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE);

block of expand_assignment, but I think the REG_P exclusion that applies
to VAR_DECLs and PARM_DECLs also applies to RESULT_DECLs.

Tested on x86_64-linux-gnu and arm-linux-gnu.  OK for trunk?

What about release branches?  This is a regression from 4.4 (which had
the old thunk code).  A conservative alternative for the branches might
be to leave out the stmt.c part.

Richard


gcc/
        * expr.h (copy_blkmode_to_reg): Declare.
        * expr.c (copy_blkmode_to_reg): New function.
        (expand_assignment): Don't expand register RESULT_DECLs before
        the lhs.  Use copy_blkmode_to_reg to copy BLKmode values into a
        RESULT_DECL register.
        * stmt.c (expand_return): Move BLKmode-to-register code into
        copy_blkmode_to_reg.

Index: gcc/expr.h
===================================================================
--- gcc/expr.h  2011-09-08 11:06:35.314378369 +0100
+++ gcc/expr.h  2011-09-08 13:23:37.976613715 +0100
@@ -321,6 +321,8 @@ extern void emit_group_store (rtx, rtx, 
 /* Copy BLKmode object from a set of registers.  */
 extern rtx copy_blkmode_from_reg (rtx, rtx, tree);
 
+extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
+
 /* Mark REG as holding a parameter for the next CALL_INSN.  */
 extern void use_reg (rtx *, rtx);
 
Index: gcc/expr.c
===================================================================
--- gcc/expr.c  2011-09-08 11:06:35.314378369 +0100
+++ gcc/expr.c  2011-09-08 13:38:24.871096387 +0100
@@ -2180,6 +2180,112 @@ copy_blkmode_from_reg (rtx tgtblk, rtx s
   return tgtblk;
 }
 
+/* Copy BLKmode value SRC into a register of mode MODE.  Return the
+   register if it contains any data, otherwise return null.
+
+   This is used on targets that return BLKmode values in registers.  */
+
+rtx
+copy_blkmode_to_reg (enum machine_mode mode, tree src)
+{
+  int i, n_regs;
+  unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
+  unsigned int bitsize;
+  rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
+  enum machine_mode dst_mode;
+
+  gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode);
+
+  x = expand_normal (src);
+
+  bytes = int_size_in_bytes (TREE_TYPE (src));
+  if (bytes == 0)
+    return NULL_RTX;
+
+  /* If the structure doesn't take up a whole number of words, see
+     whether the register value should be padded on the left or on
+     the right.  Set PADDING_CORRECTION to the number of padding
+     bits needed on the left side.
+
+     In most ABIs, the structure will be returned at the least end of
+     the register, which translates to right padding on little-endian
+     targets and left padding on big-endian targets.  The opposite
+     holds if the structure is returned at the most significant
+     end of the register.  */
+  if (bytes % UNITS_PER_WORD != 0
+      && (targetm.calls.return_in_msb (TREE_TYPE (src))
+         ? !BYTES_BIG_ENDIAN
+         : BYTES_BIG_ENDIAN))
+    padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
+                                          * BITS_PER_UNIT));
+
+  n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  dst_words = XALLOCAVEC (rtx, n_regs);
+  bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
+
+  /* Copy the structure BITSIZE bits at a time.  */
+  for (bitpos = 0, xbitpos = padding_correction;
+       bitpos < bytes * BITS_PER_UNIT;
+       bitpos += bitsize, xbitpos += bitsize)
+    {
+      /* We need a new destination pseudo each time xbitpos is
+        on a word boundary and when xbitpos == padding_correction
+        (the first time through).  */
+      if (xbitpos % BITS_PER_WORD == 0
+         || xbitpos == padding_correction)
+       {
+         /* Generate an appropriate register.  */
+         dst_word = gen_reg_rtx (word_mode);
+         dst_words[xbitpos / BITS_PER_WORD] = dst_word;
+
+         /* Clear the destination before we move anything into it.  */
+         emit_move_insn (dst_word, CONST0_RTX (word_mode));
+       }
+
+      /* We need a new source operand each time bitpos is on a word
+        boundary.  */
+      if (bitpos % BITS_PER_WORD == 0)
+       src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode);
+
+      /* Use bitpos for the source extraction (left justified) and
+        xbitpos for the destination store (right justified).  */
+      store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD,
+                      0, 0, word_mode,
+                      extract_bit_field (src_word, bitsize,
+                                         bitpos % BITS_PER_WORD, 1, false,
+                                         NULL_RTX, word_mode, word_mode));
+    }
+
+  if (mode == BLKmode)
+    {
+      /* Find the smallest integer mode large enough to hold the
+        entire structure.  */
+      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+          mode != VOIDmode;
+          mode = GET_MODE_WIDER_MODE (mode))
+       /* Have we found a large enough mode?  */
+       if (GET_MODE_SIZE (mode) >= bytes)
+         break;
+
+      /* A suitable mode should have been found.  */
+      gcc_assert (mode != VOIDmode);
+    }
+
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode))
+    dst_mode = word_mode;
+  else
+    dst_mode = mode;
+  dst = gen_reg_rtx (dst_mode);
+
+  for (i = 0; i < n_regs; i++)
+    emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]);
+
+  if (mode != dst_mode)
+    dst = gen_lowpart (mode, dst);
+
+  return dst;
+}
+
 /* Add a USE expression for REG to the (possibly empty) list pointed
    to by CALL_FUSAGE.  REG must denote a hard register.  */
 
@@ -4721,7 +4827,9 @@ expand_assignment (tree to, tree from, b
   if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from)
       && COMPLETE_TYPE_P (TREE_TYPE (from))
       && TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST
-      && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL)
+      && ! (((TREE_CODE (to) == VAR_DECL
+             || TREE_CODE (to) == PARM_DECL
+             || TREE_CODE (to) == RESULT_DECL)
             && REG_P (DECL_RTL (to)))
            || TREE_CODE (to) == SSA_NAME))
     {
@@ -4767,12 +4875,15 @@ expand_assignment (tree to, tree from, b
       rtx temp;
 
       push_temp_slots ();
-      temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
+      if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode)
+       temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from);
+      else
+       temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
 
       if (GET_CODE (to_rtx) == PARALLEL)
        emit_group_load (to_rtx, temp, TREE_TYPE (from),
                         int_size_in_bytes (TREE_TYPE (from)));
-      else
+      else if (temp)
        emit_move_insn (to_rtx, temp);
 
       preserve_temp_slots (to_rtx);
@@ -8950,10 +9061,15 @@ expand_expr_real_1 (tree exp, rtx target
          return temp;
        }
 
-      /* If the mode of DECL_RTL does not match that of the decl, it
-        must be a promoted value.  We return a SUBREG of the wanted mode,
-        but mark it so that we know that it was already extended.  */
-      if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
+      /* If the mode of DECL_RTL does not match that of the decl,
+        there are two cases: we are dealing with a BLKmode value
+        that is returned in a register, or we are dealing with
+        a promoted value.  In the latter case, return a SUBREG
+        of the wanted mode, but mark it so that we know that it
+        was already extended.  */
+      if (REG_P (decl_rtl)
+         && !(code == RESULT_DECL && DECL_MODE (exp) == BLKmode)
+         && GET_MODE (decl_rtl) != DECL_MODE (exp))
        {
          enum machine_mode pmode;
 
Index: gcc/stmt.c
===================================================================
--- gcc/stmt.c  2011-09-08 11:06:35.314378369 +0100
+++ gcc/stmt.c  2011-09-08 13:23:38.000613685 +0100
@@ -1685,120 +1685,21 @@ expand_return (tree retval)
     expand_value_return (result_rtl);
 
   /* If the result is an aggregate that is being returned in one (or more)
-     registers, load the registers here.  The compiler currently can't handle
-     copying a BLKmode value into registers.  We could put this code in a
-     more general area (for use by everyone instead of just function
-     call/return), but until this feature is generally usable it is kept here
-     (and in expand_call).  */
+     registers, load the registers here.  */
 
   else if (retval_rhs != 0
           && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode
           && REG_P (result_rtl))
     {
-      int i;
-      unsigned HOST_WIDE_INT bitpos, xbitpos;
-      unsigned HOST_WIDE_INT padding_correction = 0;
-      unsigned HOST_WIDE_INT bytes
-       = int_size_in_bytes (TREE_TYPE (retval_rhs));
-      int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-      unsigned int bitsize
-       = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD);
-      rtx *result_pseudos = XALLOCAVEC (rtx, n_regs);
-      rtx result_reg, src = NULL_RTX, dst = NULL_RTX;
-      rtx result_val = expand_normal (retval_rhs);
-      enum machine_mode tmpmode, result_reg_mode;
-
-      if (bytes == 0)
+      val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs);
+      if (val)
        {
-         expand_null_return ();
-         return;
-       }
-
-      /* If the structure doesn't take up a whole number of words, see
-        whether the register value should be padded on the left or on
-        the right.  Set PADDING_CORRECTION to the number of padding
-        bits needed on the left side.
-
-        In most ABIs, the structure will be returned at the least end of
-        the register, which translates to right padding on little-endian
-        targets and left padding on big-endian targets.  The opposite
-        holds if the structure is returned at the most significant
-        end of the register.  */
-      if (bytes % UNITS_PER_WORD != 0
-         && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs))
-             ? !BYTES_BIG_ENDIAN
-             : BYTES_BIG_ENDIAN))
-       padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
-                                              * BITS_PER_UNIT));
-
-      /* Copy the structure BITSIZE bits at a time.  */
-      for (bitpos = 0, xbitpos = padding_correction;
-          bitpos < bytes * BITS_PER_UNIT;
-          bitpos += bitsize, xbitpos += bitsize)
-       {
-         /* We need a new destination pseudo each time xbitpos is
-            on a word boundary and when xbitpos == padding_correction
-            (the first time through).  */
-         if (xbitpos % BITS_PER_WORD == 0
-             || xbitpos == padding_correction)
-           {
-             /* Generate an appropriate register.  */
-             dst = gen_reg_rtx (word_mode);
-             result_pseudos[xbitpos / BITS_PER_WORD] = dst;
-
-             /* Clear the destination before we move anything into it.  */
-             emit_move_insn (dst, CONST0_RTX (GET_MODE (dst)));
-           }
-
-         /* We need a new source operand each time bitpos is on a word
-            boundary.  */
-         if (bitpos % BITS_PER_WORD == 0)
-           src = operand_subword_force (result_val,
-                                        bitpos / BITS_PER_WORD,
-                                        BLKmode);
-
-         /* Use bitpos for the source extraction (left justified) and
-            xbitpos for the destination store (right justified).  */
-         store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD,
-                          0, 0, word_mode,
-                          extract_bit_field (src, bitsize,
-                                             bitpos % BITS_PER_WORD, 1, false,
-                                             NULL_RTX, word_mode, word_mode));
-       }
-
-      tmpmode = GET_MODE (result_rtl);
-      if (tmpmode == BLKmode)
-       {
-         /* Find the smallest integer mode large enough to hold the
-            entire structure and use that mode instead of BLKmode
-            on the USE insn for the return register.  */
-         for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
-              tmpmode != VOIDmode;
-              tmpmode = GET_MODE_WIDER_MODE (tmpmode))
-           /* Have we found a large enough mode?  */
-           if (GET_MODE_SIZE (tmpmode) >= bytes)
-             break;
-
-         /* A suitable mode should have been found.  */
-         gcc_assert (tmpmode != VOIDmode);
-
-         PUT_MODE (result_rtl, tmpmode);
+         /* Use the mode of the result value on the return register.  */
+         PUT_MODE (result_rtl, GET_MODE (val));
+         expand_value_return (val);
        }
-
-      if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode))
-       result_reg_mode = word_mode;
       else
-       result_reg_mode = tmpmode;
-      result_reg = gen_reg_rtx (result_reg_mode);
-
-      for (i = 0; i < n_regs; i++)
-       emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode),
-                       result_pseudos[i]);
-
-      if (tmpmode != result_reg_mode)
-       result_reg = gen_lowpart (tmpmode, result_reg);
-
-      expand_value_return (result_reg);
+       expand_null_return ();
     }
   else if (retval_rhs != 0
           && !VOID_TYPE_P (TREE_TYPE (retval_rhs))
Index: gcc/testsuite/g++.dg/pr48660.C
===================================================================
--- /dev/null   2011-09-08 10:02:38.944000001 +0100
+++ gcc/testsuite/g++.dg/pr48660.C      2011-09-08 13:23:38.040613636 +0100
@@ -0,0 +1,22 @@
+template<int N> struct val { char a[N]; };
+
+class Base
+{
+public:
+  virtual val<1> get1() const = 0;
+  virtual val<2> get2() const = 0;
+  virtual val<3> get3() const = 0;
+  virtual val<4> get4() const = 0;
+};
+
+class Derived : public virtual Base
+{
+public:
+  virtual val<1> get1() const { return foo->get1(); }
+  virtual val<2> get2() const { return foo->get2(); }
+  virtual val<3> get3() const { return foo->get3(); }
+  virtual val<4> get4() const { return foo->get4(); }
+  Base *foo;
+};
+
+Base* make() { return new Derived; }

Reply via email to