On Thu, Apr 05, 2018 at 01:29:06PM +0100, Tamar Christina wrote:
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 
> 00660293f72e5441a6421a280b04c57fca2922b8..7daeb8c91d758edf0b3dc37f6927380b6f3df877
>  100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -2749,7 +2749,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>  {
>    int i, n_regs;
>    unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
> -  unsigned int bitsize;
> +   unsigned int bitsize = 0;
>    rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
>    /* No current ABI uses variable-sized modes to pass a BLKmnode type.  */
>    fixed_size_mode mode = as_a <fixed_size_mode> (mode_in);
> @@ -2782,7 +2782,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>  
>    n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
>    dst_words = XALLOCAVEC (rtx, n_regs);
> -  bitsize = BITS_PER_WORD;
> +
>    if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE 
> (src))))
>      bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>  

You calculate bitsize here, then override it in the loop?  Doesn't
that mean strict align targets will use mis-aligned loads and stores?

> @@ -2791,6 +2791,17 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
>         bitpos < bytes * BITS_PER_UNIT;
>         bitpos += bitsize, xbitpos += bitsize)
>      {
> +      /* Find the largest integer mode that can be used to copy all or as
> +      many bits as possible of the structure.  */
> +      opt_scalar_int_mode mode_iter;
> +      FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
> +      if (GET_MODE_BITSIZE (mode_iter.require ())
> +                         <= ((bytes * BITS_PER_UNIT) - bitpos)
> +       && GET_MODE_BITSIZE (mode_iter.require ()) <= BITS_PER_WORD)
> +     bitsize = GET_MODE_BITSIZE (mode_iter.require ());
> +      else
> +     break;
> +

This isn't correct.  Consider a 6 byte struct on a 4 byte word, 8 bit
byte, big-endian target when targetm.calls.return_in_msb is false.

In this scenario, copy_blkmode_to_reg should return two registers, set
as if they had been loaded from two words in memory laid out as
follows (left to right increasing byte addresses):
 _______________________         _______________________
|  0  |  0  |  s0 |  s1 |       |  s2 |  s3 |  s4 |  s5 |
 ~~~~~~~~~~~~~~~~~~~~~~~         ~~~~~~~~~~~~~~~~~~~~~~~

So we will have xbitpos=16 first time around the loop.  That means
your new code will attempt to store 32 bits into a bit-field starting
at bit 16 in the first 32-bit register, and of course fail.

This scenario used to be handled correctly, at least when the struct
wasn't over-aligned.

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to