http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57748

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |uros at gcc dot gnu.org

--- Comment #29 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Bernd Edlinger from comment #28)
> (In reply to Richard Biener from comment #19)
> > Barking up wrong trees.  Hacky fix looks like:
> > 
> > Index: gcc/expr.c
> > ===================================================================
> > --- gcc/expr.c  (revision 202043)
> > +++ gcc/expr.c  (working copy)
> > @@ -4753,6 +4753,9 @@ expand_assignment (tree to, tree from, b
> >         {
> >           enum machine_mode address_mode;
> >           rtx offset_rtx;
> > +         rtx saved_to_rtx = to_rtx;
> > +         if (misalignp)
> > +           to_rtx = mem;
> >  
> >           if (!MEM_P (to_rtx))
> >             {
> > @@ -4785,6 +4788,11 @@ expand_assignment (tree to, tree from, b
> >           to_rtx = offset_address (to_rtx, offset_rtx,
> >                                    highest_pow2_factor_for_target (to,
> >                                                                    offset));
> > +         if (misalignp)
> > +           {
> > +             mem = to_rtx;
> > +             to_rtx = saved_to_rtx;
> > +           }
> >         }
> >  
> >        /* No action is needed if the target is not a memory and the field
> > 
> > 
> 
> This patch generates wrong code, too:
> 
> foo:
> .LFB9:
>         .cfi_startproc
>         pushq   %rbx
>         .cfi_def_cfa_offset 16
>         .cfi_offset 3, -16
>         movq    %rdi, %rbx
>         subq    $16, %rsp
>         .cfi_def_cfa_offset 32
>         call    get_i
>         movdqu  (%rbx), %xmm0
>         cltq
>         movq    .LC1(%rip), %xmm1
>         psrldq  $8, %xmm0
>         punpcklqdq      %xmm0, %xmm1
>         movdqu  %xmm1, 16(%rbx,%rax,8)
>         addq    $16, %rsp
>         .cfi_def_cfa_offset 16
>         popq    %rbx
>         .cfi_def_cfa_offset 8
>         ret
>         .cfi_endproc
> 
> The generated code loads *s into xmm0, modifies the low part, and
> writes it back at s->b[0] and s->b[1].

This bug is latent because we use the mode of the base object to query
for movmisalign_optab (and then use that mode for the access).  It
highlights the issue I raised in my last comment.

So, new, completely untested patch:

Index: gcc/expr.c
===================================================================
--- gcc/expr.c  (revision 202240)
+++ gcc/expr.c  (working copy)
@@ -4646,10 +4646,12 @@ expand_assignment (tree to, tree from, b

   /* Handle misaligned stores.  */
   mode = TYPE_MODE (TREE_TYPE (to));
-  if ((TREE_CODE (to) == MEM_REF
-       || TREE_CODE (to) == TARGET_MEM_REF)
-      && mode != BLKmode
-      && !mem_ref_refers_to_non_mem_p (to)
+  if (mode != BLKmode
+      && (DECL_P (to)
+         || handled_component_p (to)
+         || ((TREE_CODE (to) == MEM_REF
+              || TREE_CODE (to) == TARGET_MEM_REF)
+             && !mem_ref_refers_to_non_mem_p (to)))
       && ((align = get_object_alignment (to))
          < GET_MODE_ALIGNMENT (mode))
       && (((icode = optab_handler (movmisalign_optab, mode))
@@ -4696,7 +4698,6 @@ expand_assignment (tree to, tree from, b
       int unsignedp;
       int volatilep = 0;
       tree tem;
-      bool misalignp;
       rtx mem = NULL_RTX;

       push_temp_slots ();
@@ -4707,40 +4708,7 @@ expand_assignment (tree to, tree from, b
          && DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
        get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos, &offset);

-      /* If we are going to use store_bit_field and extract_bit_field,
-        make sure to_rtx will be safe for multiple use.  */
-      mode = TYPE_MODE (TREE_TYPE (tem));
-      if (TREE_CODE (tem) == MEM_REF
-         && mode != BLKmode
-         && ((align = get_object_alignment (tem))
-             < GET_MODE_ALIGNMENT (mode))
-         && ((icode = optab_handler (movmisalign_optab, mode))
-             != CODE_FOR_nothing))
-       {
-         struct expand_operand ops[2];
-
-         misalignp = true;
-         to_rtx = gen_reg_rtx (mode);
-         mem = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
-
-         /* If the misaligned store doesn't overwrite all bits, perform
-            rmw cycle on MEM.  */
-         if (bitsize != GET_MODE_BITSIZE (mode))
-           {
-             create_input_operand (&ops[0], to_rtx, mode);
-             create_fixed_operand (&ops[1], mem);
-             /* The movmisalign<mode> pattern cannot fail, else the assignment
-                would silently be omitted.  */
-             expand_insn (icode, 2, ops);
-
-             mem = copy_rtx (mem);
-           }
-       }
-      else
-       {
-         misalignp = false;
-         to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
-       }
+      to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);

       /* If the bitfield is volatile, we want to access it in the
         field's mode, not the computed mode.
@@ -4879,17 +4847,6 @@ expand_assignment (tree to, tree from, b
                                  get_alias_set (to), nontemporal);
        }

-      if (misalignp)
-       {
-         struct expand_operand ops[2];
-
-         create_fixed_operand (&ops[0], mem);
-         create_input_operand (&ops[1], to_rtx, mode);
-         /* The movmisalign<mode> pattern cannot fail, else the assignment
-            would silently be omitted.  */
-         expand_insn (icode, 2, ops);
-       }
-
       if (result)
        preserve_temp_slots (result);
       pop_temp_slots ();

Reply via email to