https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90235

--- Comment #1 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to H.J. Lu from comment #0)
> From PR 90202:
> 
> [hjl@gnu-cfl-1 pr90202]$ cat x.ii
> struct v {
>     int val[16];
> };
> 
> struct v test(struct v a, struct v b) {
>     struct v res;
> 
>     for (int i = 0; i < 16; i++)
>         res.val[i] = a.val[i] + b.val[i];
> 
>     return res;
> }
> [hjl@gnu-cfl-1 pr90202]$ make CC=gcc
> gcc -O3 -march=skylake  -S x.ii
> [hjl@gnu-cfl-1 pr90202]$ cat x.s
>       .file   "x.ii"
>       .text
>       .p2align 4,,15
>       .globl  _Z4test1vS_
>       .type   _Z4test1vS_, @function
> _Z4test1vS_:
> .LFB0:
>       .cfi_startproc
>       pushq   %rbp
>       .cfi_def_cfa_offset 16
>       .cfi_offset 6, -16
>       movq    %rdi, %rax
>       movq    %rsp, %rbp
>       .cfi_def_cfa_register 6
>       vmovdqu 16(%rbp), %ymm1
>       vmovdqu 48(%rbp), %ymm2
>       vpaddd  80(%rbp), %ymm1, %ymm0
>       vmovdqu %ymm0, (%rdi)
>       vpaddd  112(%rbp), %ymm2, %ymm0
>       vmovdqu %ymm0, 32(%rdi)
>       vzeroupper
>       popq    %rbp
>       .cfi_def_cfa 7, 8
>       ret
>       .cfi_endproc
> 
> Since there is
> 
> rtx
> gen_reg_rtx (machine_mode mode)
> {
>   rtx val; 
>   unsigned int align = GET_MODE_ALIGNMENT (mode);
> 
>   gcc_assert (can_create_pseudo_p ()); 
> 
>   /* If a virtual register with bigger mode alignment is generated,
>      increase stack alignment estimation because it might be spilled
>      to stack later.  */
>   if (SUPPORTS_STACK_ALIGNMENT
>       && crtl->stack_alignment_estimated < align
>       && !crtl->stack_realign_processed)
>     {    
>       unsigned int min_align = MINIMUM_ALIGNMENT (NULL, mode, align);
>       if (crtl->stack_alignment_estimated < min_align)
>         crtl->stack_alignment_estimated = min_align;
>     }    
> 
> and IRA has
> 
>   frame_pointer_needed
>     = (! flag_omit_frame_pointer
>        || (cfun->calls_alloca && EXIT_IGNORE_STACK)
>        /* We need the frame pointer to catch stack overflow exceptions if
>           the stack pointer is moving (as for the alloca case just above).  */
>        || (STACK_CHECK_MOVING_SP
>            && flag_stack_check
>            && flag_exceptions
>            && cfun->can_throw_non_call_exceptions)
>        || crtl->accesses_prior_frames
>        || (SUPPORTS_STACK_ALIGNMENT && crtl->stack_realign_needed)
>        || targetm.frame_pointer_required ());
> 
> generating AVX/AVX512 pseudo registers via gen_reg_rtx will mark the frame
> pointer as needed.  Stack realignment is needed to
> 
> 1. Align the outgoing stack.
> 2. Support aligned spill of AVX/AVX512 registers.
> 
> But we won't know whether a spill is needed before RA.  As a result, we
> save and restore the frame pointer even when it is not needed.  Since
> 
> (define_insn "mov<mode>_internal"
>   [(set (match_operand:VMOVE 0 "nonimmediate_operand"
>          "=v,v ,v ,m")
>         (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
>          " C,BC,vm,v"))]
>   "TARGET_SSE
>    && (register_operand (operands[0], <MODE>mode)
>        || register_operand (operands[1], <MODE>mode))"
> 
> now supports both aligned and unaligned load/store of AVX/AVX512
> registers, we can change gen_reg_rtx to
> 
>   /* If a virtual register with bigger mode alignment is generated,
>      increase stack alignment estimation because it might be spilled
>      to stack later.  */
>   if (SUPPORTS_STACK_ALIGNMENT
>       && !SUPPORTS_MISALIGNED_SPILL
>       && crtl->stack_alignment_estimated < align
>       && !crtl->stack_realign_processed)
>     {    
>       unsigned int min_align = MINIMUM_ALIGNMENT (NULL, mode, align);
>       if (crtl->stack_alignment_estimated < min_align)
>         crtl->stack_alignment_estimated = min_align;
>     }

Would this generate more unaligned loads/stores and hurt performance?

Reply via email to