https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704

--- Comment #11 from Hongtao.liu <crazylht at gmail dot com> ---
(In reply to H.J. Lu from comment #9)
> --- pieces-memset-46.s        2022-03-02 06:44:55.845212762 -0800
> +++ /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/pieces-memset-46.s   2022-03-02 06:45:03.313188978 -0800
> @@ -8,9 +8,11 @@ foo:
>       .cfi_startproc
>       movq    dst(%rip), %rdx
>       movl    $3, %eax
> -     vpbroadcastb    %eax, %zmm31
> -     vmovdqu8        %zmm31, (%rdx)
> -     vmovw   %xmm31, 64(%rdx)
> +     vpbroadcastb    %eax, %zmm0
> +     movl    $771, %eax
> +     movw    %ax, 64(%rdx)
> +     vmovdqu8        %zmm0, (%rdx)
> +     vzeroupper
>       ret
>       .cfi_endproc
>  .LFE0:
> 
> gen_reg_rtx generates 2 extra instructions for pieces-memset-46.c.

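This is on purpose; see the following peephole2, which loads the immediate
into a scratch register before storing it to memory: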

;; Don't move an immediate directly to memory when the instruction
;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;;
;; This peephole2 rewrites a single "store immediate to memory" insn into
;; two insns: load the immediate into a scratch register, then store that
;; register to memory.
;;
;; Operands:
;;   0 - the memory destination of the original store
;;   1 - the immediate being stored
;;   2 - a scratch register requested via match_scratch (constraint "<r>")
;; All three share one mode taken from the SWI124 iterator; the condition
;; below shows HImode is among them (NOTE(review): the full mode list and
;; the meaning of "<r>" are defined outside this excerpt -- confirm there).

(define_peephole2
  ;; Pattern to match: a scratch register must be obtainable, and the insn
  ;; is a plain set of a memory operand from an immediate operand.
  [(match_scratch:SWI124 2 "<r>")
   (set (match_operand:SWI124 0 "memory_operand")
        (match_operand:SWI124 1 "immediate_operand"))]
  ;; Condition: only when optimizing this insn for speed, and either
  ;;   - the move is 16-bit (HImode) and TARGET_LCP_STALL is set, or
  ;;   - TARGET_SPLIT_LONG_MOVES is set and the insn's length is at least
  ;;     the current cost table's large_insn threshold.
  "optimize_insn_for_speed_p ()
   && ((<MODE>mode == HImode
       && TARGET_LCP_STALL)
       || (TARGET_SPLIT_LONG_MOVES
          && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
  ;; Replacement: scratch = immediate; memory = scratch.
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

Reply via email to