On Mon, Nov 5, 2018 at 2:02 PM H.J. Lu <hongjiu...@intel.com> wrote:
>
> Hi Richard, Jakub,
>
> Can you take a look at this patch?  The last review from Kirill was in
> June.
>
> Thanks.
>
>
> H.J.
> --
> There are many duplicated AVX2/AVX512 vec_dup patterns like:
>
> (define_insn "avx2_vec_dup<mode>"
>   [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
>         (vec_duplicate:VF1_128_256
>           (vec_select:SF
>             (match_operand:V4SF 1 "register_operand" "v")
>             (parallel [(const_int 0)]))))]
>   "TARGET_AVX2"
>   "vbroadcastss\t{%1, %0|%0, %1}"
>   [(set_attr "type" "sselog1")
>     (set_attr "prefix" "maybe_evex")
>     (set_attr "mode" "<MODE>")])
>
> and
>
> (define_insn "vec_dup<mode>"
>   [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
>         (vec_duplicate:AVX_VEC_DUP_MODE
>           (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
>   "TARGET_AVX"
>   "@
>    v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
>    vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
>    v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
>    v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
>    #"
>   [(set_attr "type" "ssemov")
>    (set_attr "prefix_extra" "1")
>    (set_attr "prefix" "maybe_evex")
>    (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
>    (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
>
> We can remove the duplicated AVX2/AVX512 vec_dup patterns and use the
> normal AVX2/AVX512 vec_dup patterns instead by changing the source
> operand to a subreg of the same register class as the base, generating
>
> (set (reg:V8SF 84)
>      (vec_duplicate:V8SF (subreg:SF (reg:V4SF 85) 0)))
>
> instead of
>
> (set (reg:V8SF 84)
>       (vec_duplicate:V8SF
>         (vec_select:SF (reg:V4SF 85)
>           (parallel [(const_int 0 [0])]))))
>
> For integer vector broadcast, we generate
>
> (set (reg:V32QI 86)
>      (vec_duplicate:V32QI
>         (vec_select:QI (subreg:V16QI (reg:V32QI 87) 0)
>           (parallel [(const_int 0 [0])]))))
>
> instead of
>
> (set (reg:V32QI 86)
>      (vec_duplicate:V32QI
>         (vec_select:QI (reg:V32QI 87)
>           (parallel [(const_int 0 [0])]))))
>
> so that we can remove
>
> (define_insn "avx2_pbroadcast<mode>_1"
>   [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
>         (vec_duplicate:VI_256
>           (vec_select:<ssescalarmode>
>             (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
>             (parallel [(const_int 0)]))))]
>   "TARGET_AVX2"
>   "@
>    vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
>    vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
>    vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
>    vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
>   [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
>    (set_attr "type" "ssemov")
>    (set_attr "prefix_extra" "1")
>    (set_attr "prefix" "vex")
>    (set_attr "mode" "<sseinsnmode>")])
>
> and keep only
>
> (define_insn "avx2_pbroadcast<mode>"
>   [(set (match_operand:VI 0 "register_operand" "=x,v")
>         (vec_duplicate:VI
>           (vec_select:<ssescalarmode>
>             (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
>             (parallel [(const_int 0)]))))]
>   "TARGET_AVX2"
>   "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
>   [(set_attr "isa" "*,<pbroadcast_evex_isa>")
>    (set_attr "type" "ssemov")
>    (set_attr "prefix_extra" "1")
>    (set_attr "prefix" "vex,evex")
>    (set_attr "mode" "<sseinsnmode>")])
>
> gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
>
>  avx2_test:
>         .cfi_startproc
> -       vmovaps x(%rip), %xmm1
> -       vbroadcastss    %xmm1, %ymm0
> +       vbroadcastss    x(%rip), %ymm0
>         vmovaps %ymm0, y(%rip)
>         vzeroupper
>         ret
>         .cfi_endproc
>
> gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
>
> @@ -113,7 +113,7 @@ f10:
>         .cfi_startproc
>         vmovaps %ymm0, %ymm16
>         vpermilps       $85, %ymm16, %ymm16
> -       vbroadcastss    %xmm16, %ymm16
> +       vshuff32x4      $0x0, %ymm16, %ymm16, %ymm16
>         vzeroupper
>         ret
>         .cfi_endproc
> @@ -153,8 +153,7 @@ f12:
>  f13:
>  .LFB12:
>         .cfi_startproc
> -       vmovaps (%rdi), %ymm16
> -       vbroadcastss    %xmm16, %ymm16
> +       vbroadcastss    (%rdi), %ymm16
>         vzeroupper
>         ret
>         .cfi_endproc
>
> gcc/
>
>         * config/i386/i386-builtin.def: Replace CODE_FOR_avx2_vec_dupv4sf,
>         CODE_FOR_avx2_vec_dupv8sf and CODE_FOR_avx2_vec_dupv4df with
>         CODE_FOR_vec_dupv4sf, CODE_FOR_vec_dupv8sf and
>         CODE_FOR_vec_dupv4df, respectively.
>         * config/i386/i386.c (expand_vec_perm_1): Use subreg with vec_dup.
>         * config/i386/i386.md (SF to DF splitter): Replace
>         gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
>         * config/i386/sse.md (VF48_AVX512VL): New.
>         (avx2_vec_dup<mode>): Removed.
>         (avx2_vec_dupv8sf_1): Likewise.
>         (avx512f_vec_dup<mode>_1): Likewise.
>         (avx2_pbroadcast<mode>_1): Likewise.
>         (avx2_vec_dupv4df): Likewise.
>         (<avx512>_vec_dup<mode>_1): Likewise.
>         (*avx_vperm_broadcast_<mode>): Replace gen_avx2_vec_dupv8sf with
>         gen_vec_dupv8sf.
>
> gcc/testsuite/
>
>         * gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Updated.
>         * gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.

PING:

https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00315.html

-- 
H.J.

Reply via email to