On Mon, Feb 11, 2019 at 11:55 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
> plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
> source operand is allowed.
>
> 2019-02-08  H.J. Lu  <hongjiu...@intel.com>
>             Uros Bizjak  <ubiz...@gmail.com>
>
>         PR target/89021
>         * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
>         New prototype.
>         (ix86_split_mmx_pack): Likewise.
>         * config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
>         function.
>         (ix86_split_mmx_pack): Likewise.
>         * config/i386/i386.md (mmx_isa): New.
>         (enabled): Also check mmx_isa.
>         * config/i386/mmx.md (any_s_truncate): New code iterator.
>         (s_trunsuffix): New code attr.
>         (mmx_packsswb): Removed.
>         (mmx_packssdw): Likewise.
>         (mmx_packuswb): Likewise.
>         (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate
>         MMX packsswb/packuswb with SSE2.
>         (mmx_packssdw): Likewise.
> ---
>  gcc/config/i386/i386-protos.h |  3 ++
>  gcc/config/i386/i386.c        | 54 ++++++++++++++++++++++++++++
>  gcc/config/i386/i386.md       | 12 +++++++
>  gcc/config/i386/mmx.md        | 67 +++++++++++++++++++----------------
>  4 files changed, 106 insertions(+), 30 deletions(-)
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 2d600173917..bb96a420a85 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
>
>  extern rtx ix86_split_stack_guard (void);
>
> +extern void ix86_move_vector_high_sse_to_mmx (rtx);
> +extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
> +
>  #ifdef TREE_CODE
>  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
>  #endif /* TREE_CODE  */
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 61e602bdb38..b8d5ba7f28f 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
>      gcc_unreachable ();
>  }
>
> +/* Move bits 64:95 to bits 32:63.  */
> +
> +void
> +ix86_move_vector_high_sse_to_mmx (rtx op)
> +{
> +  rtx mask = gen_rtx_PARALLEL (VOIDmode,
> +                              gen_rtvec (4, GEN_INT (0), GEN_INT (2),
> +                                         GEN_INT (0), GEN_INT (0)));
> +  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
> +  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
> +  rtx insn = gen_rtx_SET (dest, op);
> +  emit_insn (insn);
> +}
> +
> +/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
> +
> +void
> +ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
> +{
> +  rtx op0 = operands[0];
> +  rtx op1 = operands[1];
> +  rtx op2 = operands[2];
> +
> +  machine_mode dmode = GET_MODE (op0);
> +  machine_mode smode = GET_MODE (op1);
> +  machine_mode inner_dmode = GET_MODE_INNER (dmode);
> +  machine_mode inner_smode = GET_MODE_INNER (smode);
> +
> +  /* Get the corresponding SSE mode for destination.  */
> +  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
> +  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
> +                                           nunits).require ();
> +  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
> +                                                nunits / 2).require ();
> +
> +  /* Get the corresponding SSE mode for source.  */
> +  nunits = 16 / GET_MODE_SIZE (inner_smode);
> +  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
> +                                           nunits).require ();
> +
> +  /* Generate SSE pack with signed/unsigned saturation.  */
> +  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
> +  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
> +  op2 = gen_rtx_REG (sse_smode, REGNO (op2));

Please use lowpart_subreg here instead of creating new hard registers with gen_rtx_REG.

Uros.

> +
> +  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
> +  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
> +  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
> +                                                   op1, op2));
> +  emit_insn (insn);
> +
> +  ix86_move_vector_high_sse_to_mmx (op0);
> +}
> +
>  /* Helper function of ix86_fixup_binary_operands to canonicalize
>     operand order.  Returns true if the operands should be swapped.  */
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 5b89e52493e..633b1dab523 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -792,6 +792,9 @@
>                     avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
>    (const_string "base"))
>
> +;; Define instruction set of MMX instructions
> +(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string "base"))
> +
>  (define_attr "enabled" ""
>    (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
>          (eq_attr "isa" "x64_sse2")
> @@ -830,6 +833,15 @@
>          (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
>          (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
>          (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
> +
> +        (eq_attr "mmx_isa" "native")
> +          (symbol_ref "!TARGET_MMX_WITH_SSE")
> +        (eq_attr "mmx_isa" "x64")
> +          (symbol_ref "TARGET_MMX_WITH_SSE")
> +        (eq_attr "mmx_isa" "x64_avx")
> +          (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
> +        (eq_attr "mmx_isa" "x64_noavx")
> +          (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
>         ]
>         (const_int 1)))
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index c1e0f2c411e..840d369ab02 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1046,41 +1046,48 @@
>  ;;
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>
> -(define_insn "mmx_packsswb"
> -  [(set (match_operand:V8QI 0 "register_operand" "=y")
> +;; Used in signed and unsigned truncations with saturation.
> +(define_code_iterator any_s_truncate [ss_truncate us_truncate])
> +;; Instruction suffix for truncations with saturation.
> +(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
> +
> +(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
> +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
>         (vec_concat:V8QI
> -         (ss_truncate:V4QI
> -           (match_operand:V4HI 1 "register_operand" "0"))
> -         (ss_truncate:V4QI
> -           (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
> -  "TARGET_MMX"
> -  "packsswb\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxshft")
> -   (set_attr "mode" "DI")])
> +         (any_s_truncate:V4QI
> +           (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
> +         (any_s_truncate:V4QI
> +           (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))))]
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +  "@
> +   pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
> +   #
> +   #"
> +  "TARGET_MMX_WITH_SSE && reload_completed"
> +  [(const_int 0)]
> +  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxshft,sselog,sselog")
> +   (set_attr "mode" "DI,TI,TI")])
>
> -(define_insn "mmx_packssdw"
> -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> +(define_insn_and_split "mmx_packssdw"
> +  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
>         (vec_concat:V4HI
>           (ss_truncate:V2HI
> -           (match_operand:V2SI 1 "register_operand" "0"))
> +           (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
>           (ss_truncate:V2HI
> -           (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
> -  "TARGET_MMX"
> -  "packssdw\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxshft")
> -   (set_attr "mode" "DI")])
> -
> -(define_insn "mmx_packuswb"
> -  [(set (match_operand:V8QI 0 "register_operand" "=y")
> -       (vec_concat:V8QI
> -         (us_truncate:V4QI
> -           (match_operand:V4HI 1 "register_operand" "0"))
> -         (us_truncate:V4QI
> -           (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
> -  "TARGET_MMX"
> -  "packuswb\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxshft")
> -   (set_attr "mode" "DI")])
> +           (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv"))))]
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +  "@
> +   packssdw\t{%2, %0|%0, %2}
> +   #
> +   #"
> +  "TARGET_MMX_WITH_SSE && reload_completed"
> +  [(const_int 0)]
> +  "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxshft,sselog,sselog")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_insn "mmx_punpckhbw"
>    [(set (match_operand:V8QI 0 "register_operand" "=y")
> --
> 2.20.1
>

Reply via email to