On Sat, Jul 13, 2024 at 3:44 PM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> Hi,
>
> According to the instruction spec of AVX512BF16, the conversion from float
> to BF16 is not a simple truncation. It has special handling for
> denormal/NaN inputs, and even for a normal float it adds an extra bias
> according to the least significant bit of the bf16 number. This means we
> cannot use vcvtne2ps2bf16 for any bf16 vector shuffle.
> The optimization introduced in r15-1368 adds a specific split to convert
> HImode permutation with this instruction, so remove it and treat the
> BFmode permutation the same as HFmode.
>
> Bootstrapped & regtested on x86_64-pc-linux-gnu. OK for trunk?
Could you just git revert 6d0b7b69d143025f271d0041cfa29cf26e6c343b?
>
> gcc/ChangeLog:
>
>         PR target/115889
>         * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove.
>         * config/i386/sse.md (hi_cvt_bf): Remove.
>         (HI_CVT_BF): Likewise.
>         (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/115889
>         * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust option
>         and output scan.
> ---
>  gcc/config/i386/predicates.md                 | 11 ------
>  gcc/config/i386/sse.md                        | 35 -------------------
>  .../i386/vpermt2-special-bf16-shufflue.c      |  5 ++-
>  3 files changed, 2 insertions(+), 49 deletions(-)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index a894847adaf..5d0bb1e0f54 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -2327,14 +2327,3 @@ (define_predicate "apx_ndd_add_memory_operand"
>
>    return true;
>  })
> -
> -;; Check that each element is odd and incrementally increasing from 1
> -(define_predicate "vcvtne2ps2bf_parallel"
> -  (and (match_code "const_vector")
> -       (match_code "const_int" "a"))
> -{
> -  for (int i = 0; i < XVECLEN (op, 0); ++i)
> -    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
> -      return false;
> -  return true;
> -})
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index b3b4697924b..c134494cd20 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -31460,38 +31460,3 @@ (define_insn "vpdp<vpdpwprodtype>_<mode>"
>    "TARGET_AVXVNNIINT16"
>    "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
>     [(set_attr "prefix" "vex")])
> -
> -(define_mode_attr hi_cvt_bf
> -  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
> -
> -(define_mode_attr HI_CVT_BF
> -  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
> -
> -(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
> -  [(set (match_operand:VI2_AVX512F 0 "register_operand")
> -       (unspec:VI2_AVX512F
> -         [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
> -          (match_operand:VI2_AVX512F 2 "register_operand")
> -          (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
> -          UNSPEC_VPERMT2))]
> -  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
> -  "#"
> -  "&& 1"
> -  [(const_int 0)]
> -{
> -  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
> -  operands[2] = lowpart_subreg (<ssePSmode>mode,
> -                               force_reg (<MODE>mode, operands[2]),
> -                               <MODE>mode);
> -  operands[3] = lowpart_subreg (<ssePSmode>mode,
> -                               force_reg (<MODE>mode, operands[3]),
> -                               <MODE>mode);
> -
> -  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
> -                                                  operands[3],
> -                                                  operands[2]));
> -  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
> -                                              <HI_CVT_BF>mode));
> -  DONE;
> -}
> -[(set_attr "mode" "<sseinsnmode>")])
> diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
> index 5c65f2a9884..4cbc85735de 100755
> --- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
> +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
> @@ -1,7 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
> -/* { dg-final { scan-assembler-not "vpermi2b" } } */
> -/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
> +/* { dg-options "-O2 -mavx512vbmi -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
>
>  typedef __bf16 v8bf __attribute__((vector_size(16)));
>  typedef __bf16 v16bf __attribute__((vector_size(32)));
> --
> 2.34.1
>


-- 
BR,
Hongtao

Reply via email to