Re: [PATCH 02/10] [i386] Enable _Float16 type for TARGET_SSE2 and above.

Uros Bizjak via Gcc-patches Wed, 21 Jul 2021 03:36:07 -0700

On Wed, Jul 21, 2021 at 9:43 AM liuhongt <hongtao....@intel.com> wrote:
>
> gcc/ChangeLog:
>
>         * config/i386/i386-modes.def (FLOAT_MODE): Define ieee HFmode.
>         * config/i386/i386.c (enum x86_64_reg_class): Add
>         X86_64_SSEHF_CLASS.
>         (merge_classes): Handle X86_64_SSEHF_CLASS.
>         (examine_argument): Ditto.
>         (construct_container): Ditto.
>         (classify_argument): Ditto, and set HFmode/HCmode to
>         X86_64_SSEHF_CLASS.
>         (function_value_32): Return _Float16/Complex Float16 by
>         %xmm0/%xmm1.
>         (function_value_64): Return _Float16/Complex Float16 by SSE
>         register.
>         (ix86_print_operand): Handle CONST_DOUBLE HFmode.
>         (ix86_secondary_reload): Require gpr as intermediate register
>         to store _Float16 from sse register when sse4 is not
>         available.
>         (ix86_hard_regno_mode_ok): Put HFmode in sse register and gpr.
>         (ix86_libgcc_floating_mode_supported_p): Enable _Float16 under
>         sse2.
>         (ix86_scalar_mode_supported_p): Ditto.
>         (TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Defined.
>         (ix86_get_excess_precision): Return
>         FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 under sse2.
>         * config/i386/i386.h (VALID_SSE2_REG_MODE): Add HFmode.
>         * config/i386/i386.md (*pushhf_rex64): New define_insn.
>         (*pushhf): Ditto.
>         (*movhf_internal): Ditto.
>         * doc/extend.texi (Half-Precision Floating Point): Document
>         _Float16 for x86.
>
> gcc/lto/ChangeLog:
>
>         * lto-lang.c (lto_type_for_mode): Return float16_type_node
>         when mode == TYPE_MODE (float16_type_node).
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/sse2-float16-1.c: New test.
>         * gcc.target/i386/sse2-float16-2.c: Ditto.
>         * gcc.target/i386/sse2-float16-3.c: Ditto.


OK for the x86 part with some small changes inline.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-modes.def                |   1 +
>  gcc/config/i386/i386.c                        |  99 ++++++++++++++-
>  gcc/config/i386/i386.h                        |   2 +-
>  gcc/config/i386/i386.md                       | 118 +++++++++++++++++-
>  gcc/doc/extend.texi                           |  16 +++
>  gcc/lto/lto-lang.c                            |   3 +
>  .../gcc.target/i386/sse2-float16-1.c          |   8 ++
>  .../gcc.target/i386/sse2-float16-2.c          |  16 +++
>  .../gcc.target/i386/sse2-float16-3.c          |  12 ++
>  9 files changed, 265 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-3.c
>
> diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
> index 4e7014be034..9232f59a925 100644
> --- a/gcc/config/i386/i386-modes.def
> +++ b/gcc/config/i386/i386-modes.def
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
>
>  FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
>  FLOAT_MODE (TF, 16, ieee_quad_format);
> +FLOAT_MODE (HF, 2, ieee_half_format);
>
>  /* In ILP32 mode, XFmode has size 12 and alignment 4.
>     In LP64 mode, XFmode has size and alignment 16.  */
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index ff96134fb37..02628d838fc 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -387,6 +387,7 @@ enum x86_64_reg_class
>      X86_64_INTEGER_CLASS,
>      X86_64_INTEGERSI_CLASS,
>      X86_64_SSE_CLASS,
> +    X86_64_SSEHF_CLASS,
>      X86_64_SSESF_CLASS,
>      X86_64_SSEDF_CLASS,
>      X86_64_SSEUP_CLASS,
> @@ -2023,8 +2024,10 @@ merge_classes (enum x86_64_reg_class class1, enum 
> x86_64_reg_class class2)
>      return X86_64_MEMORY_CLASS;
>
>    /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
> -  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
> -      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
> +  if ((class1 == X86_64_INTEGERSI_CLASS
> +       && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
> +      || (class2 == X86_64_INTEGERSI_CLASS
> +         && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
>      return X86_64_INTEGERSI_CLASS;
>    if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
>        || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
> @@ -2178,6 +2181,8 @@ classify_argument (machine_mode mode, const_tree type,
>             /* The partial classes are now full classes.  */
>             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
>               subclasses[0] = X86_64_SSE_CLASS;
> +           if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
> +             subclasses[0] = X86_64_SSE_CLASS;
>             if (subclasses[0] == X86_64_INTEGERSI_CLASS
>                 && !((bit_offset % 64) == 0 && bytes == 4))
>               subclasses[0] = X86_64_INTEGER_CLASS;
> @@ -2350,6 +2355,12 @@ classify_argument (machine_mode mode, const_tree type,
>        gcc_unreachable ();
>      case E_CTImode:
>        return 0;
> +    case E_HFmode:
> +      if (!(bit_offset % 64))
> +       classes[0] = X86_64_SSEHF_CLASS;
> +      else
> +       classes[0] = X86_64_SSE_CLASS;
> +      return 1;
>      case E_SFmode:
>        if (!(bit_offset % 64))
>         classes[0] = X86_64_SSESF_CLASS;
> @@ -2367,6 +2378,15 @@ classify_argument (machine_mode mode, const_tree type,
>        classes[0] = X86_64_SSE_CLASS;
>        classes[1] = X86_64_SSEUP_CLASS;
>        return 2;
> +    case E_HCmode:
> +      classes[0] = X86_64_SSE_CLASS;
> +      if (!(bit_offset % 64))
> +       return 1;
> +      else
> +       {
> +         classes[1] = X86_64_SSEHF_CLASS;
> +         return 2;
> +       }
>      case E_SCmode:
>        classes[0] = X86_64_SSE_CLASS;
>        if (!(bit_offset % 64))
> @@ -2481,6 +2501,7 @@ examine_argument (machine_mode mode, const_tree type, 
> int in_return,
>         (*int_nregs)++;
>         break;
>        case X86_64_SSE_CLASS:
> +      case X86_64_SSEHF_CLASS:
>        case X86_64_SSESF_CLASS:
>        case X86_64_SSEDF_CLASS:
>         (*sse_nregs)++;
> @@ -2580,13 +2601,14 @@ construct_container (machine_mode mode, machine_mode 
> orig_mode,
>
>    /* First construct simple cases.  Avoid SCmode, since we want to use
>       single register to pass this type.  */
> -  if (n == 1 && mode != SCmode)
> +  if (n == 1 && mode != SCmode && mode != HCmode)
>      switch (regclass[0])
>        {
>        case X86_64_INTEGER_CLASS:
>        case X86_64_INTEGERSI_CLASS:
>         return gen_rtx_REG (mode, intreg[0]);
>        case X86_64_SSE_CLASS:
> +      case X86_64_SSEHF_CLASS:
>        case X86_64_SSESF_CLASS:
>        case X86_64_SSEDF_CLASS:
>         if (mode != BLKmode)
> @@ -2683,6 +2705,14 @@ construct_container (machine_mode mode, machine_mode 
> orig_mode,
>                                    GEN_INT (i*8));
>             intreg++;
>             break;
> +         case X86_64_SSEHF_CLASS:
> +           exp [nexps++]
> +             = gen_rtx_EXPR_LIST (VOIDmode,
> +                                  gen_rtx_REG (HFmode,
> +                                               GET_SSE_REGNO (sse_regno)),
> +                                  GEN_INT (i*8));
> +           sse_regno++;
> +           break;
>           case X86_64_SSESF_CLASS:
>             exp [nexps++]
>               = gen_rtx_EXPR_LIST (VOIDmode,
> @@ -3903,6 +3933,19 @@ function_value_32 (machine_mode orig_mode, 
> machine_mode mode,
>      /* Most things go in %eax.  */
>      regno = AX_REG;
>
> +  /* Return _Float16/_Complex _Float16 by sse register.  */
> +  if (mode == HFmode)
> +    regno = FIRST_SSE_REG;
> +  if (mode == HCmode)
> +    {
> +      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
> +      XVECEXP (ret, 0, 0)
> +       = gen_rtx_EXPR_LIST (VOIDmode,
> +                            gen_rtx_REG (SImode, FIRST_SSE_REG),
> +                            GEN_INT (0));
> +      return ret;
> +    }
> +
>    /* Override FP return register with %xmm0 for local functions when
>       SSE math is enabled or for functions with sseregparm attribute.  */
>    if ((fn || fntype) && (mode == SFmode || mode == DFmode))
> @@ -3939,6 +3982,8 @@ function_value_64 (machine_mode orig_mode, machine_mode 
> mode,
>
>        switch (mode)
>         {
> +       case E_HFmode:
> +       case E_HCmode:
>         case E_SFmode:
>         case E_SCmode:
>         case E_DFmode:
> @@ -13411,6 +13456,15 @@ ix86_print_operand (FILE *file, rtx x, int code)
>           (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
>      }
>
> +  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
> +    {
> +      long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
> +                              REAL_MODE_FORMAT (HFmode));
> +      if (ASSEMBLER_DIALECT == ASM_ATT)
> +       putc ('$', file);
> +      fprintf (file, "0x%04x", (unsigned int) l);
> +    }
> +
>    else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
>      {
>        long l;
> @@ -18928,6 +18982,16 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t 
> rclass,
>        return NO_REGS;
>      }
>
> +  /* Require movement to gpr, and then store to memory.  */
> +  if (mode == HFmode
> +      && !TARGET_SSE4_1
> +      && SSE_CLASS_P (rclass)
> +      && !in_p && MEM_P (x))
> +    {
> +      sri->extra_cost = 1;
> +      return GENERAL_REGS;
> +    }
> +
>    /* This condition handles corner case where an expression involving
>       pointers gets vectorized.  We're trying to use the address of a
>       stack slot as a vector initializer.
> @@ -19546,6 +19610,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, 
> machine_mode mode)
>    else if (VALID_INT_MODE_P (mode)
>            || VALID_FP_MODE_P (mode))
>      return true;
> +  else if (mode == HFmode || mode == HCmode)
> +    return true;

Please add these two modes to VALID_INT_MODE_P instead.

>    /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
>       on to use that value in smaller contexts, this can easily force a
>       pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
> @@ -21555,10 +21621,27 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
>      return default_decimal_float_supported_p ();
>    else if (mode == TFmode)
>      return true;
> +  else if (mode == HFmode && TARGET_SSE2)
> +    return true;
>    else
>      return default_scalar_mode_supported_p (mode);
>  }
>
> +/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
> +   if MODE is HFmode, and punt to the generic implementation otherwise.  */
> +
> +static bool
> +ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
> +{
> +  /* NB: Always return TRUE for HFmode so that the _Float16 type will
> +     be defined by the C front-end for AVX512FP16 intrinsics.  We will
> +     issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
> +     enabled.  */
> +  return ((mode == HFmode && TARGET_SSE2)
> +         ? true
> +         : default_libgcc_floating_mode_supported_p (mode));
> +}
> +
>  /* Implements target hook vector_mode_supported_p.  */
>  static bool
>  ix86_vector_mode_supported_p (machine_mode mode)
> @@ -23254,13 +23337,15 @@ ix86_get_excess_precision (enum 
> excess_precision_type type)
>            provide would be identical were it not for the unpredictable
>            cases.  */
>         if (!TARGET_80387)
> -         return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
> +         return TARGET_SSE2
> +                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
> +                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
>         else if (!TARGET_MIX_SSE_I387)
>           {
>             if (!(TARGET_SSE && TARGET_SSE_MATH))
>               return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
>             else if (TARGET_SSE2)
> -             return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
> +             return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
>           }
>
>         /* If we are in standards compliant mode, but we know we will
> @@ -23820,6 +23905,10 @@ ix86_run_selftests (void)
>  #undef TARGET_SCALAR_MODE_SUPPORTED_P
>  #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
>
> +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
> +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P        \
> +ix86_libgcc_floating_mode_supported_p
> +
>  #undef TARGET_VECTOR_MODE_SUPPORTED_P
>  #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 0c2c93daf32..e21922e8782 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1018,7 +1018,7 @@ extern const char *host_detect_local_cpu (int argc, 
> const char **argv);
>  #define VALID_SSE2_REG_MODE(MODE)                                      \
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
>     || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode   \
> -   || (MODE) == V2DImode || (MODE) == DFmode)
> +   || (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode)
>
>  #define VALID_SSE_REG_MODE(MODE)                                       \
>    ((MODE) == V1TImode || (MODE) == TImode                              \
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 8b809c49fe0..dd991c3ffdf 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1222,6 +1222,9 @@ (define_mode_iterator MODEF [SF DF])
>  ;; All x87 floating point modes
>  (define_mode_iterator X87MODEF [SF DF XF])
>
> +;; All x87 floating point modes plus HF
> +(define_mode_iterator X87MODEFH [SF DF XF HF])
> +
>  ;; All SSE floating point modes
>  (define_mode_iterator SSEMODEF [SF DF TF])
>  (define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")])
> @@ -3130,6 +3133,32 @@ (define_split
>    operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
>  })
>
> +(define_insn "*pushhf_rex64"
> +  [(set (match_operand:HF 0 "push_operand" "=X,X")
> +       (match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
> +  "TARGET_64BIT"
> +{
> +  /* Anything else should be already split before reg-stack.  */
> +  gcc_assert (which_alternative == 0);
> +  return "push{q}\t%q1";
> +}
> +  [(set_attr "type" "push,multi")
> +   (set_attr "mode" "DI,TI")
> +   (set_attr "isa"  "*,sse4")])

Please always put "isa" attribute first, as is the case with other
insn patterns.

> +(define_insn "*pushhf"
> +  [(set (match_operand:HF 0 "push_operand" "=X,X")
> +       (match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
> +  "!TARGET_64BIT"
> +{
> +  /* Anything else should be already split before reg-stack.  */
> +  gcc_assert (which_alternative == 0);
> +  return "push{l}\t%k1";
> +}
> +  [(set_attr "type" "push,multi")
> +   (set_attr "mode" "SI,TI")
> +   (set_attr "isa"  "*,sse4")])

Also here.

> +
>  (define_insn "*pushsf_rex64"
>    [(set (match_operand:SF 0 "push_operand" "=X,X,X")
>         (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
> @@ -3158,10 +3187,11 @@ (define_insn "*pushsf"
>     (set_attr "unit" "i387,*,*")
>     (set_attr "mode" "SF,SI,SF")])
>
> +(define_mode_iterator MODESH [SF HF])
>  ;; %%% Kill this when call knows how to work this out.
>  (define_split
> -  [(set (match_operand:SF 0 "push_operand")
> -       (match_operand:SF 1 "any_fp_register_operand"))]
> +  [(set (match_operand:MODESH 0 "push_operand")
> +       (match_operand:MODESH 1 "any_fp_register_operand"))]
>    "reload_completed"
>    [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
>     (set (match_dup 0) (match_dup 1))]
> @@ -3209,8 +3239,8 @@ (define_expand "movtf"
>    "ix86_expand_move (TFmode, operands); DONE;")
>
>  (define_expand "mov<mode>"
> -  [(set (match_operand:X87MODEF 0 "nonimmediate_operand")
> -       (match_operand:X87MODEF 1 "general_operand"))]
> +  [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
> +       (match_operand:X87MODEFH 1 "general_operand"))]
>    ""
>    "ix86_expand_move (<MODE>mode, operands); DONE;")
>
> @@ -3646,6 +3676,86 @@ (define_insn "*movsf_internal"
>            ]
>            (const_string "*")))])
>
> +(define_insn "*movhf_internal"
> + [(set (match_operand:HF 0 "nonimmediate_operand"
> +        "=?r,?m,v,v,?r,m,?v,v")
> +       (match_operand:HF 1 "general_operand"
> +        "rmF,rF,C,v, v,v, r,m"))]
> + "!(MEM_P (operands[0]) && MEM_P (operands[1]))
> +  && (lra_in_progress
> +      || reload_completed
> +      || !CONST_DOUBLE_P (operands[1])
> +      || (TARGET_SSE && TARGET_SSE_MATH
> +         && standard_sse_constant_p (operands[1], HFmode) == 1)
> +      || memory_operand (operands[0], HFmode))"
> +{
> +  switch (get_attr_type (insn))
> +    {
> +    case TYPE_IMOV:
> +      return "mov{w}\t{%1, %0|%0, %1}";
> +
> +    case TYPE_SSELOG1:
> +      return standard_sse_constant_opcode (insn, operands);
> +
> +    case TYPE_SSEMOV:
> +      return ix86_output_ssemov (insn, operands);
> +
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +              ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +              : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +              ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +              : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set (attr "isa")
> +       (cond [(eq_attr "alternative" "2,3,4,6,7")
> +                (const_string "sse2")
> +              (eq_attr "alternative" "5")
> +                (const_string "sse4")
> +             ]
> +             (const_string "*")))
> +   (set (attr "type")
> +       (cond [(eq_attr "alternative" "0,1")
> +                (const_string "imov")
> +              (eq_attr "alternative" "2")
> +                (const_string "sselog1")
> +              (eq_attr "alternative" "4,5,6,7")
> +                (const_string "sselog")
> +             ]
> +             (const_string "ssemov")))
> +   (set (attr "memory")
> +       (cond [(eq_attr "alternative" "4,6")
> +                (const_string "none")
> +              (eq_attr "alternative" "5")
> +                (const_string "store")
> +              (eq_attr "alternative" "7")
> +                (const_string "load")
> +             ]
> +             (const_string "*")))
> +   (set (attr "prefix")
> +       (cond [(eq_attr "alternative" "0,1")
> +                (const_string "orig")
> +             ]
> +             (const_string "maybe_vex")))
> +   (set (attr "mode")
> +       (cond [(eq_attr "alternative" "0,1")
> +                (const_string "HI")
> +              (eq_attr "alternative" "2")
> +                (const_string "V4SF")
> +              (eq_attr "alternative" "4,5,6,7")
> +                (const_string "TI")
> +              (eq_attr "alternative" "3")
> +                (const_string "SF")
> +             ]
> +             (const_string "*")))])
> +
>  (define_split
>    [(set (match_operand 0 "any_fp_register_operand")
>         (match_operand 1 "memory_operand"))]
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index b83cd4919bb..2cd0b38fe5b 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -1102,6 +1102,7 @@ typedef _Complex float __attribute__((mode(IC))) 
> _Complex_ibm128;
>  @section Half-Precision Floating Point
>  @cindex half-precision floating point
>  @cindex @code{__fp16} data type
> +@cindex @code{_Float16} data type
>
>  On ARM and AArch64 targets, GCC supports half-precision (16-bit) floating
>  point via the @code{__fp16} type defined in the ARM C Language Extensions.
> @@ -1150,6 +1151,21 @@ calls.
>  It is recommended that portable code use the @code{_Float16} type defined
>  by ISO/IEC TS 18661-3:2015.  @xref{Floating Types}.
>
> +On x86 targets with @code{target("sse2")} and above, GCC supports 
> half-precision
> +(16-bit) floating point via the @code{_Float16} type, which is defined by
> +ISO/IEC TS 18661-3:2015.  For C++, x86 provides a built-in type named
> +@code{_Float16} which uses the same data format as in C.
> +
> +Without @code{target("avx512fp16")} @code{_Float16} type is storage only, 
> and all
> +operations will be emulated by soft-fp and @code{float} instructions.
> +
> +Soft-fp keeps the intermediate result of the operation at 32-bit precision 
> by default,
> +which may lead to inconsistent behavior between soft-fp and avx512fp16 
> instructions.
> +Using @option{-fexcess-precision=standard} forces rounding back after every 
> operation.
> +
> +With @option{-mavx512fp16}, instead of calling soft-fp, GCC automatically 
> generates
> +hardware instructions.
> +
>  @node Decimal Float
>  @section Decimal Floating Types
>  @cindex decimal floating types
> diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c
> index c13c7e45ac1..92f499643b5 100644
> --- a/gcc/lto/lto-lang.c
> +++ b/gcc/lto/lto-lang.c
> @@ -992,6 +992,9 @@ lto_type_for_mode (machine_mode mode, int unsigned_p)
>      return unsigned_p ? unsigned_intTI_type_node : intTI_type_node;
>  #endif
>
> +  if (float16_type_node && mode == TYPE_MODE (float16_type_node))
> +    return float16_type_node;
> +
>    if (mode == TYPE_MODE (float_type_node))
>      return float_type_node;
>
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c 
> b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> new file mode 100644
> index 00000000000..1b645eb499d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-sse2" } */
> +
> +_Float16/* { dg-error "is not supported on this target" } */
> +foo (_Float16 x) /* { dg-error "is not supported on this target" } */
> +{
> +  return x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c 
> b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
> new file mode 100644
> index 00000000000..3da7683fc31
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-avx512f" } */
> +
> +union flt
> +{
> +  _Float16 flt;
> +  short s;
> +};
> +
> +_Float16
> +foo (union flt x)
> +{
> +  return x.flt;
> +}
> +
> +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-3.c 
> b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c
> new file mode 100644
> index 00000000000..60ff9d4ab80
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-avx512f" } */
> +
> +#include<complex.h>
> +
> +_Complex _Float16
> +foo (_Complex _Float16 x)
> +{
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} } } */
> --
> 2.18.1
>

Re: [PATCH 02/10] [i386] Enable _Float16 type for TARGET_SSE2 and above.

Reply via email to