[PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/102464 * config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New function type. (V16HF_FTYPE_V16HF): Ditto. (V32HF_FTYPE_V32HF): Ditto. (V8HF_FTYPE_V8HF_ROUND): Ditto. (V16HF_FTYPE_V16HF_ROUND): Ditto. (V32HF_FTYPE_V32HF_ROUND): Ditto. * config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH, IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH, IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256, IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512, IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin. * config/i386/i386-builtins.c (ix86_builtin_vectorized_function): Enable vectorization for HFmode FLOOR/CEIL/TRUNC operation. * config/i386/i386-expand.c (ix86_expand_args_builtin): Handle new builtins. * config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend to vector HFmodes. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102464-vrndscaleph.c: New test. --- gcc/config/i386/i386-builtin-types.def| 7 ++ gcc/config/i386/i386-builtin.def | 11 ++ gcc/config/i386/i386-builtins.c | 42 +++ gcc/config/i386/i386-expand.c | 3 + gcc/config/i386/sse.md| 12 +- .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++ 6 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 4c355c587b5..e33f06ab30b 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT) + +DEF_FUNCTION_TYPE (V8HF, V8HF) +DEF_FUNCTION_TYPE (V16HF, V16HF) +DEF_FUNCTION_TYPE (V32HF, V32HF) +DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND) 
+DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 99217d08d37..d9eee3f373c 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_floorph", IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) V8HF_FTYPE_V8HF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", IX86_BUILTIN_CEILPH, (enum rtx_code) ROUND_CEIL, (int) V8HF_FTYPE_V8HF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) V8HF_FTYPE_V8HF_ROUND) + BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND) @@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", 
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf, "__builtin_ia32_floorph256", IX86_BUILTIN_FLOORPH256, (enum rtx_code) ROUND_FLOOR, (int) V16HF_FTYPE_V16HF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf, "__builtin_ia32_ceilph256", IX86_BUILTIN_CEILPH256, (enum rtx_co
Re: [PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.
On Mon, Oct 25, 2021 at 4:24 PM liuhongt wrote: > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > I'm going to check in this patch if there's no objection. > gcc/ChangeLog: > > PR target/102464 > * config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New > function type. > (V16HF_FTYPE_V16HF): Ditto. > (V32HF_FTYPE_V32HF): Ditto. > (V8HF_FTYPE_V8HF_ROUND): Ditto. > (V16HF_FTYPE_V16HF_ROUND): Ditto. > (V32HF_FTYPE_V32HF_ROUND): Ditto. > * config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH, > IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH, > IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256, > IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512, > IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin. > * config/i386/i386-builtins.c > (ix86_builtin_vectorized_function): Enable vectorization for > HFmode FLOOR/CEIL/TRUNC operation. > * config/i386/i386-expand.c (ix86_expand_args_builtin): Handle > new builtins. > * config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend > to vector HFmodes. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr102464-vrndscaleph.c: New test. 
> --- > gcc/config/i386/i386-builtin-types.def| 7 ++ > gcc/config/i386/i386-builtin.def | 11 ++ > gcc/config/i386/i386-builtins.c | 42 +++ > gcc/config/i386/i386-expand.c | 3 + > gcc/config/i386/sse.md| 12 +- > .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++ > 6 files changed, 184 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c > > diff --git a/gcc/config/i386/i386-builtin-types.def > b/gcc/config/i386/i386-builtin-types.def > index 4c355c587b5..e33f06ab30b 100644 > --- a/gcc/config/i386/i386-builtin-types.def > +++ b/gcc/config/i386/i386-builtin-types.def > @@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT) > DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT) > DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT) > DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT) > + > +DEF_FUNCTION_TYPE (V8HF, V8HF) > +DEF_FUNCTION_TYPE (V16HF, V16HF) > +DEF_FUNCTION_TYPE (V32HF, V32HF) > +DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND) > +DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND) > +DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND) > diff --git a/gcc/config/i386/i386-builtin.def > b/gcc/config/i386/i386-builtin.def > index 99217d08d37..d9eee3f373c 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, > CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, > "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) > V2DF_FTYPE_V2DF) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, > "__builtin_ia32_roundpd_az_vec_pack_sfix", > IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF) > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_floorph", > IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) > 
V8HF_FTYPE_V8HF_ROUND) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", > IX86_BUILTIN_CEILPH, (enum rtx_code) ROUND_CEIL, (int) V8HF_FTYPE_V8HF_ROUND) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", > IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) > V8HF_FTYPE_V8HF_ROUND) > + > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, > (int) V4SF_FTYPE_V4SF_ROUND) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, > (int) V4SF_FTYPE_V4SF_ROUND) > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, > (int) V4SF_FTYPE_V4SF_ROUND) > @@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, > CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3 > BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, > "__builtin_ia32_floorpd_vec_pack_sfix256", > IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) > V8SI_FTYPE_V4DF_V4DF_ROUND) > BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, > "__builtin_ia32_ceilpd_vec_pack_sfix256", > IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) > V8SI_FTYPE_V4DF_V4DF_ROUND) > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > C
Re: [PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.
On Thu, Oct 28, 2021 at 10:26 AM Hongtao Liu wrote: > > On Mon, Oct 25, 2021 at 4:24 PM liuhongt wrote: > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > > Ok for trunk? > > > I'm going to check in this patch if there's no objection. Committed. > > gcc/ChangeLog: > > > > PR target/102464 > > * config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New > > function type. > > (V16HF_FTYPE_V16HF): Ditto. > > (V32HF_FTYPE_V32HF): Ditto. > > (V8HF_FTYPE_V8HF_ROUND): Ditto. > > (V16HF_FTYPE_V16HF_ROUND): Ditto. > > (V32HF_FTYPE_V32HF_ROUND): Ditto. > > * config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH, > > IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH, > > IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256, > > IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512, > > IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin. > > * config/i386/i386-builtins.c > > (ix86_builtin_vectorized_function): Enable vectorization for > > HFmode FLOOR/CEIL/TRUNC operation. > > * config/i386/i386-expand.c (ix86_expand_args_builtin): Handle > > new builtins. > > * config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend > > to vector HFmodes. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr102464-vrndscaleph.c: New test. 
> > --- > > gcc/config/i386/i386-builtin-types.def| 7 ++ > > gcc/config/i386/i386-builtin.def | 11 ++ > > gcc/config/i386/i386-builtins.c | 42 +++ > > gcc/config/i386/i386-expand.c | 3 + > > gcc/config/i386/sse.md| 12 +- > > .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++ > > 6 files changed, 184 insertions(+), 6 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c > > > > diff --git a/gcc/config/i386/i386-builtin-types.def > > b/gcc/config/i386/i386-builtin-types.def > > index 4c355c587b5..e33f06ab30b 100644 > > --- a/gcc/config/i386/i386-builtin-types.def > > +++ b/gcc/config/i386/i386-builtin-types.def > > @@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT) > > DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT) > > DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT) > > DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT) > > + > > +DEF_FUNCTION_TYPE (V8HF, V8HF) > > +DEF_FUNCTION_TYPE (V16HF, V16HF) > > +DEF_FUNCTION_TYPE (V32HF, V32HF) > > +DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND) > > +DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND) > > +DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND) > > diff --git a/gcc/config/i386/i386-builtin.def > > b/gcc/config/i386/i386-builtin.def > > index 99217d08d37..d9eee3f373c 100644 > > --- a/gcc/config/i386/i386-builtin.def > > +++ b/gcc/config/i386/i386-builtin.def > > @@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, > > CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, > > "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) > > V2DF_FTYPE_V2DF) > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, > > "__builtin_ia32_roundpd_az_vec_pack_sfix", > > IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF) > > > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > > CODE_FOR_avx512fp16_rndscalev8hf, 
"__builtin_ia32_floorph", > > IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) > > V8HF_FTYPE_V8HF_ROUND) > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", > > IX86_BUILTIN_CEILPH, (enum rtx_code) ROUND_CEIL, (int) > > V8HF_FTYPE_V8HF_ROUND) > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, > > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", > > IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) > > V8HF_FTYPE_V8HF_ROUND) > > + > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > > "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) > > ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND) > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > > "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, > > (int) V4SF_FTYPE_V4SF_ROUND) > > BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, > > "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) > > ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND) > > @@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, > > CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3 > > BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, > > "__builtin_ia32_floorpd_vec_pack_sfix256", > > IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) > > V8SI_FTYPE_V4DF_V4DF_ROUND) > > BDESC (OPTION_MASK_IS