Hi: As discussed in [1], this patch series supports _Float16 on targets with SSE2 and above. Without avx512fp16, the _Float16 type is storage only, and all operations are emulated by soft-fp and float instructions. Soft-fp keeps the intermediate result of an operation at 32-bit precision by default, which may lead to inconsistent behavior between soft-fp and avx512fp16 instructions; using the option -fexcess-precision=standard will force rounding back after every operation. [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574112.html
There are 10 patches in this series: 1) Update hf soft-fp from glibc. 2) [i386] Enable _Float16 type for TARGET_SSE2 and above. 3) [i386] libgcc: Enable hfmode soft-sf/df/xf/tf extensions and truncations. 4) AVX512FP16: Initial support for AVX512FP16 feature and scalar _Float16 instructions. 5) AVX512FP16: Support vector init/broadcast/set/extract for FP16. 6) AVX512FP16: Add testcase for vector init and broadcast intrinsics. 7) AVX512FP16: Add tests for vector passing in variable arguments. 8) AVX512FP16: Add ABI tests for xmm. 9) AVX512FP16: Add ABI test for ymm. 10) AVX512FP16: Add abi test for zmm. Bootstrapped and regtested on x86_64-linux-gnu{-m32,} on CLX. Bootstrapped and regtested on x86_64-linux-gnu{-m32\ -march=native,\ -march=native} on SPR. Passes 300+ new tests under gcc.dg/torture/*float16*. On SPR, there are regressions related to FLT_EVAL_METHOD for pr69225-[1234567].c, since TARGET_AVX512FP16 sets FLT_EVAL_METHOD to FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16. gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.c | 26 +- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 2 +- gcc/config/i386/avx512fp16intrin.h | 225 ++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin-types.def | 7 +- gcc/config/i386/i386-builtins.c | 23 + gcc/config/i386/i386-c.c | 2 + gcc/config/i386/i386-expand.c | 129 +- gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-modes.def | 13 +- gcc/config/i386/i386-options.c | 4 +- gcc/config/i386/i386.c | 238 +++- gcc/config/i386/i386.h | 28 +- gcc/config/i386/i386.md | 304 ++++- gcc/config/i386/i386.opt | 4 + gcc/config/i386/immintrin.h | 4 + gcc/config/i386/sse.md | 395 ++++-- gcc/doc/extend.texi | 16 + gcc/doc/invoke.texi | 10 +- gcc/lto/lto-lang.c | 3 + gcc/optabs-query.c | 10 +- gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/g++.target/i386/float16-1.C | 8 + gcc/testsuite/g++.target/i386/float16-2.C 
| 14 + gcc/testsuite/g++.target/i386/float16-3.C | 10 + gcc/testsuite/gcc.target/i386/avx-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512-check.h | 3 + .../gcc.target/i386/avx512fp16-10a.c | 14 + .../gcc.target/i386/avx512fp16-10b.c | 25 + .../gcc.target/i386/avx512fp16-12a.c | 21 + .../gcc.target/i386/avx512fp16-12b.c | 27 + gcc/testsuite/gcc.target/i386/avx512fp16-1a.c | 24 + gcc/testsuite/gcc.target/i386/avx512fp16-1b.c | 32 + gcc/testsuite/gcc.target/i386/avx512fp16-1c.c | 26 + gcc/testsuite/gcc.target/i386/avx512fp16-1d.c | 33 + gcc/testsuite/gcc.target/i386/avx512fp16-1e.c | 30 + gcc/testsuite/gcc.target/i386/avx512fp16-2a.c | 28 + gcc/testsuite/gcc.target/i386/avx512fp16-2b.c | 33 + gcc/testsuite/gcc.target/i386/avx512fp16-2c.c | 36 + gcc/testsuite/gcc.target/i386/avx512fp16-3a.c | 36 + gcc/testsuite/gcc.target/i386/avx512fp16-3b.c | 35 + gcc/testsuite/gcc.target/i386/avx512fp16-3c.c | 40 + gcc/testsuite/gcc.target/i386/avx512fp16-4.c | 31 + gcc/testsuite/gcc.target/i386/avx512fp16-5.c | 133 ++ gcc/testsuite/gcc.target/i386/avx512fp16-6.c | 57 + gcc/testsuite/gcc.target/i386/avx512fp16-7.c | 86 ++ gcc/testsuite/gcc.target/i386/avx512fp16-8.c | 53 + gcc/testsuite/gcc.target/i386/avx512fp16-9a.c | 27 + gcc/testsuite/gcc.target/i386/avx512fp16-9b.c | 49 + .../gcc.target/i386/avx512fp16-vararg-1.c | 122 ++ .../gcc.target/i386/avx512fp16-vararg-2.c | 107 ++ .../gcc.target/i386/avx512fp16-vararg-3.c | 114 ++ .../gcc.target/i386/avx512fp16-vararg-4.c | 115 ++ .../gcc.target/i386/avx512fp16-vec_set_var.c | 30 + gcc/testsuite/gcc.target/i386/float16-3a.c | 10 + gcc/testsuite/gcc.target/i386/float16-3b.c | 10 + gcc/testsuite/gcc.target/i386/float16-4a.c | 10 + gcc/testsuite/gcc.target/i386/float16-4b.c | 10 + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/m512-check.h | 38 +- gcc/testsuite/gcc.target/i386/pr54855-12.c | 14 + gcc/testsuite/gcc.target/i386/pr54855-13.c | 14 + 
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- .../gcc.target/i386/sse2-float16-1.c | 8 + .../gcc.target/i386/sse2-float16-2.c | 16 + .../gcc.target/i386/sse2-float16-3.c | 12 + .../abi/avx512fp16/abi-avx512fp16-xmm.exp | 48 + .../gcc.target/x86_64/abi/avx512fp16/args.h | 190 +++ .../x86_64/abi/avx512fp16/asm-support.S | 81 ++ .../x86_64/abi/avx512fp16/avx512fp16-check.h | 74 ++ .../abi/avx512fp16/avx512fp16-xmm-check.h | 3 + .../x86_64/abi/avx512fp16/defines.h | 150 +++ .../avx512fp16/m256h/abi-avx512fp16-ymm.exp | 45 + .../x86_64/abi/avx512fp16/m256h/args.h | 182 +++ .../x86_64/abi/avx512fp16/m256h/asm-support.S | 81 ++ .../avx512fp16/m256h/avx512fp16-ymm-check.h | 3 + .../avx512fp16/m256h/test_m256_returning.c | 54 + .../abi/avx512fp16/m256h/test_passing_m256.c | 370 ++++++ .../avx512fp16/m256h/test_passing_structs.c | 113 ++ .../avx512fp16/m256h/test_passing_unions.c | 337 ++++++ .../abi/avx512fp16/m256h/test_varargs-m256.c | 160 +++ .../avx512fp16/m512h/abi-avx512fp16-zmm.exp | 48 + .../x86_64/abi/avx512fp16/m512h/args.h | 186 +++ .../x86_64/abi/avx512fp16/m512h/asm-support.S | 97 ++ .../avx512fp16/m512h/avx512fp16-zmm-check.h | 4 + .../avx512fp16/m512h/test_m512_returning.c | 62 + .../abi/avx512fp16/m512h/test_passing_m512.c | 380 ++++++ .../avx512fp16/m512h/test_passing_structs.c | 123 ++ .../avx512fp16/m512h/test_passing_unions.c | 415 +++++++ .../abi/avx512fp16/m512h/test_varargs-m512.c | 164 +++ .../gcc.target/x86_64/abi/avx512fp16/macros.h | 53 + .../test_3_element_struct_and_unions.c | 692 +++++++++++ .../abi/avx512fp16/test_basic_alignment.c | 45 + .../test_basic_array_size_and_align.c | 43 + .../abi/avx512fp16/test_basic_returning.c | 87 ++ .../x86_64/abi/avx512fp16/test_basic_sizes.c | 43 + .../test_basic_struct_size_and_align.c | 42 + .../test_basic_union_size_and_align.c | 40 + 
.../abi/avx512fp16/test_complex_returning.c | 104 ++ .../abi/avx512fp16/test_m64m128_returning.c | 73 ++ .../abi/avx512fp16/test_passing_floats.c | 1066 +++++++++++++++++ .../abi/avx512fp16/test_passing_m64m128.c | 510 ++++++++ .../abi/avx512fp16/test_passing_structs.c | 332 +++++ .../abi/avx512fp16/test_passing_unions.c | 335 ++++++ .../abi/avx512fp16/test_struct_returning.c | 274 +++++ .../x86_64/abi/avx512fp16/test_varargs-m128.c | 164 +++ gcc/testsuite/lib/target-supports.exp | 13 +- libgcc/config.host | 5 +- libgcc/config/i386/32/sfp-machine.h | 1 + libgcc/config/i386/64/sfp-machine.h | 1 + libgcc/config/i386/64/t-softfp | 1 + libgcc/config/i386/sfp-machine.h | 1 + libgcc/config/i386/t-softfp | 5 + libgcc/soft-fp/eqhf2.c | 49 + libgcc/soft-fp/extendhfdf2.c | 53 + libgcc/soft-fp/extendhfsf2.c | 49 + libgcc/soft-fp/half.h | 1 + libgcc/soft-fp/truncdfhf2.c | 52 + libgcc/soft-fp/truncsfhf2.c | 48 + 127 files changed, 10324 insertions(+), 238 deletions(-) create mode 100644 gcc/config/i386/avx512fp16intrin.h create mode 100644 gcc/testsuite/g++.target/i386/float16-1.C create mode 100644 gcc/testsuite/g++.target/i386/float16-2.C create mode 100644 gcc/testsuite/g++.target/i386/float16-3.C create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-10a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-10b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-1c.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-1d.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-1e.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-2c.c create 
mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-3c.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-5.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-6.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-7.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-8.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-9a.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-9b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vararg-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vararg-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vararg-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vararg-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vec_set_var.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/float16-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-12.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-13.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-3.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/abi-avx512fp16-xmm.exp create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/args.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/avx512fp16-check.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/avx512fp16-xmm-check.h create mode 
100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/defines.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/abi-avx512fp16-ymm.exp create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/args.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/avx512fp16-ymm-check.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/test_m256_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/test_passing_m256.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/test_passing_structs.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/test_passing_unions.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/test_varargs-m256.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/abi-avx512fp16-zmm.exp create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/args.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/avx512fp16-zmm-check.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/test_m512_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/test_passing_m512.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/test_passing_structs.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/test_passing_unions.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/test_varargs-m512.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/macros.h create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_3_element_struct_and_unions.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_alignment.c create mode 100644 
gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_array_size_and_align.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_sizes.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_struct_size_and_align.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_basic_union_size_and_align.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_complex_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_m64m128_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_passing_floats.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_passing_m64m128.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_passing_structs.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_passing_unions.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_struct_returning.c create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/test_varargs-m128.c create mode 100644 libgcc/config/i386/64/t-softfp create mode 100644 libgcc/soft-fp/eqhf2.c create mode 100644 libgcc/soft-fp/extendhfdf2.c create mode 100644 libgcc/soft-fp/extendhfsf2.c create mode 100644 libgcc/soft-fp/truncdfhf2.c create mode 100644 libgcc/soft-fp/truncsfhf2.c -- 2.18.1