Module: Mesa Branch: master Commit: 87900afe5bbe90c5f3ad0921b28ae1c889029ada URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=87900afe5bbe90c5f3ad0921b28ae1c889029ada
Author: Marek Olšák <marek.ol...@amd.com> Date: Fri Sep 18 05:21:09 2020 -0400 util: implement f16c - fast half<->float conversions This also happens to fix bptc-float-modes on llvmpipe. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6774> --- .gitlab-ci/piglit/quick_gl.txt | 5 ++-- meson.build | 13 ++++++++++ src/gallium/tests/unit/u_half_test.c | 25 +++++++++++++++----- src/util/half_float.c | 8 +++---- src/util/half_float.h | 46 ++++++++++++++++++++++++++++++++---- src/util/softfloat.c | 2 +- src/util/softfloat.h | 2 +- 7 files changed, 82 insertions(+), 19 deletions(-) diff --git a/.gitlab-ci/piglit/quick_gl.txt b/.gitlab-ci/piglit/quick_gl.txt index a24ae0d3647..104d2f6cd01 100644 --- a/.gitlab-ci/piglit/quick_gl.txt +++ b/.gitlab-ci/piglit/quick_gl.txt @@ -738,7 +738,6 @@ spec/arb_sparse_buffer/commit: skip spec/arb_sparse_buffer/minmax: skip spec/arb_tessellation_shader/arb_tessellation_shader-immediate-mode-draw-patches: skip spec/arb_texture_buffer_object/negative-unsupported: skip -spec/arb_texture_compression_bptc/bptc-float-modes: fail spec/arb_texture_cube_map/copyteximage cube samples=16: skip spec/arb_texture_cube_map/copyteximage cube samples=2: skip spec/arb_texture_cube_map/copyteximage cube samples=32: skip @@ -1656,8 +1655,8 @@ wgl/wgl-sanity: skip summary: name: results ---- -------- - pass: 23074 - fail: 198 + pass: 23075 + fail: 197 crash: 0 skip: 1433 timeout: 0 diff --git a/meson.build b/meson.build index 76f72a9df7c..b855710e787 100644 --- a/meson.build +++ b/meson.build @@ -1111,6 +1111,19 @@ else sse41_args = [] endif +if cc.has_argument('-mf16c') and cpp.has_argument('-mf16c') + pre_args += '-DUSE_F16C' + c_args += '-mf16c' + cpp_args += '-mf16c' + + # GCC on x86 (not x86_64) with -msse* assumes a 16 byte aligned stack, but + # that's not guaranteed (not sure if this also applies to -mf16c) + if host_machine.cpu_family() == 'x86' + c_args += '-mstackrealign' + cpp_args += '-mstackrealign' + endif +endif + # Check for GCC style atomics dep_atomic = null_dep diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c index 48a9a2d539c..fb4ce6ec9f2 100644 --- a/src/gallium/tests/unit/u_half_test.c +++ b/src/gallium/tests/unit/u_half_test.c @@ -4,9 +4,10 @@ #include "util/u_math.h" #include "util/u_half.h" +#include "util/u_cpu_detect.h" -int -main(int argc, char **argv) +static void +test(void) { unsigned i; unsigned roundtrip_fails = 0; @@ -28,9 +29,21 @@ main(int argc, char **argv) if(roundtrip_fails) { printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails); - return 1; - } else { - printf("Success!\n"); - return 0; + exit(1); } } + +int +main(int argc, char **argv) +{ + assert(!util_cpu_caps.has_f16c); + test(); + + /* Test f16c. */ + util_cpu_detect(); + if (util_cpu_caps.has_f16c) + test(); + + printf("Success!\n"); + return 0; +} diff --git a/src/util/half_float.c b/src/util/half_float.c index aae690a56a6..61b512f48ed 100644 --- a/src/util/half_float.c +++ b/src/util/half_float.c @@ -54,7 +54,7 @@ typedef union { float f; int32_t i; uint32_t u; } fi_type; * result in the same value as if the expression were executed on the GPU. */ uint16_t -_mesa_float_to_half(float val) +_mesa_float_to_half_slow(float val) { const fi_type fi = {val}; const int flt_m = fi.i & 0x7fffff; @@ -129,9 +129,9 @@ _mesa_float_to_half(float val) } uint16_t -_mesa_float_to_float16_rtz(float val) +_mesa_float_to_float16_rtz_slow(float val) { - return _mesa_float_to_half_rtz(val); + return _mesa_float_to_half_rtz_slow(val); } /** @@ -140,7 +140,7 @@ _mesa_float_to_float16_rtz(float val) * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html */ float -_mesa_half_to_float(uint16_t val) +_mesa_half_to_float_slow(uint16_t val) { return util_half_to_float(val); } diff --git a/src/util/half_float.h b/src/util/half_float.h index c9fad9a9400..6f9a405613b 100644 --- a/src/util/half_float.h +++ b/src/util/half_float.h @@ -28,6 +28,14 @@ #include <stdbool.h> #include <stdint.h> +#include <string.h> +#include "util/u_cpu_detect.h" + +#ifdef USE_F16C +#include <immintrin.h> +#define F16C_NEAREST 0 +#define F16C_TRUNCATE 3 +#endif #ifdef __cplusplus extern "C" { @@ -36,18 +44,48 @@ extern "C" { #define FP16_ONE ((uint16_t) 0x3c00) #define FP16_ZERO ((uint16_t) 0) -uint16_t _mesa_float_to_half(float val); -float _mesa_half_to_float(uint16_t val); +uint16_t _mesa_float_to_half_slow(float val); +float _mesa_half_to_float_slow(uint16_t val); uint8_t _mesa_half_to_unorm8(uint16_t v); uint16_t _mesa_uint16_div_64k_to_half(uint16_t v); /* - * _mesa_float_to_float16_rtz is no more than a wrapper to the counterpart + * _mesa_float_to_float16_rtz_slow is no more than a wrapper to the counterpart * softfloat.h call. Still, softfloat.h conversion API is meant to be kept * private. In other words, only use the API published here, instead of * calling directly the softfloat.h one. */ -uint16_t _mesa_float_to_float16_rtz(float val); +uint16_t _mesa_float_to_float16_rtz_slow(float val); + +static inline uint16_t +_mesa_float_to_half(float val) +{ +#ifdef USE_F16C + if (util_cpu_caps.has_f16c) + return _cvtss_sh(val, F16C_NEAREST); +#endif + return _mesa_float_to_half_slow(val); +} + +static inline float +_mesa_half_to_float(uint16_t val) +{ +#ifdef USE_F16C + if (util_cpu_caps.has_f16c) + return _cvtsh_ss(val); +#endif + return _mesa_half_to_float_slow(val); +} + +static inline uint16_t +_mesa_float_to_float16_rtz(float val) +{ +#ifdef USE_F16C + if (util_cpu_caps.has_f16c) + return _cvtss_sh(val, F16C_TRUNCATE); +#endif + return _mesa_float_to_float16_rtz_slow(val); +} static inline uint16_t _mesa_float_to_float16_rtne(float val) diff --git a/src/util/softfloat.c b/src/util/softfloat.c index 365b15bbf0c..50cf098fd9f 100644 --- a/src/util/softfloat.c +++ b/src/util/softfloat.c @@ -1435,7 +1435,7 @@ _mesa_double_to_f32(double val, bool rtz) * From f32_to_f16() */ uint16_t -_mesa_float_to_half_rtz(float val) +_mesa_float_to_half_rtz_slow(float val) { const fi_type fi = {val}; const uint32_t flt_m = fi.u & 0x7fffff; diff --git a/src/util/softfloat.h b/src/util/softfloat.h index 4e48c6548b9..2e254e29892 100644 --- a/src/util/softfloat.h +++ b/src/util/softfloat.h @@ -56,7 +56,7 @@ double _mesa_double_mul_rtz(double a, double b); double _mesa_double_fma_rtz(double a, double b, double c); float _mesa_float_fma_rtz(float a, float b, float c); float _mesa_double_to_f32(double x, bool rtz); -uint16_t _mesa_float_to_half_rtz(float x); +uint16_t _mesa_float_to_half_rtz_slow(float x); #ifdef __cplusplus } /* extern C */ _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit