Re: [Mesa-dev] [PATCH v4] Fixing an x86 FPU bug.
Looks better, just a bunch of nit-picks... First, I think the summary/subject line can be improved. How about mesa: use fi_type in vertex attribute code On 02/18/2015 10:00 AM, marius.pre...@intel.com wrote: From: Marius Predut marius.pre...@intel.com On 32-bit, for floating point operations is used x86 FPU registers instead SSE, reason for when reinterprets an integer as a float result is unexpected (modify floats when they are written to memory). How about this: For 32-bit builds, floating point operations use x86 FPU registers, not SSE registers. If we're actually storing an integer in a float variable, the value might get modified when written to memory. This patch changes the VBO code to use the fi_type (float/int union) to store/copy vertex attributes. The patch was checked with and without -O3 compiler flag. Also, it add performace improvement because treat GLfloats as GLint. On x86 systems, moving a float as a int (thereby using integer registers instead of FP registers) is a performance win. Maybe: Also, this can improve performance on x86 because moving floats with integer registers instead of FP registers is faster. Neil Roberts review: -include changes on all places that are storing attribute values. Brian Paul review: - use fi_type type instead gl_constant_value Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82668 Signed-off-by: Marius Predut marius.pre...@intel.com --- src/mesa/main/context.c |3 ++- src/mesa/main/macros.h| 32 src/mesa/vbo/vbo_attrib_tmp.h | 20 src/mesa/vbo/vbo_context.h| 14 +++--- src/mesa/vbo/vbo_exec.h | 11 ++- src/mesa/vbo/vbo_exec_api.c | 35 +-- src/mesa/vbo/vbo_exec_draw.c |6 +++--- src/mesa/vbo/vbo_exec_eval.c | 22 +++--- src/mesa/vbo/vbo_save.h | 16 src/mesa/vbo/vbo_save_api.c | 34 +- src/mesa/vbo/vbo_save_draw.c |4 ++-- 11 files changed, 105 insertions(+), 92 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 63d30a2..f0597e2 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -134,6 +134,7 @@ #include math/m_matrix.h #include main/dispatch.h /* for _gloffset_COUNT */ #include uniforms.h +#include macros.h #ifdef USE_SPARC_ASM #include sparc/sparc.h @@ -656,7 +657,7 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) consts-MaxSamples = 0; /* GLSL default if NativeIntegers == FALSE */ - consts-UniformBooleanTrue = FLT_AS_UINT(1.0f); + consts-UniformBooleanTrue = FLOAT_AS_UNION(1.0f).u; /* GL_ARB_sync */ consts-MaxServerWaitTimeout = 0x1fff7fffULL; diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index 2d59c6f..9ca3460 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -170,25 +170,25 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256]; ub = ((GLubyte) F_TO_I((f) * 255.0F)) #endif -static inline GLfloat INT_AS_FLT(GLint i) +static fi_type UINT_AS_UNION(GLuint u) { fi_type tmp; - tmp.i = i; - return tmp.f; + tmp.u = u; + return tmp; } -static inline GLfloat UINT_AS_FLT(GLuint u) +static inline fi_type INT_AS_UNION(GLint i) { fi_type tmp; - tmp.u = u; - return tmp.f; + tmp.i = i; + return tmp; } -static inline unsigned FLT_AS_UINT(float f) +static inline fi_type FLOAT_AS_UNION(GLfloat f) { fi_type tmp; tmp.f = f; - return tmp.u; + return tmp; } /** @@ -620,24 +620,24 @@ do { \ * The default values are chosen based on \p type. */ static inline void -COPY_CLEAN_4V_TYPE_AS_FLOAT(GLfloat dst[4], int sz, const GLfloat src[4], +COPY_CLEAN_4V_TYPE_AS_UNION(fi_type dst[4], int sz, const fi_type src[4], GLenum type) { switch (type) { case GL_FLOAT: - ASSIGN_4V(dst, 0, 0, 0, 1); + ASSIGN_4V(dst, FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(1)); Wrap to = 78 columns. break; case GL_INT: - ASSIGN_4V(dst, INT_AS_FLT(0), INT_AS_FLT(0), - INT_AS_FLT(0), INT_AS_FLT(1)); + ASSIGN_4V(dst, INT_AS_UNION(0), INT_AS_UNION(0), +INT_AS_UNION(0), INT_AS_UNION(1)); break; case GL_UNSIGNED_INT: - ASSIGN_4V(dst, UINT_AS_FLT(0), UINT_AS_FLT(0), - UINT_AS_FLT(0), UINT_AS_FLT(1)); + ASSIGN_4V(dst, UINT_AS_UNION(0), UINT_AS_UNION(0), +UINT_AS_UNION(0), UINT_AS_UNION(1)); break; default: - ASSIGN_4V(dst, 0.0f, 0.0f, 0.0f, 1.0f); /* silence warnings */ - ASSERT(!Unexpected type in COPY_CLEAN_4V_TYPE_AS_FLOAT macro); + ASSIGN_4V(dst, FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(1)); /* silence warnings */ Line wrap. +
[Mesa-dev] [PATCH v4] Fixing an x86 FPU bug.
From: Marius Predut marius.pre...@intel.com On 32-bit, for floating point operations is used x86 FPU registers instead SSE, reason for when reinterprets an integer as a float result is unexpected (modify floats when they are written to memory). The patch was checked with and without -O3 compiler flag. Also, it add performace improvement because treat GLfloats as GLint. On x86 systems, moving a float as a int (thereby using integer registers instead of FP registers) is a performance win. Neil Roberts review: -include changes on all places that are storing attribute values. Brian Paul review: - use fi_type type instead gl_constant_value Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82668 Signed-off-by: Marius Predut marius.pre...@intel.com --- src/mesa/main/context.c |3 ++- src/mesa/main/macros.h| 32 src/mesa/vbo/vbo_attrib_tmp.h | 20 src/mesa/vbo/vbo_context.h| 14 +++--- src/mesa/vbo/vbo_exec.h | 11 ++- src/mesa/vbo/vbo_exec_api.c | 35 +-- src/mesa/vbo/vbo_exec_draw.c |6 +++--- src/mesa/vbo/vbo_exec_eval.c | 22 +++--- src/mesa/vbo/vbo_save.h | 16 src/mesa/vbo/vbo_save_api.c | 34 +- src/mesa/vbo/vbo_save_draw.c |4 ++-- 11 files changed, 105 insertions(+), 92 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 63d30a2..f0597e2 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -134,6 +134,7 @@ #include math/m_matrix.h #include main/dispatch.h /* for _gloffset_COUNT */ #include uniforms.h +#include macros.h #ifdef USE_SPARC_ASM #include sparc/sparc.h @@ -656,7 +657,7 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) consts-MaxSamples = 0; /* GLSL default if NativeIntegers == FALSE */ - consts-UniformBooleanTrue = FLT_AS_UINT(1.0f); + consts-UniformBooleanTrue = FLOAT_AS_UNION(1.0f).u; /* GL_ARB_sync */ consts-MaxServerWaitTimeout = 0x1fff7fffULL; diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index 2d59c6f..9ca3460 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -170,25 +170,25 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256]; ub = ((GLubyte) F_TO_I((f) * 255.0F)) #endif -static inline GLfloat INT_AS_FLT(GLint i) +static fi_type UINT_AS_UNION(GLuint u) { fi_type tmp; - tmp.i = i; - return tmp.f; + tmp.u = u; + return tmp; } -static inline GLfloat UINT_AS_FLT(GLuint u) +static inline fi_type INT_AS_UNION(GLint i) { fi_type tmp; - tmp.u = u; - return tmp.f; + tmp.i = i; + return tmp; } -static inline unsigned FLT_AS_UINT(float f) +static inline fi_type FLOAT_AS_UNION(GLfloat f) { fi_type tmp; tmp.f = f; - return tmp.u; + return tmp; } /** @@ -620,24 +620,24 @@ do { \ * The default values are chosen based on \p type. */ static inline void -COPY_CLEAN_4V_TYPE_AS_FLOAT(GLfloat dst[4], int sz, const GLfloat src[4], +COPY_CLEAN_4V_TYPE_AS_UNION(fi_type dst[4], int sz, const fi_type src[4], GLenum type) { switch (type) { case GL_FLOAT: - ASSIGN_4V(dst, 0, 0, 0, 1); + ASSIGN_4V(dst, FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(1)); break; case GL_INT: - ASSIGN_4V(dst, INT_AS_FLT(0), INT_AS_FLT(0), - INT_AS_FLT(0), INT_AS_FLT(1)); + ASSIGN_4V(dst, INT_AS_UNION(0), INT_AS_UNION(0), +INT_AS_UNION(0), INT_AS_UNION(1)); break; case GL_UNSIGNED_INT: - ASSIGN_4V(dst, UINT_AS_FLT(0), UINT_AS_FLT(0), - UINT_AS_FLT(0), UINT_AS_FLT(1)); + ASSIGN_4V(dst, UINT_AS_UNION(0), UINT_AS_UNION(0), +UINT_AS_UNION(0), UINT_AS_UNION(1)); break; default: - ASSIGN_4V(dst, 0.0f, 0.0f, 0.0f, 1.0f); /* silence warnings */ - ASSERT(!Unexpected type in COPY_CLEAN_4V_TYPE_AS_FLOAT macro); + ASSIGN_4V(dst, FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(0), FLOAT_AS_UNION(1)); /* silence warnings */ + ASSERT(!Unexpected type in COPY_CLEAN_4V_TYPE_AS_UNION macro); } COPY_SZ_4V(dst, sz, src); } diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h index ec66934..17a0a10 100644 --- a/src/mesa/vbo/vbo_attrib_tmp.h +++ b/src/mesa/vbo/vbo_attrib_tmp.h @@ -28,6 +28,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include util/u_format_r11g11b10f.h #include main/varray.h + +/* ATTR */ +#define ATTR( A, N, T, V0, V1, V2, V3 ) ATTR_##T((A), (N), (T), (V0), (V1), (V2), (V3)) + +#define ATTR_GL_UNSIGNED_INT( A, N, T, V0, V1, V2, V3 ) \ +ATTR_UNION(A, N, T, UINT_AS_UNION(V0), UINT_AS_UNION(V1), UINT_AS_UNION(V2), UINT_AS_UNION(V3)) +#define ATTR_GL_INT( A, N, T, V0, V1, V2, V3 ) \ +ATTR_UNION(A, N, T,