[Mesa-dev] [PATCH] egl: EXT_pixel_format_float plumbing
This patch adds support for, and the capability to match against, a new surface attribute: the component type. Currently no configs with a floating-point component type are exposed. With this change, the following dEQP tests start to pass: dEQP-EGL.functional.choose_config.color_component_type_ext.dont_care dEQP-EGL.functional.choose_config.color_component_type_ext.fixed dEQP-EGL.functional.choose_config.color_component_type_ext.float Signed-off-by: Tapani Pälli--- src/egl/drivers/dri2/egl_dri2.c | 2 ++ src/egl/main/eglapi.c | 1 + src/egl/main/eglconfig.c| 9 + src/egl/main/eglconfig.h| 2 ++ src/egl/main/egldisplay.h | 1 + 5 files changed, 15 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index b486be7487..a511081df8 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -695,6 +695,8 @@ dri2_setup_screen(_EGLDisplay *disp) dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_CONTEXT_PRIORITY); + disp->Extensions.EXT_pixel_format_float = EGL_TRUE; + if (dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB)) disp->Extensions.KHR_gl_colorspace = EGL_TRUE; diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 76dabba2eb..c1bf5bbfe1 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -514,6 +514,7 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(KHR_surfaceless_context); if (dpy->Extensions.EXT_swap_buffers_with_damage) _eglAppendExtension(&exts, "EGL_KHR_swap_buffers_with_damage"); + _EGL_CHECK_EXTENSION(EXT_pixel_format_float); _EGL_CHECK_EXTENSION(KHR_wait_sync); if (dpy->Extensions.KHR_no_config_context) diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index f057b0bfe8..2d3b3ddd90 100644 --- a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -68,6 +68,7 @@ _eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id) conf->TransparentType = EGL_NONE; conf->NativeVisualType = EGL_NONE; conf->ColorBufferType = EGL_RGB_BUFFER; + 
conf->ComponentType = EGL_COLOR_COMPONENT_TYPE_FIXED_EXT; } @@ -254,6 +255,9 @@ static const struct { { EGL_RECORDABLE_ANDROID,ATTRIB_TYPE_BOOLEAN, ATTRIB_CRITERION_EXACT, EGL_DONT_CARE }, + { EGL_COLOR_COMPONENT_TYPE_EXT, ATTRIB_TYPE_ENUM, +ATTRIB_CRITERION_EXACT, +EGL_COLOR_COMPONENT_TYPE_FIXED_EXT }, }; @@ -316,6 +320,11 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching) if (val != EGL_RGB_BUFFER && val != EGL_LUMINANCE_BUFFER) valid = EGL_FALSE; break; + case EGL_COLOR_COMPONENT_TYPE_EXT: +if (val != EGL_COLOR_COMPONENT_TYPE_FIXED_EXT && +val != EGL_COLOR_COMPONENT_TYPE_FLOAT_EXT) + valid = EGL_FALSE; +break; default: assert(0); break; diff --git a/src/egl/main/eglconfig.h b/src/egl/main/eglconfig.h index 22da697e83..21f0a28412 100644 --- a/src/egl/main/eglconfig.h +++ b/src/egl/main/eglconfig.h @@ -88,6 +88,7 @@ struct _egl_config EGLint YInvertedNOK; EGLint FramebufferTargetAndroid; EGLint RecordableAndroid; + EGLint ComponentType; }; @@ -137,6 +138,7 @@ _eglOffsetOfConfig(EGLint attr) ATTRIB_MAP(EGL_Y_INVERTED_NOK,YInvertedNOK); ATTRIB_MAP(EGL_FRAMEBUFFER_TARGET_ANDROID, FramebufferTargetAndroid); ATTRIB_MAP(EGL_RECORDABLE_ANDROID,RecordableAndroid); + ATTRIB_MAP(EGL_COLOR_COMPONENT_TYPE_EXT, ComponentType); #undef ATTRIB_MAP default: return -1; diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 981cbf4ca0..0f57680b21 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -103,6 +103,7 @@ struct _egl_extensions EGLBoolean EXT_create_context_robustness; EGLBoolean EXT_image_dma_buf_import; EGLBoolean EXT_image_dma_buf_import_modifiers; + EGLBoolean EXT_pixel_format_float; EGLBoolean EXT_swap_buffers_with_damage; unsigned int IMG_context_priority; -- 2.13.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default
On 2017-11-08 17:26:47, Timothy Arceri wrote: > Reviewed-by: Timothy Arceri> > Mark may want to consider adding some of the once a day type CI runs for > this. For example running the test suite for two consecutive runs on the > same build so that the second run uses the shader cache and also a > second run that uses MESA_GLSL=cache_fb to force testing of the cache > fallback path. Yeah. We discussed this previously, but I don't think it's been implemented yet. My opinion is that it could perhaps be a weekly test. We also discussed a nir serialization test, similar to our current nir clone daily test. I don't think this is implemented yet either. -Jordan > > On 09/11/17 11:58, Jordan Justen wrote: > > f9d5a7add42af5a2e4410526d1480a08f41317ae along with > > a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one > > known regression with shader cache. (Deus Ex instability.) > > > > We should enable the shader cache by default to stabilize it before > > the next major Mesa release. > > > > Signed-off-by: Jordan Justen > > --- > > docs/relnotes/17.4.0.html | 2 +- > > src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 --- > > 2 files changed, 1 insertion(+), 4 deletions(-) > > > > diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html > > index f81b5bd62d3..48dcd5cce38 100644 > > --- a/docs/relnotes/17.4.0.html > > +++ b/docs/relnotes/17.4.0.html > > @@ -44,7 +44,7 @@ Note: some of the new features are only available with > > certain drivers. 
> > > > > > > > -Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE > > environment variable is set to "0" or "false" > > +Disk shader cache support for i965 > > > > > > Bug fixes > > diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c > > b/src/mesa/drivers/dri/i965/brw_disk_cache.c > > index 853ea98af03..cd0524c5cbf 100644 > > --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c > > +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c > > @@ -420,9 +420,6 @@ void > > brw_disk_cache_init(struct brw_context *brw) > > { > > #ifdef ENABLE_SHADER_CACHE > > - if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true)) > > - return; > > - > > char renderer[10]; > > MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), > > "i965_%04x", > > brw->screen->deviceID); > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] glsl/linker: location aliasing requires types to have the same width
Hi Ilia, are you okay with this version of the patch? Iago On Tue, 2017-11-07 at 10:50 +0100, Iago Toral Quiroga wrote: > Regarding location aliasing requirements, the OpenGL spec says: > > "Further, when location aliasing, the aliases sharing the location > must have the same underlying numerical type (floating-point or > integer)." > > Khronos has further clarified that this also requires the underlying > types to have the same width, so we can't put a float and a double > in the same location slot for example. Future versions of the spec > will > be corrected to make this clear. > > This patch amends our implementation to account for this restriction. > > In the process of doing this, I also noticed that we would attempt > to check aliasing requirements for record variables (including the > test > for the numerical type) which is not allowed, instead, we should be > producing a linker error as soon as we see any attempt to do location > aliasing on non-numerical variables. For the particular case of > structs, > we were producing a linker error in this case, but only because we > assumed that struct fields use all components in each location, so > any attempt to alias locations consumed by struct fields would > produce > a link error due to component aliasing, which is not accurate of the > actual problem. This patch would make it produce an error for > attempting > to alias a non-numerical variable instead, which is always accurate. > > v2: > - Do not assert if we see invalid numerical types. These come > straight from shader code, so we should produce linker errors if > shaders attempt to do location aliasing on variables that are not > numerical such as records. > - While we are at it, improve error reporting for the case of > numerical type mismatch to include the shader stage. > > v3: > - Allow location aliasing of images and samplers. 
If we get these > it means bindless support is active and they should be handled > as 64-bit integers (Ilia) > - Make sure we produce link errors for any non-numerical type > for which we attempt location aliasing, not just structs. > --- > src/compiler/glsl/link_varyings.cpp | 64 ++- > -- > 1 file changed, 46 insertions(+), 18 deletions(-) > > diff --git a/src/compiler/glsl/link_varyings.cpp > b/src/compiler/glsl/link_varyings.cpp > index 1a9894baab..e0d757eaaf 100644 > --- a/src/compiler/glsl/link_varyings.cpp > +++ b/src/compiler/glsl/link_varyings.cpp > @@ -405,15 +405,15 @@ compute_variable_location_slot(ir_variable > *var, gl_shader_stage stage) > > struct explicit_location_info { > ir_variable *var; > - unsigned numerical_type; > + int numerical_type; > unsigned interpolation; > bool centroid; > bool sample; > bool patch; > }; > > -static inline unsigned > -get_numerical_type(const glsl_type *type) > +static inline int > +get_numerical_sized_type(const glsl_type *type) > { > /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout > Qualifiers, Page 68, > * (Location aliasing): > @@ -421,10 +421,25 @@ get_numerical_type(const glsl_type *type) > *"Further, when location aliasing, the aliases sharing the > location > * must have the same underlying numerical type (floating- > point or > * integer) > +* > +* Khronos has further clarified that this also requires the > underlying > +* types to have the same width, so we can't put a float and a > double > +* in the same location slot for example. Future versions of the > spec will > +* be corrected to make this clear. > +* > +* Notice that we allow location aliasing for bindless > image/samplers too > +* since these are defined as 64-bit integers. 
> */ > - if (type->is_float() || type->is_double()) > + if (type->is_float()) > return GLSL_TYPE_FLOAT; > - return GLSL_TYPE_INT; > + else if (type->is_integer()) > + return GLSL_TYPE_INT; > + else if (type->is_double()) > + return GLSL_TYPE_DOUBLE; > + else if (type->is_integer_64() || type->is_sampler() || type- > >is_image()) > + return GLSL_TYPE_INT64; > + > + return -1; /* Not a numerical type */ > } > > static bool > @@ -442,14 +457,17 @@ check_location_aliasing(struct > explicit_location_info explicit_locations[][4], > gl_shader_stage stage) > { > unsigned last_comp; > - if (type->without_array()->is_record()) { > - /* The component qualifier can't be used on structs so just > treat > - * all component slots as used. > + const glsl_type *type_without_array = type->without_array(); > + int numerical_type = > get_numerical_sized_type(type_without_array); > + if (numerical_type == -1) { > + /* The component qualifier can't be used on non-numerical > types so just > + * treat all component slots as used. This will also make it > so that > + * any location aliasing attempt on
[Mesa-dev] [PATCH 11/17] main: Support 1 Mesa format with get for GL_PROGRAM_BINARY_FORMATS
Mesa supports either 0 or 1 formats. If 1 format is supported, it is GL_PROGRAM_BINARY_FORMAT_MESA as defined in the GL_MESA_program_binary_formats extension spec. Signed-off-by: Jordan Justen--- src/mesa/main/get.c | 9 + src/mesa/main/get_hash_params.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index e68a93b10ee..8b28f583018 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1153,6 +1153,15 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu } } break; + + /* GL_ARB_get_program_binary */ + case GL_PROGRAM_BINARY_FORMATS: + assert(ctx->Const.NumProgramBinaryFormats <= 1); + v->value_int_n.n = MIN2(ctx->Const.NumProgramBinaryFormats, 1); + if (ctx->Const.NumProgramBinaryFormats > 0) { + v->value_int_n.ints[0] = GL_PROGRAM_BINARY_FORMAT_MESA; + } + break; } } diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 8c6193d761f..be0c68e4181 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -325,7 +325,7 @@ descriptor=[ # GL_ARB_get_program_binary / GL_OES_get_program_binary [ "NUM_PROGRAM_BINARY_FORMATS", "CONTEXT_UINT(Const.NumProgramBinaryFormats), NO_EXTRA" ], - [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, NO_EXTRA" ], + [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INT_N, 0, NO_EXTRA" ], # GL_INTEL_performance_query [ "PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL", "CONST(MAX_PERFQUERY_QUERY_NAME_LENGTH), extra_INTEL_performance_query" ], -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/17] main: Support calling driver for ProgramBinary
Signed-off-by: Jordan Justen--- src/mesa/main/dd.h| 4 src/mesa/main/shaderapi.c | 38 +- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index c20d8b80e1d..b46f2693b83 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -1141,6 +1141,10 @@ struct dd_function_table { struct gl_shader_program *shProg, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, GLvoid *binary); + void (*ProgramBinary)(struct gl_context *ctx, + struct gl_shader_program *shProg, + GLenum binaryFormat, const GLvoid *binary, + GLsizei length); /*@}*/ }; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 28711a67f03..47a51279353 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2224,9 +2224,6 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat, if (!shProg) return; - (void) binaryFormat; - (void) binary; - /* Section 2.3.1 (Errors) of the OpenGL 4.5 spec says: * * "If a negative number is provided where an argument of type sizei or @@ -2237,20 +2234,27 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat, return; } - /* The ARB_get_program_binary spec says: -* -* "<binaryFormat> and <binary> must be those returned by a previous -* call to GetProgramBinary, and <length> must be the length of the -* program binary as returned by GetProgramBinary or GetProgramiv with -* <pname> PROGRAM_BINARY_LENGTH. Loading the program binary will fail, -* setting the LINK_STATUS of <program> to FALSE, if these conditions -* are not met." -* -* Since any value of binaryFormat passed "is not one of those specified as -* allowable for [this] command, an INVALID_ENUM error is generated." 
-*/ - shProg->data->LinkStatus = linking_failure; - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramBinary"); + assert(ctx->Const.NumProgramBinaryFormats == 0 || + ctx->Driver.ProgramBinary); + if (ctx->Const.NumProgramBinaryFormats == 0 || + binaryFormat != GL_PROGRAM_BINARY_FORMAT_MESA) { + /* The ARB_get_program_binary spec says: + * + * "<binaryFormat> and <binary> must be those returned by a previous + * call to GetProgramBinary, and <length> must be the length of the + * program binary as returned by GetProgramBinary or GetProgramiv with + * <pname> PROGRAM_BINARY_LENGTH. Loading the program binary will fail, + * setting the LINK_STATUS of <program> to FALSE, if these conditions + * are not met." + * + * Since any value of binaryFormat passed "is not one of those specified as + * allowable for [this] command, an INVALID_ENUM error is generated." + */ + shProg->data->LinkStatus = linking_failure; + _mesa_error(ctx, GL_INVALID_ENUM, "glProgramBinary"); + } else { + ctx->Driver.ProgramBinary(ctx, shProg, binaryFormat, binary, length); + } } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/17] main: Support calling driver for GetProgramBinary
Signed-off-by: Jordan Justen--- src/mesa/main/dd.h| 4 src/mesa/main/shaderapi.c | 17 +++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 91eff55f84d..c20d8b80e1d 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -1137,6 +1137,10 @@ struct dd_function_table { void (*GetProgramBinaryLength)(struct gl_context *ctx, struct gl_shader_program *shProg, GLint *length); + void (*GetProgramBinary)(struct gl_context *ctx, +struct gl_shader_program *shProg, +GLsizei bufSize, GLsizei *length, +GLenum *binaryFormat, GLvoid *binary); /*@}*/ }; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index f12825d2536..28711a67f03 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2200,12 +2200,17 @@ _mesa_GetProgramBinary(GLuint program, GLsizei bufSize, GLsizei *length, return; } - *length = 0; - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetProgramBinary(driver supports zero binary formats)"); - - (void) binaryFormat; - (void) binary; + assert(ctx->Const.NumProgramBinaryFormats == 0 || + ctx->Driver.GetProgramBinary); + if (ctx->Const.NumProgramBinaryFormats == 0) { + *length = 0; + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramBinary(driver supports zero binary formats)"); + } else { + ctx->Driver.GetProgramBinary(ctx, shProg, bufSize, length, + binaryFormat, binary); + assert(*length == 0 || *binaryFormat == GL_PROGRAM_BINARY_FORMAT_MESA); + } } void GLAPIENTRY -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/17] i965: Add brw_program_serialize_nir
Signed-off-by: Jordan Justen--- src/mesa/drivers/dri/i965/brw_link.cpp | 9 ++--- src/mesa/drivers/dri/i965/brw_program.c | 12 src/mesa/drivers/dri/i965/brw_program.h | 3 +++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 24545d52ecb..878b3240841 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -300,13 +300,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) NIR_PASS_V(prog->nir, nir_lower_samplers, shProg); NIR_PASS_V(prog->nir, nir_lower_atomics, shProg); - if (brw->ctx.Cache) { - struct blob writer; - blob_init(&writer); - nir_serialize(&writer, prog->nir); - prog->driver_cache_blob = ralloc_size(NULL, writer.size); - memcpy(prog->driver_cache_blob, writer.data, writer.size); - prog->driver_cache_blob_size = writer.size; + if (ctx->Cache) { + brw_program_serialize_nir(ctx, prog, (gl_shader_stage)stage); } infos[stage] = &prog->nir->info; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 809766574f8..798b7d24dd6 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -781,6 +781,18 @@ brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo, return next_binding_table_offset; } +void +brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog, + gl_shader_stage stage) +{ + struct blob writer; + blob_init(&writer); + nir_serialize(&writer, prog->nir); + prog->driver_cache_blob = ralloc_size(NULL, writer.size); + memcpy(prog->driver_cache_blob, writer.data, writer.size); + prog->driver_cache_blob_size = writer.size; +} + void brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog, gl_shader_stage stage) diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index bd9b4ad168a..a5e41522841 100644 --- 
a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -82,6 +82,9 @@ brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo, uint32_t next_binding_table_offset); void +brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog, + gl_shader_stage stage); +void brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog, gl_shader_stage stage); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/17] main: Allow non-zero NUM_PROGRAM_BINARY_FORMATS
Signed-off-by: Jordan Justen--- src/mesa/main/get_hash_params.py | 2 +- src/mesa/main/mtypes.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index acd5cd1f011..8c6193d761f 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -324,7 +324,7 @@ descriptor=[ [ "SHADER_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, extra_ARB_ES2_compatibility_api_es2" ], # GL_ARB_get_program_binary / GL_OES_get_program_binary - [ "NUM_PROGRAM_BINARY_FORMATS", "CONST(0), NO_EXTRA" ], + [ "NUM_PROGRAM_BINARY_FORMATS", "CONTEXT_UINT(Const.NumProgramBinaryFormats), NO_EXTRA" ], [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, NO_EXTRA" ], # GL_INTEL_performance_query diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 023692cc0e1..f64bf0a2ad4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4002,6 +4002,9 @@ struct gl_constants /** When drivers are OK with mapped buffers during draw and other calls. */ bool AllowMappedBuffersDuringExecution; + + /** GL_ARB_get_program_binary */ + GLuint NumProgramBinaryFormats; }; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/17] i965: Add brw_program_deserialize_nir
Signed-off-by: Jordan Justen--- src/mesa/drivers/dri/i965/brw_disk_cache.c | 31 -- src/mesa/drivers/dri/i965/brw_program.c| 16 +++ src/mesa/drivers/dri/i965/brw_program.h| 4 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index 853ea98af03..65bb52726eb 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -24,7 +24,6 @@ #include "compiler/blob.h" #include "compiler/glsl/ir_uniform.h" #include "compiler/glsl/shader_cache.h" -#include "compiler/nir/nir_serialize.h" #include "main/mtypes.h" #include "util/build_id.h" #include "util/debug.h" @@ -61,27 +60,6 @@ gen_shader_sha1(struct brw_context *brw, struct gl_program *prog, _mesa_sha1_compute(manifest, strlen(manifest), out_sha1); } -static void -restore_serialized_nir_shader(struct brw_context *brw, struct gl_program *prog, - gl_shader_stage stage) -{ - prog->program_written_to_cache = false; - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "falling back to nir %s.\n", - _mesa_shader_stage_to_abbrev(prog->info.stage)); - } - - if (!prog->nir) { - assert(prog->driver_cache_blob && prog->driver_cache_blob_size > 0); - const struct nir_shader_compiler_options *options = - brw->ctx.Const.ShaderCompilerOptions[stage].NirOptions; - struct blob_reader reader; - blob_reader_init(&reader, prog->driver_cache_blob, - prog->driver_cache_blob_size); - prog->nir = nir_deserialize(NULL, options, &reader); - } -} - static void write_blob_program_data(struct blob *binary, gl_shader_stage stage, const void *program, @@ -298,7 +276,14 @@ brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage) return true; fail: - restore_serialized_nir_shader(brw, prog, stage); + prog->program_written_to_cache = false; + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { + fprintf(stderr, "falling back to nir %s.\n", + _mesa_shader_stage_to_abbrev(prog->info.stage)); + } 
+ + brw_program_deserialize_nir(&brw->ctx, prog, stage); + return false; } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 7607bc38840..39308f306df 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -40,6 +40,7 @@ #include "util/ralloc.h" #include "compiler/glsl/ir.h" #include "compiler/glsl/glsl_to_nir.h" +#include "compiler/nir/nir_serialize.h" #include "brw_program.h" #include "brw_context.h" @@ -779,3 +780,18 @@ brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo, assert(next_binding_table_offset <= BRW_MAX_SURFACES); return next_binding_table_offset; } + +void +brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog, +gl_shader_stage stage) +{ + if (!prog->nir) { + assert(prog->driver_cache_blob && prog->driver_cache_blob_size > 0); + const struct nir_shader_compiler_options *options = + ctx->Const.ShaderCompilerOptions[stage].NirOptions; + struct blob_reader reader; + blob_reader_init(&reader, prog->driver_cache_blob, + prog->driver_cache_blob_size); + prog->nir = nir_deserialize(NULL, options, &reader); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index 701b8da482e..bd9b4ad168a 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -81,6 +81,10 @@ brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo, struct brw_stage_prog_data *stage_prog_data, uint32_t next_binding_table_offset); +void +brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog, +gl_shader_stage stage); + void brw_stage_prog_data_free(const void *prog_data); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/17] compiler: Fold shader_cache in with libglsl sources
It appears that we include the shader cache sources into libglsl regardless. The Meson build already does this. Signed-off-by: Jordan Justen--- src/compiler/Android.glsl.mk | 3 +-- src/compiler/Makefile.glsl.am | 3 +-- src/compiler/Makefile.sources | 6 ++ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..8533830833f 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -35,8 +35,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ $(LIBGLCPP_FILES) \ - $(LIBGLSL_FILES) \ - $(LIBGLSL_SHADER_CACHE_FILES) + $(LIBGLSL_FILES) LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ diff --git a/src/compiler/Makefile.glsl.am b/src/compiler/Makefile.glsl.am index 179f415fe6f..11ff14b2852 100644 --- a/src/compiler/Makefile.glsl.am +++ b/src/compiler/Makefile.glsl.am @@ -134,8 +134,7 @@ glsl_libglsl_la_LIBADD = \ glsl_libglsl_la_SOURCES = \ $(LIBGLSL_GENERATED_FILES) \ - $(LIBGLSL_FILES)\ - $(LIBGLSL_SHADER_CACHE_FILES) + $(LIBGLSL_FILES) glsl_libstandalone_la_SOURCES = \ $(GLSL_COMPILER_CXX_FILES) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index abcd8c6641f..f7eb69dc2bc 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -141,13 +141,11 @@ LIBGLSL_FILES = \ glsl/propagate_invariance.cpp \ glsl/s_expression.cpp \ glsl/s_expression.h \ + glsl/shader_cache.cpp \ + glsl/shader_cache.h \ glsl/string_to_uint_map.cpp \ glsl/string_to_uint_map.h -LIBGLSL_SHADER_CACHE_FILES = \ - glsl/shader_cache.cpp \ - glsl/shader_cache.h - # glsl_compiler GLSL_COMPILER_CXX_FILES = \ -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/17] glsl: Split out shader program serialization
This will allow us to use the program serialization to implement ARB_get_program_binary. Signed-off-by: Jordan Justen--- src/compiler/Makefile.sources |2 + src/compiler/glsl/meson.build |2 + src/compiler/glsl/serialize.cpp| 1238 src/compiler/glsl/serialize.h | 50 ++ src/compiler/glsl/shader_cache.cpp | 1185 +- src/compiler/shader_info.h |1 + 6 files changed, 1297 insertions(+), 1181 deletions(-) create mode 100644 src/compiler/glsl/serialize.cpp create mode 100644 src/compiler/glsl/serialize.h diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index f7eb69dc2bc..2d3b8aecad8 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -141,6 +141,8 @@ LIBGLSL_FILES = \ glsl/propagate_invariance.cpp \ glsl/s_expression.cpp \ glsl/s_expression.h \ + glsl/serialize.cpp \ + glsl/serialize.h \ glsl/shader_cache.cpp \ glsl/shader_cache.h \ glsl/string_to_uint_map.cpp \ diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build index aa0e7153f42..a200b9f1458 100644 --- a/src/compiler/glsl/meson.build +++ b/src/compiler/glsl/meson.build @@ -182,6 +182,8 @@ files_libglsl = files( 's_expression.h', 'string_to_uint_map.cpp', 'string_to_uint_map.h', + 'serialize.cpp', + 'serialize.h', 'shader_cache.cpp', 'shader_cache.h', ) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp new file mode 100644 index 000..b4c9545702e --- /dev/null +++ b/src/compiler/glsl/serialize.cpp @@ -0,0 +1,1238 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file serialize.cpp + * + * GLSL serialization + * + * Supports serializing and deserializing glsl programs using a blob. + */ + +#include "compiler/glsl_types.h" +#include "compiler/shader_info.h" +#include "ir_uniform.h" +#include "main/mtypes.h" +#include "string_to_uint_map.h" + +extern "C" { +#include "main/shaderobj.h" +#include "program/program.h" +} + +static void +write_subroutines(struct blob *metadata, struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_linked_shader *sh = prog->_LinkedShaders[i]; + if (!sh) + continue; + + struct gl_program *glprog = sh->Program; + + blob_write_uint32(metadata, glprog->sh.NumSubroutineUniforms); + blob_write_uint32(metadata, glprog->sh.MaxSubroutineFunctionIndex); + blob_write_uint32(metadata, glprog->sh.NumSubroutineFunctions); + for (unsigned j = 0; j < glprog->sh.NumSubroutineFunctions; j++) { + int num_types = glprog->sh.SubroutineFunctions[j].num_compat_types; + + blob_write_string(metadata, glprog->sh.SubroutineFunctions[j].name); + blob_write_uint32(metadata, glprog->sh.SubroutineFunctions[j].index); + blob_write_uint32(metadata, num_types); + + for (int k = 0; k < num_types; k++) { +encode_type_to_blob(metadata, +glprog->sh.SubroutineFunctions[j].types[k]); + } + } + } +} + +static void 
+read_subroutines(struct blob_reader *metadata, struct gl_shader_program *prog) +{ + struct gl_subroutine_function *subs; + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_linked_shader *sh = prog->_LinkedShaders[i]; + if (!sh) + continue; + + struct gl_program *glprog = sh->Program; + + glprog->sh.NumSubroutineUniforms = blob_read_uint32(metadata); + glprog->sh.MaxSubroutineFunctionIndex = blob_read_uint32(metadata); + glprog->sh.NumSubroutineFunctions = blob_read_uint32(metadata); + + subs = rzalloc_array(prog, struct gl_subroutine_function, +
[Mesa-dev] [PATCH 07/17] i965: Free serialized nir after deserializing
Signed-off-by: Jordan Justen--- src/mesa/drivers/dri/i965/brw_program.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 39308f306df..809766574f8 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -794,4 +794,10 @@ brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog, prog->driver_cache_blob_size); prog->nir = nir_deserialize(NULL, options, &reader); } + + if (prog->driver_cache_blob) { + ralloc_free(prog->driver_cache_blob); + prog->driver_cache_blob = NULL; + prog->driver_cache_blob_size = 0; + } } -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/17] util: Add Mesa ARB_get_program_binary helper functions
Signed-off-by: Jordan Justen--- src/util/Makefile.sources | 2 + src/util/meson.build | 2 + src/util/program_binary.c | 322 ++ src/util/program_binary.h | 91 + 4 files changed, 417 insertions(+) create mode 100644 src/util/program_binary.c create mode 100644 src/util/program_binary.h diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources index c7f6516a992..d9048bbd182 100644 --- a/src/util/Makefile.sources +++ b/src/util/Makefile.sources @@ -21,6 +21,8 @@ MESA_UTIL_FILES := \ macros.h \ mesa-sha1.c \ mesa-sha1.h \ + program_binary.c \ + program_binary.h \ sha1/sha1.c \ sha1/sha1.h \ ralloc.c \ diff --git a/src/util/meson.build b/src/util/meson.build index c9cb3e861e9..9bc10222a72 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -45,6 +45,8 @@ files_mesa_util = files( 'macros.h', 'mesa-sha1.c', 'mesa-sha1.h', + 'program_binary.c', + 'program_binary.h', 'sha1/sha1.c', 'sha1/sha1.h', 'ralloc.c', diff --git a/src/util/program_binary.c b/src/util/program_binary.c new file mode 100644 index 000..4447dd632d9 --- /dev/null +++ b/src/util/program_binary.c @@ -0,0 +1,322 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (c) 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file program_binary.c + * + * Helper functions for serializing a binary program. + */ + + +#include "main/mtypes.h" +#include "crc32.h" +#include "program_binary.h" +#include "zlib.h" + +/** + * Mesa supports one binary format, but it must differentiate between formats + * produced by different drivers and different Mesa versions. + * + * Mesa uses a uint32_t value to specify an internal format. The only format + * defined has one uint32_t value of 0, followed by 20 bytes specifying a sha1 + * that uniquely identifies the Mesa driver type and version. + */ + +struct program_binary_header { + /* If internal_format is 0, it must be followed by the 20 byte sha1 that +* identifies the Mesa driver and version supported. If we want to support +* something besides a sha1, then a new internal_format value can be added. +*/ + uint32_t internal_format; + uint8_t sha1[20]; + /* Fields following sha1 can be changed since the sha1 will guarantee that +* the binary only works with the same Mesa version. +*/ + uint32_t deflated_size; + uint32_t inflated_size; + uint32_t crc32; +}; + +unsigned +get_program_binary_max_size(unsigned payload_size) +{ + return sizeof(struct program_binary_header) + payload_size; +} + +/* From the zlib docs: + *"If the memory is available, buffers sizes on the order of 128K or 256K + *bytes should be used." 
+ */ +#define BUFSIZE 256 * 1024 + +/** + * Compresses buffer + */ +static size_t +deflate_and_write_to_buf(const void *in_data, size_t in_data_size, + uint8_t *dest, size_t max_dest_size) +{ + unsigned char out[BUFSIZE]; + size_t written = 0; + + /* allocate deflate state */ + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = (uint8_t *) in_data; + strm.avail_in = in_data_size; + + int ret = deflateInit(, Z_BEST_COMPRESSION); + if (ret != Z_OK) + return 0; + + /* compress until end of in_data */ + size_t compressed_size = 0; + int flush; + do { + int remaining = in_data_size - BUFSIZE; + flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH; + in_data_size -= BUFSIZE; + + /* Run deflate() on input until the output buffer is not full (which + * means there is no more data to deflate). + */ + do { + strm.avail_out = BUFSIZE; + strm.next_out = out; + + ret = deflate(, flush);/* no bad return value */ + assert(ret != Z_STREAM_ERROR);
[Mesa-dev] [PATCH 09/17] i965: Fix memory leak when serializing nir
Signed-off-by: Jordan Justen--- src/mesa/drivers/dri/i965/brw_program.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 798b7d24dd6..f795fc1dbc3 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -791,6 +791,7 @@ brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog, prog->driver_cache_blob = ralloc_size(NULL, writer.size); memcpy(prog->driver_cache_blob, writer.data, writer.size); prog->driver_cache_blob_size = writer.size; + blob_finish(); } void -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/17] main: Clear shader program data whenever ProgramBinary is called
The GL_ARB_get_program_binary extension spec says: "If ProgramBinary fails to load a binary, no error is generated, but any information about a previous link or load of that program object is lost." Signed-off-by: Jordan Justen--- src/mesa/main/shaderapi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 47a51279353..03e8488ccee 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2224,6 +2224,8 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat, if (!shProg) return; + _mesa_clear_shader_program_data(ctx, shProg); + /* Section 2.3.1 (Errors) of the OpenGL 4.5 spec says: * * "If a negative number is provided where an argument of type sizei or -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/17] i965: Add ARB_get_program_binary support using nir_serialization
Signed-off-by: Jordan Justen--- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_context.c| 9 ++ src/mesa/drivers/dri/i965/brw_context.h| 16 ++ src/mesa/drivers/dri/i965/brw_program_binary.c | 200 + src/mesa/drivers/dri/i965/meson.build | 1 + 5 files changed, 227 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 2980cdb3c54..3fba8dc17ef 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -37,6 +37,7 @@ i965_FILES = \ brw_performance_query.c \ brw_program.c \ brw_program.h \ + brw_program_binary.c \ brw_program_cache.c \ brw_primitive_restart.c \ brw_queryobj.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 0ebd4329935..b685e53852b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -329,6 +329,12 @@ brw_init_driver_functions(struct brw_context *brw, if (devinfo->gen >= 6) functions->GetSamplePosition = gen6_get_sample_position; + + /* GL_ARB_get_program_binary */ + brw_program_binary_init(brw->screen->deviceID); + functions->GetProgramBinaryLength = brw_get_program_binary_length; + functions->GetProgramBinary = brw_get_program_binary; + functions->ProgramBinary = brw_program_binary; } static void @@ -697,6 +703,9 @@ brw_initialize_context_constants(struct brw_context *brw) if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) ctx->Const.AllowMappedBuffersDuringExecution = true; + + /* GL_ARB_get_program_binary */ + ctx->Const.NumProgramBinaryFormats = 1; } static void diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8aa0c5ff64c..0c10db0ef34 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1556,6 +1556,22 @@ 
brw_check_for_reset(struct brw_context *brw); extern void brw_init_compute_functions(struct dd_function_table *functions); +/* brw_program_binary.c */ +extern void +brw_program_binary_init(unsigned device_id); +extern void +brw_get_program_binary_length(struct gl_context *ctx, + struct gl_shader_program *sh_prog, + GLint *length); +extern void +brw_get_program_binary(struct gl_context *ctx, + struct gl_shader_program *sh_prog, + GLsizei bufSize, GLsizei *length, + GLenum *binary_format, GLvoid *binary); +extern void +brw_program_binary(struct gl_context *ctx, struct gl_shader_program *sh_prog, + GLenum binary_format, const GLvoid *binary, GLsizei length); + /*== * Inline conversion functions. These are better-typed than the * macros used previously: diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c b/src/mesa/drivers/dri/i965/brw_program_binary.c new file mode 100644 index 000..55a2d097b8c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_program_binary.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/blob.h" +#include "compiler/glsl/serialize.h" +#include "compiler/nir/nir_serialize.h" +#include "main/mtypes.h" +#include "util/build_id.h" +#include "util/debug.h" +#include "util/mesa-sha1.h" +#include "util/program_binary.h" +#include "program/prog_parameter.h" + +#include "brw_context.h" +#include "brw_program.h" + +static uint8_t driver_sha1[20]; + +void
[Mesa-dev] [PATCH 12/17] main: Support calling driver for GL_PROGRAM_BINARY_LENGTH
Signed-off-by: Jordan Justen--- src/mesa/main/dd.h| 12 src/mesa/main/shaderapi.c | 8 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index da03b2e8b94..91eff55f84d 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -1126,6 +1126,18 @@ struct dd_function_table { GLuint64 size, int fd); /*@}*/ + + /** +* \name GL_ARB_get_program_binary +*/ + /*@{*/ + /** +* Called to retrieve a binary serialized copy of the current program. +*/ + void (*GetProgramBinaryLength)(struct gl_context *ctx, + struct gl_shader_program *shProg, + GLint *length); + /*@}*/ }; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 72824355838..f12825d2536 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -834,7 +834,13 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = shProg->BinaryRetreivableHint; return; case GL_PROGRAM_BINARY_LENGTH: - *params = 0; + assert(ctx->Const.NumProgramBinaryFormats == 0 || + ctx->Driver.GetProgramBinaryLength); + if (ctx->Const.NumProgramBinaryFormats == 0) { + *params = 0; + } else { + ctx->Driver.GetProgramBinaryLength(ctx, shProg, params); + } return; case GL_ACTIVE_ATOMIC_COUNTER_BUFFERS: if (!ctx->Extensions.ARB_shader_atomic_counters) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/17] main, glsl: Add UniformDataDefaults which stores uniform defaults
The ARB_get_program_binary extension requires that uniform values in a program be restored to their initial value just after linking. This patch saves off the initial values just after linking. When the program is restored by glProgramBinary, we can use this to copy the initial value of uniforms into UniformDataSlots. Signed-off-by: Jordan Justen--- src/compiler/glsl/link_uniform_initializers.cpp | 2 ++ src/compiler/glsl/link_uniforms.cpp | 3 +++ src/compiler/glsl/serialize.cpp | 18 -- src/mesa/main/mtypes.h | 1 + 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp index f70d9100e12..2395f5cf695 100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -354,5 +354,7 @@ link_set_uniform_initializers(struct gl_shader_program *prog, } } + memcpy(prog->data->UniformDataDefaults, prog->data->UniformDataSlots, + sizeof(union gl_constant_value) * prog->data->NumUniformDataSlots); ralloc_free(mem_ctx); } diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp index 7d141549f55..51e02bcf840 100644 --- a/src/compiler/glsl/link_uniforms.cpp +++ b/src/compiler/glsl/link_uniforms.cpp @@ -1338,6 +1338,9 @@ link_assign_uniform_storage(struct gl_context *ctx, prog->data->NumUniformStorage); data = rzalloc_array(prog->data->UniformStorage, union gl_constant_value, num_data_slots); + prog->data->UniformDataDefaults = + rzalloc_array(prog->data->UniformStorage, + union gl_constant_value, num_data_slots); } else { data = prog->data->UniformDataSlots; } diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index b4c9545702e..e55f1680ffc 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -449,7 +449,12 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog) unsigned vec_size = 
prog->data->UniformStorage[i].type->component_slots() * MAX2(prog->data->UniformStorage[i].array_elements, 1); - blob_write_bytes(metadata, prog->data->UniformStorage[i].storage, + unsigned slot = +prog->data->UniformStorage[i].storage - +prog->data->UniformDataSlots; + blob_write_bytes(metadata, >data->UniformDataSlots[slot], + sizeof(union gl_constant_value) * vec_size); + blob_write_bytes(metadata, >data->UniformDataDefaults[slot], sizeof(union gl_constant_value) * vec_size); } } @@ -472,6 +477,9 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog) data = rzalloc_array(uniforms, union gl_constant_value, prog->data->NumUniformDataSlots); prog->data->UniformDataSlots = data; + prog->data->UniformDataDefaults = + rzalloc_array(uniforms, union gl_constant_value, +prog->data->NumUniformDataSlots); prog->UniformHash = new string_to_uint_map; @@ -512,8 +520,14 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog) unsigned vec_size = prog->data->UniformStorage[i].type->component_slots() * MAX2(prog->data->UniformStorage[i].array_elements, 1); + unsigned slot = +prog->data->UniformStorage[i].storage - +prog->data->UniformDataSlots; + blob_copy_bytes(metadata, + (uint8_t *) >data->UniformDataSlots[slot], + sizeof(union gl_constant_value) * vec_size); blob_copy_bytes(metadata, - (uint8_t *) prog->data->UniformStorage[i].storage, + (uint8_t *) >data->UniformDataDefaults[slot], sizeof(union gl_constant_value) * vec_size); assert(vec_size + prog->data->UniformStorage[i].storage <= diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 2acf64eb56d..023692cc0e1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2871,6 +2871,7 @@ struct gl_shader_program_data /* Shader cache variables used during restore */ unsigned NumUniformDataSlots; union gl_constant_value *UniformDataSlots; + union gl_constant_value *UniformDataDefaults; bool cache_fallback; -- 2.14.1 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/17] docs/specs: Add GL_MESA_program_binary_formats extension spec
Similar idea to Tim's "spec: MESA_program_binary", but simplified and written to support both ARB_get_program_binary and OES_get_program_binary. Signed-off-by: Jordan Justen Cc: Ian Romanick Cc: Timothy Arceri --- docs/specs/MESA_program_binary_formats.txt | 59 ++ docs/specs/enums.txt | 3 ++ src/mapi/glapi/registry/gl.xml | 2 +- 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 docs/specs/MESA_program_binary_formats.txt diff --git a/docs/specs/MESA_program_binary_formats.txt b/docs/specs/MESA_program_binary_formats.txt new file mode 100644 index 000..65bd1a1a739 --- /dev/null +++ b/docs/specs/MESA_program_binary_formats.txt @@ -0,0 +1,59 @@ +Name + +MESA_program_binary_formats + +Name Strings + +GL_MESA_program_binary_formats + +Contact + +Jordan Justen (jordan.l.justen 'at' intel.com) + +Status + +Complete. + +Version + +Last Modified Date: October 28, 2017 +Revision: #1 + +Number + +TBD + +Dependencies + +For use with the OpenGL ARB_get_program_binary extension, or the +OpenGL ES OES_get_program_binary extension. + +Overview + +The get_program_binary extensions require a GLenum binaryFormat. +This extension documents that format for use with Mesa. + +New Procedures and Functions + +None. + +New Tokens + +GL_PROGRAM_BINARY_FORMAT_MESA 0x875F + +For ARB_get_program_binary, GL_PROGRAM_BINARY_FORMAT_MESA may be +returned from GetProgramBinary calls in the <binaryFormat> +parameter and when retrieving the value of PROGRAM_BINARY_FORMATS. + +For OES_get_program_binary, GL_PROGRAM_BINARY_FORMAT_MESA may be +returned from GetProgramBinaryOES calls in the <binaryFormat> +parameter and when retrieving the value of +PROGRAM_BINARY_FORMATS_OES. + +New State + +None. + +Revision History + +#01 10/28/2017 Jordan Justen First draft. 
diff --git a/docs/specs/enums.txt b/docs/specs/enums.txt index 4b0485f3490..8109cc50f97 100644 --- a/docs/specs/enums.txt +++ b/docs/specs/enums.txt @@ -63,6 +63,9 @@ GL_MESAX_texture_stack: GL_TEXTURE_1D_STACK_BINDING_MESAX0x875D GL_TEXTURE_2D_STACK_BINDING_MESAX0x875E +GL_MESA_program_binary_formats: +GL_PROGRAM_BINARY_FORMAT_MESA 0x875F + GL_MESA_program_debug GL_FRAGMENT_PROGRAM_POSITION_MESA 0x8BB0 GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x8BB1 diff --git a/src/mapi/glapi/registry/gl.xml b/src/mapi/glapi/registry/gl.xml index cbabe11b398..7360414bb35 100644 --- a/src/mapi/glapi/registry/gl.xml +++ b/src/mapi/glapi/registry/gl.xml @@ -5505,7 +5505,7 @@ typedef unsigned int GLhandleARB; - + -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/17] include/GL: Add GL_MESA_program_binary_formats
Signed-off-by: Jordan Justen--- include/GL/gl.h | 8 1 file changed, 8 insertions(+) diff --git a/include/GL/gl.h b/include/GL/gl.h index 5b284802885..6ae8088f6cb 100644 --- a/include/GL/gl.h +++ b/include/GL/gl.h @@ -2101,6 +2101,14 @@ typedef void (APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum t #endif +#ifndef GL_MESA_program_binary_formats +#define GL_MESA_program_binary_formats 1 + +#define GL_PROGRAM_BINARY_FORMAT_MESA 0x875F + +#endif /* GL_MESA_program_binary_formats */ + + /** ** NOTE! If you add new functions to this file, or update ** glext.h be sure to regenerate the gl_mangle.h file. See comments -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/17] i965 ARB_get_program_binary support
git://people.freedesktop.org/~jljusten/mesa i965-get-program-binary-v1 This series adds i965 support for ARB_get_program_binary with greater than 0 supported formats. Today we support this extension, but advertise support for 0 formats. This series allows i965 to advertise support for 1 format. This series defines a common Mesa format for ARB_get_program_binary, along with helper functions to read and write the format. We also define an OpenGL Mesa spec to be used with this binary format. The binary saved can only be reloaded on the exact same Mesa build using the exact same hardware. The i965 implementation saves out a serialized nir representation of the program. Later we can add support for saving the gen binary program as well. (We will still need the nir program for state based recompiles.) This implementation passes piglit, deqp and glcts functions. It also works with Dota 2, which appears to make use of the extension. Jordan Justen (17): docs/specs: Add GL_MESA_program_binary_formats extension spec include/GL: Add GL_MESA_program_binary_formats compiler: Fold shader_cache in with libglsl sources glsl: Split out shader program serialization main, glsl: Add UniformDataDefaults which stores uniform defaults i965: Add brw_program_deserialize_nir i965: Free serialized nir after deserializing i965: Add brw_program_serialize_nir i965: Fix memory leak when serializing nir main: Allow non-zero NUM_PROGRAM_BINARY_FORMATS main: Support 1 Mesa format with get for GL_PROGRAM_BINARY_FORMATS main: Support calling driver for GL_PROGRAM_BINARY_LENGTH main: Support calling driver for GetProgramBinary main: Support calling driver for ProgramBinary main: Clear shader program data whenever ProgramBinary is called util: Add Mesa ARB_get_program_binary helper functions i965: Add ARB_get_program_binary support using nir_serialization docs/specs/MESA_program_binary_formats.txt | 59 ++ docs/specs/enums.txt|3 + include/GL/gl.h |8 + src/compiler/Android.glsl.mk|3 +- 
src/compiler/Makefile.glsl.am |3 +- src/compiler/Makefile.sources |8 +- src/compiler/glsl/link_uniform_initializers.cpp |2 + src/compiler/glsl/link_uniforms.cpp |3 + src/compiler/glsl/meson.build |2 + src/compiler/glsl/serialize.cpp | 1252 +++ src/compiler/glsl/serialize.h | 50 + src/compiler/glsl/shader_cache.cpp | 1185 + src/compiler/shader_info.h |1 + src/mapi/glapi/registry/gl.xml |2 +- src/mesa/drivers/dri/i965/Makefile.sources |1 + src/mesa/drivers/dri/i965/brw_context.c |9 + src/mesa/drivers/dri/i965/brw_context.h | 16 + src/mesa/drivers/dri/i965/brw_disk_cache.c | 31 +- src/mesa/drivers/dri/i965/brw_link.cpp |9 +- src/mesa/drivers/dri/i965/brw_program.c | 35 + src/mesa/drivers/dri/i965/brw_program.h |7 + src/mesa/drivers/dri/i965/brw_program_binary.c | 200 src/mesa/drivers/dri/i965/meson.build |1 + src/mesa/main/dd.h | 20 + src/mesa/main/get.c |9 + src/mesa/main/get_hash_params.py|4 +- src/mesa/main/mtypes.h |4 + src/mesa/main/shaderapi.c | 63 +- src/util/Makefile.sources |2 + src/util/meson.build|2 + src/util/program_binary.c | 322 ++ src/util/program_binary.h | 91 ++ 32 files changed, 2162 insertions(+), 1245 deletions(-) create mode 100644 docs/specs/MESA_program_binary_formats.txt create mode 100644 src/compiler/glsl/serialize.cpp create mode 100644 src/compiler/glsl/serialize.h create mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c create mode 100644 src/util/program_binary.c create mode 100644 src/util/program_binary.h -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: replace GLenum with GLenum16 in common structures
On 11/08/2017 08:16 PM, Marek Olšák wrote: From: Marek OlšákFor lower CPU cache usage. All enums fit within 2 bytes. gl_context = 152400 -> 136824 bytes Wow. vbo_context = 22696 -> 21520 bytes --- src/mesa/drivers/dri/nouveau/nv04_state_frag.c | 4 +- src/mesa/drivers/dri/nouveau/nv10_state_frag.c | 4 +- src/mesa/main/glheader.h | 1 + src/mesa/main/mtypes.h | 210 - src/mesa/vbo/vbo_exec.h| 2 +- src/mesa/vbo/vbo_save.h| 4 +- src/mesa/vbo/vbo_save_draw.c | 2 +- 7 files changed, 114 insertions(+), 113 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c index 248a7d2..bfe8eae 100644 --- a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c @@ -42,22 +42,22 @@ NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0 struct combiner_state { struct gl_context *ctx; int unit; GLboolean alpha; GLboolean premodulate; /* GL state */ GLenum mode; - GLenum *source; - GLenum *operand; + GLenum16 *source; + GLenum16 *operand; GLuint logscale; /* Derived HW state */ uint32_t hw; }; #define __INIT_COMBINER_ALPHA_A GL_TRUE #define __INIT_COMBINER_ALPHA_RGB GL_FALSE /* Initialize a combiner_state struct from the texture unit diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c index c6e4bb0..42dff08 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c @@ -60,22 +60,22 @@ /* spare0_i = A_i * B_i + C_i * D_i */ #define RC_OUT_SUMNV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 struct combiner_state { struct gl_context *ctx; int unit; GLboolean premodulate; /* GL state */ GLenum mode; - GLenum *source; - GLenum *operand; + GLenum16 *source; + GLenum16 *operand; GLuint logscale; /* Derived HW state */ uint64_t in; uint32_t out; }; /* Initialize a combiner_state struct from the texture unit * context. 
*/ #define INIT_COMBINER(chan, ctx, rc, i) do { \ diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 3f2a923..3729604 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -36,20 +36,21 @@ #define GL_GLEXT_PROTOTYPES #include "GL/gl.h" #include "GL/glext.h" #ifdef __cplusplus extern "C" { #endif +typedef unsigned short GLenum16; /* custom Mesa type to save space */ typedef int GLclampx; #ifndef GL_OES_EGL_image typedef void *GLeglImageOES; #endif #ifndef GL_OES_EGL_image_external #define GL_TEXTURE_EXTERNAL_OES 0x8D65 diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index fd5306c..078cf20 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -375,21 +375,21 @@ struct gl_light /** * Light model state. */ struct gl_lightmodel { GLfloat Ambient[4];/**< ambient color */ GLboolean LocalViewer; /**< Local (or infinite) view point? */ GLboolean TwoSide; /**< Two (or one) sided lighting? */ - GLenum ColorControl;/**< either GL_SINGLE_COLOR + GLenum16 ColorControl; /**< either GL_SINGLE_COLOR *or GL_SEPARATE_SPECULAR_COLOR */ }; /** * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT) */ struct gl_accum_attrib { GLfloat ClearColor[4]; /**< Accumulation buffer clear color */ @@ -411,53 +411,53 @@ union gl_color_union /** * Color buffer attribute group (GL_COLOR_BUFFER_BIT). */ struct gl_colorbuffer_attrib { GLuint ClearIndex; /**< Index for glClear */ union gl_color_union ClearColor;/**< Color for glClear, unclamped */ GLuint IndexMask; /**< Color index write mask */ GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; /**< Each flag is 0xff or 0x0 */ - GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */ + GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */ GLenum16 ? 
/** * \name alpha testing */ /*@{*/ GLboolean AlphaEnabled;/**< Alpha test enabled flag */ - GLenum AlphaFunc; /**< Alpha test function */ + GLenum16 AlphaFunc; /**< Alpha test function */ GLfloat AlphaRefUnclamped; GLclampf AlphaRef; /**< Alpha reference value */ /*@}*/ /** * \name Blending */ /*@{*/ GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */ /* NOTE: this
[Mesa-dev] [PATCH] r600/query: drop rest of vi workaround code.
From: Dave AirlieThis isn't needed in r600 anymore. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_query.c | 46 ++- src/gallium/drivers/r600/r600_query.h | 4 --- 2 files changed, 13 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index aa3e36f..0003e2b 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -506,7 +506,6 @@ void r600_query_hw_destroy(struct r600_common_screen *rscreen, } r600_resource_reference(>buffer.buf, NULL); - r600_resource_reference(>workaround_buf, NULL); FREE(rquery); } @@ -932,23 +931,19 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; - if (query->workaround_buf) { - op = PRED_OP(PREDICATION_OP_BOOL64); - } else { - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - op = PRED_OP(PREDICATION_OP_ZPASS); - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - op = PRED_OP(PREDICATION_OP_PRIMCOUNT); - invert = !invert; - break; - default: - assert(0); - return; - } + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + op = PRED_OP(PREDICATION_OP_ZPASS); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + op = PRED_OP(PREDICATION_OP_PRIMCOUNT); + invert = !invert; + break; + default: + assert(0); + return; } /* if true then invert, see GL_ARB_conditional_render_inverted */ @@ -957,19 +952,6 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, else op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */ - /* Use the value written by compute shader as a workaround. Note that -* the wait flag does not apply in this predication mode. 
-* -* The shader outputs the result value to L2. Workarounds only affect VI -* and later, where the CP reads data from L2, so we don't need an -* additional flush. -*/ - if (query->workaround_buf) { - uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset; - emit_set_predicate(ctx, query->workaround_buf, va, op); - return; - } - op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW; /* emit predicate packets for all data blocks */ @@ -1067,8 +1049,6 @@ bool r600_query_hw_begin(struct r600_common_context *rctx, if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES)) r600_query_hw_reset_buffers(rctx, query); - r600_resource_reference(>workaround_buf, NULL); - r600_query_hw_emit_start(rctx, query); if (!query->buffer.buf) return false; diff --git a/src/gallium/drivers/r600/r600_query.h b/src/gallium/drivers/r600/r600_query.h index 1a3c683..acba1a6 100644 --- a/src/gallium/drivers/r600/r600_query.h +++ b/src/gallium/drivers/r600/r600_query.h @@ -193,10 +193,6 @@ struct r600_query_hw { struct list_head list; /* For transform feedback: which stream the query is for */ unsigned stream; - - /* Workaround via compute shader */ - struct r600_resource *workaround_buf; - unsigned workaround_offset; }; bool r600_query_hw_init(struct r600_common_screen *rscreen, -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4] i965 : optimized bucket index calculation
From: Aravindan Muthukumar Reducing Bucket index calculation to O(1). This algorithm calculates the index using a matrix method. Matrix arrangement is as below: Assuming PAGE_SIZE is 4096. 1*4096 2*4096 3*4096 4*4096 5*4096 6*4096 7*4096 8*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix it is clearly seen that every row follows the below way: ... ... ... n n+(1/4)n n+(1/2)n n+(3/4)n 2n Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) v4: Review comments on style and code comments implemented (Ian). v3: Review comments implemented (Ian). v2: Review comments implemented (Jason). Signed-off-by: Aravindan Muthukumar Signed-off-by: Kedar Karanje Reviewed-by: Yogesh Marathe --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 47 -- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 17036b5..f21df5a 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +/** + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + /* Calculating the pages and rounding up to the page size.
*/ + const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Row Bucket sizesclz((x-1) | 3) RowColumn +*in pages stride size +* 0: 1 2 3 4 -> 30 30 30 304 1 +* 1: 5 6 7 8 -> 29 29 29 294 1 +* 2: 10 12 14 16 -> 28 28 28 288 2 +* 3: 20 24 28 32 -> 27 27 27 27 16 4 +*/ + const unsigned row = 30 - __builtin_clz((pages - 1) | 3); + const unsigned row_max_pages = 4 << row; + + /* The '& ~2' is the special case for row 1. In row 1, max pages / +* 2 is 2, but the previous row maximum is zero (because there is +* no previous row). All row maximum sizes are power of 2, so that +* is the only case where that bit will be set. +*/ + const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; + int col_size_log2 = row - 1; + col_size_log2 += (col_size_log2 < 0); - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = >cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } + const unsigned col = (pages - prev_row_max_pages + +((1 << col_size_log2) - 1)) >> col_size_log2; - return NULL; + /* Calculating the index based on the row and column. */ + const unsigned index = (row * 4) + (col - 1); + + return (index < bufmgr->num_buckets) ? + >cache_bucket[index] : NULL; } int @@ -1254,6 +1281,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(>cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/9] st/mesa: add support for hw atomics to glsl->tgsi. (v4)
On 9 November 2017 at 11:54, Dave Airlie wrote: > From: Dave Airlie > > This adds support for creating the hw atomic tgsi from > the glsl codepaths. > > v2: drop the atomic index and move to backend. > v3: drop buffer decls. (Marek) > v4: fix off by one (Gert) Found a bug in my fix for this one, https://cgit.freedesktop.org/~airlied/mesa/log/?h=r600-atomic-submit has the correct version, Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
> On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote: > > From: Aravindan Muthukumar> > > > Now the complexity has been reduced to O(1) > > > > Algorithm calculates the index using matrix method. > > Matrix arrangement is as below: > > Assuming PAGE_SIZE is 4096. > > > > 1*4096 2*40963*40964*4096 > > 5*4096 6*40967*40968*4096 > > 10*4096 12*4096 14*4096 16*4096 > > 20*4096 24*4096 28*4096 32*4096 > >... ... ... ... > >... ... ... ... > >... ... ... max_cache_size > > > > From this matrix its clearly seen that every row follows the below > > way: > > ... ... ...n > > n+(1/4)n n+(1/2)n n+(3/4)n2n > > > > Row is calculated as log2(size/PAGE_SIZE) Column is calculated as > > converting the difference between the elements to fit into power size > > of two and indexing it. > > > > Final Index is (row*4)+(col-1) > > > > Tested with Intel Mesa CI. > > > > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% > > (n=20) > > > > v3: review comments implemented (Ian). > > v2: review comments implemented (Jason). > > > > Signed-off-by: Aravindan Muthukumar > > Signed-off-by: Kedar Karanje > > Reviewed-by: Yogesh Marathe > > --- > > src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 > > +++--- > > 1 file changed, 30 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > index 17036b5..9a423da 100644 > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > @@ -86,6 +86,8 @@ > > > > #define memclear(s) memset(, 0, sizeof(s)) > > > > +#define PAGE_SIZE 4096 > > + > > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > > > static inline int > > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t > pitch, uint32_t tiling) > > return ALIGN(pitch, tile_width); > > } > > > > +/* > > + * This function finds the correct bucket fit for the input size. 
> > + * The function works with O(1) complexity when the requested size > > + * was queried instead of iterating the size through all the buckets. > > + */ > > static struct bo_cache_bucket * > > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { > > - int i; > > + /* Calculating the pages and rounding up to the page size. */ > > + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; > > > > - for (i = 0; i < bufmgr->num_buckets; i++) { > > - struct bo_cache_bucket *bucket = >cache_bucket[i]; > > - if (bucket->size >= size) { > > - return bucket; > > - } > > - } > > + /* Finding the row number based on the calculated pages. */ > > + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); > > > > - return NULL; > > Why did you make random (and incorrect) style changes and delete > (useful) comments from the code I sent? > > > > Thanks Ian. I added comments based on my understanding and I get the > > > point I'll push v4 with your comments. > > + const unsigned int row_max_pages = 4 << rows; > > + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; > > + > > + /* Finding the column number using column interval. */ > > + int col_size_log2 = rows - 1; > > + col_size_log2 += (col_size_log2 < 0); > > + > > + const unsigned int col = ( (pages - prev_row_max_pages + > > +( (1 << col_size_log2) - 1) ) >> > > + col_size_log2 ); > > + > > + /* Calculating the index based on the row and column. */ > > + const unsigned int index = (rows * 4) + (col - 1); > > + > > + return (index < bufmgr->num_buckets) ? 
> > + >cache_bucket[index] : NULL; > > } > > > > int > > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) > > list_inithead(>cache_bucket[i].head); > > bufmgr->cache_bucket[i].size = size; > > bufmgr->num_buckets++; > > + > > + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size - 2048) == > > >cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size + 1) != > > + >cache_bucket[i]); > > } > > > > static void > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)
On 11/08/2017 08:12 PM, Brian Paul wrote: On 11/08/2017 06:28 PM, Ian Romanick wrote: Any thoughts about my data using __attribute__((__packed__))? Sorry, I didn't have time to dig into it. I took a look this evening. I think the ENUM_8BIT idea will work for GCC and MSVC but only for C++ sources. MSVC doesn't like the sized enum syntax in C compilation units (unless there's some compiler flag I haven't found yet). So, we could use it in the GLSL compiler, but not over in the gallium headers. Does that matter to you? Could I address this issue in a follow-on after the current series? FWIW: here's what it would basically look like: #ifndef __cplusplus #error This only works with C++ #endif #if defined(_MSC_VER) #define ENUM_8BIT(NAME) enum NAME : unsigned char #elif defined(__GNUC__) #define ENUM_8BIT(NAME) enum __attribute__((__packed__)) NAME #else #define ENUM_8BIT(NAME) enum NAME #endif ENUM_8BIT(glsl_base_type) { GLSL_TYPE_UINT = 0, GLSL_TYPE_INT, [...] }; -Brian -Brian On 11/07/2017 04:07 PM, Brian Paul wrote: Declare glsl_type::sampled_type as glsl_base_type as we do for the base_type field. And make base_type a bitfield to save a few bytes. Update glsl_type constructor to take glsl_base_type instead of unsigned and pass GLSL_TYPE_VOID instead of zero. No Piglit regressions with llvmpipe. v2: - Declare both base_type and sampled_type as 8-bit fields - Use the new ASSERT_BITFIELD_SIZE() macro. 
--- src/compiler/glsl_types.cpp | 30 +++--- src/compiler/glsl_types.h | 28 +--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 704b63c..107a81f 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, unsigned vector_elements, unsigned matrix_columns, const char *name) : gl_type(gl_type), - base_type(base_type), + base_type(base_type), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0) { @@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type, glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, enum glsl_sampler_dim dim, bool shadow, bool array, - unsigned type, const char *name) : + glsl_base_type type, const char *name) : gl_type(gl_type), - base_type(base_type), + base_type(base_type), sampled_type(type), sampler_dimensionality(dim), sampler_shadow(shadow), - sampler_array(array), sampled_type(type), interface_packing(0), + sampler_array(array), interface_packing(0), interface_row_major(0), length(0) { mtx_lock(_type::mem_mutex); @@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, const char *name) : gl_type(0), - base_type(GLSL_TYPE_STRUCT), + base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum 
glsl_interface_packing packing, bool row_major, const char *name) : gl_type(0), - base_type(GLSL_TYPE_INTERFACE), + base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing((unsigned) packing), + interface_packing((unsigned) packing), interface_row_major((unsigned) row_major), vector_elements(0), matrix_columns(0), length(num_fields) @@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, glsl_type::glsl_type(const glsl_type *return_type, const glsl_function_param *params, unsigned num_params) : gl_type(0), - base_type(GLSL_TYPE_FUNCTION), + base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(0), matrix_columns(0), length(num_params) { @@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type, glsl_type::glsl_type(const char *subroutine_name) : gl_type(0),
[Mesa-dev] [PATCH 7/6] radeonsi: don't call r600_can_dma_copy_buffer for DISCARD_RANGE
From: Marek Olšák we don't use dma_data in this codepath. --- src/gallium/drivers/radeon/r600_buffer_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index cdcd37b..2e0f71a 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -426,22 +426,21 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, /* At this point, the buffer is always idle. */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } else { /* Fall back to a temporary buffer. */ usage |= PIPE_TRANSFER_DISCARD_RANGE; } } if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | -PIPE_TRANSFER_PERSISTENT)) && - r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) || +PIPE_TRANSFER_PERSISTENT))) || (rbuffer->flags & RADEON_FLAG_SPARSE))) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ if (rbuffer->flags & RADEON_FLAG_SPARSE || force_discard_range || si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { /* Do a wait-free write-only transfer using a temporary buffer. */ -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] radeonsi: remove has_cp_dma, has_streamout flags
From: Marek Olšák--- src/gallium/drivers/radeon/r600_buffer_common.c | 5 + src/gallium/drivers/radeon/r600_pipe_common.h | 2 -- src/gallium/drivers/radeonsi/si_pipe.c | 3 --- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index f0cfd09..cdcd37b 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -347,24 +347,21 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx, transfer->staging = staging; *ptransfer = >b.b; return data; } static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, unsigned dstx, unsigned srcx, unsigned size) { bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); - return rctx->screen->has_cp_dma || - (dword_aligned && (rctx->dma.cs || - rctx->screen->has_streamout)); - + return dword_aligned && rctx->dma.cs; } static void *r600_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **ptransfer) { struct r600_common_context *rctx = (struct r600_common_context*)ctx; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 61560ac..2ac 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -384,22 +384,20 @@ struct r600_memory_object { uint32_toffset; }; struct r600_common_screen { struct pipe_screen b; struct radeon_winsys*ws; enum radeon_family family; enum chip_class chip_class; struct radeon_info info; uint64_tdebug_flags; - boolhas_cp_dma; - boolhas_streamout; boolhas_rbplus; /* if RB+ registers exist */ boolrbplus_allowed; /* if RB+ is allowed */ struct disk_cache *disk_shader_cache; struct slab_parent_pool pool_transfers; /* Texture filter settings. 
*/ int force_aniso; /* -1 = disabled */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e84de36..55760fa 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1100,23 +1100,20 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->dfsm_allowed = sscreen->dpbb_allowed && !(sscreen->b.debug_flags & DBG(NO_DFSM)); } /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI; - sscreen->b.has_cp_dma = true; - sscreen->b.has_streamout = true; - /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ if (sscreen->b.family == CHIP_STONEY || sscreen->b.chip_class >= GFX9) { sscreen->b.has_rbplus = true; sscreen->b.rbplus_allowed = !(sscreen->b.debug_flags & DBG(NO_RB_PLUS)) && (sscreen->b.family == CHIP_STONEY || -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] radeonsi: pack r600_surface better
From: Marek Olšák160 -> 136 bytes --- src/gallium/drivers/radeon/r600_pipe_common.h | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 6b0a743..61560ac 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -293,41 +293,41 @@ struct r600_texture { */ unsignedps_draw_ratio; /* The number of clears since the last DCC usage analysis. */ unsignednum_slow_clears; }; struct r600_surface { struct pipe_surface base; /* These can vary with block-compressed textures. */ - unsigned width0; - unsigned height0; + uint16_t width0; + uint16_t height0; - bool color_initialized; - bool depth_initialized; + bool color_initialized:1; + bool depth_initialized:1; /* Misc. color flags. */ - bool color_is_int8; - bool color_is_int10; - bool dcc_incompatible; + bool color_is_int8:1; + bool color_is_int10:1; + bool dcc_incompatible:1; /* Color registers. */ unsigned cb_color_info; unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ unsigned cb_dcc_control;/* VI and later */ - unsigned spi_shader_col_format; /* no blending, no alpha-to-coverage. */ - unsigned spi_shader_col_format_alpha; /* alpha-to-coverage */ - unsigned spi_shader_col_format_blend; /* blending without alpha. */ - unsigned spi_shader_col_format_blend_alpha; /* blending with alpha. */ + unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ + unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ + unsigned spi_shader_col_format_blend:8; /* blending without alpha. */ + unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */ /* DB registers. 
*/ uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */ uint64_t db_stencil_base; uint64_t db_htile_data_base; unsigned db_depth_info; unsigned db_z_info; unsigned db_z_info2;/* GFX9+ */ unsigned db_depth_view; unsigned db_depth_size; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] radeonsi: pack r600_texture better
From: Marek Olšák1752 -> 1736 bytes --- src/gallium/drivers/radeon/r600_pipe_common.h | 53 +-- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 43b11262..6b0a743 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -226,82 +226,81 @@ struct r600_cmask_info { uint64_t offset; uint64_t size; unsigned alignment; unsigned slice_tile_max; uint64_t base_address_reg; }; struct r600_texture { struct r600_resourceresource; + struct radeon_surf surface; uint64_tsize; - unsignednum_level0_transfers; - enum pipe_formatdb_render_format; - boolis_depth; - booldb_compatible; - boolcan_sample_z; - boolcan_sample_s; - unsigneddirty_level_mask; /* each bit says if that mipmap is compressed */ - unsignedstencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ struct r600_texture *flushed_depth_texture; - struct radeon_surf surface; /* Colorbuffer compression and fast clear. */ struct r600_fmask_info fmask; struct r600_cmask_info cmask; struct r600_resource*cmask_buffer; uint64_tdcc_offset; /* 0 = disabled */ unsignedcb_color_info; /* fast clear enable bit */ unsignedcolor_clear_value[2]; unsignedlast_msaa_resolve_target_micro_mode; + unsignednum_level0_transfers; /* Depth buffer compression and fast clear. 
*/ uint64_thtile_offset; - booltc_compatible_htile; - booldepth_cleared; /* if it was cleared at least once */ float depth_clear_value; - boolstencil_cleared; /* if it was cleared at least once */ + uint16_tdirty_level_mask; /* each bit says if that mipmap is compressed */ + uint16_tstencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ + enum pipe_formatdb_render_format:16; uint8_t stencil_clear_value; - boolupgraded_depth; /* upgraded from unorm to Z32_FLOAT */ + booltc_compatible_htile:1; + booldepth_cleared:1; /* if it was cleared at least once */ + boolstencil_cleared:1; /* if it was cleared at least once */ + boolupgraded_depth:1; /* upgraded from unorm to Z32_FLOAT */ + boolis_depth:1; + booldb_compatible:1; + boolcan_sample_z:1; + boolcan_sample_s:1; + /* We need to track DCC dirtiness, because st/dri usually calls +* flush_resource twice per frame (not a bug) and we don't wanna +* decompress DCC twice. Also, the dirty tracking must be done even +* if DCC isn't used, because it's required by the DCC usage analysis +* for a possible future enablement. +*/ + boolseparate_dcc_dirty:1; + /* Statistics gathering for the DCC enablement heuristic. */ + booldcc_gather_statistics:1; + /* Counter that should be non-zero if the texture is bound to a +* framebuffer. Implemented in radeonsi only. +*/ + unsignedframebuffers_bound; /* Whether the texture is a displayable back buffer and needs DCC * decompression, which is expensive. Therefore, it's enabled only * if statistics suggest that it will pay off and it's allocated * separately. It can't be bound as a sampler by apps. Limited to * target == 2D and last_level == 0. If enabled, dcc_offset contains * the absolute GPUVM address, not the relative one. */ struct r600_resource*dcc_separate_buffer; /* When DCC is temporarily disabled, the separate buffer is here. 
*/ struct r600_resource*last_dcc_separate_buffer; - /* We need to track DCC dirtiness, because st/dri usually calls -* flush_resource twice per frame (not
[Mesa-dev] [PATCH] mesa: replace GLenum with GLenum16 in common structures
From: Marek OlšákFor lower CPU cache usage. All enums fit within 2 bytes. gl_context = 152400 -> 136824 bytes vbo_context = 22696 -> 21520 bytes --- src/mesa/drivers/dri/nouveau/nv04_state_frag.c | 4 +- src/mesa/drivers/dri/nouveau/nv10_state_frag.c | 4 +- src/mesa/main/glheader.h | 1 + src/mesa/main/mtypes.h | 210 - src/mesa/vbo/vbo_exec.h| 2 +- src/mesa/vbo/vbo_save.h| 4 +- src/mesa/vbo/vbo_save_draw.c | 2 +- 7 files changed, 114 insertions(+), 113 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c index 248a7d2..bfe8eae 100644 --- a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c @@ -42,22 +42,22 @@ NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0 struct combiner_state { struct gl_context *ctx; int unit; GLboolean alpha; GLboolean premodulate; /* GL state */ GLenum mode; - GLenum *source; - GLenum *operand; + GLenum16 *source; + GLenum16 *operand; GLuint logscale; /* Derived HW state */ uint32_t hw; }; #define __INIT_COMBINER_ALPHA_A GL_TRUE #define __INIT_COMBINER_ALPHA_RGB GL_FALSE /* Initialize a combiner_state struct from the texture unit diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c index c6e4bb0..42dff08 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c @@ -60,22 +60,22 @@ /* spare0_i = A_i * B_i + C_i * D_i */ #define RC_OUT_SUM NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 struct combiner_state { struct gl_context *ctx; int unit; GLboolean premodulate; /* GL state */ GLenum mode; - GLenum *source; - GLenum *operand; + GLenum16 *source; + GLenum16 *operand; GLuint logscale; /* Derived HW state */ uint64_t in; uint32_t out; }; /* Initialize a combiner_state struct from the texture unit * context. 
*/ #define INIT_COMBINER(chan, ctx, rc, i) do { \ diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 3f2a923..3729604 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -36,20 +36,21 @@ #define GL_GLEXT_PROTOTYPES #include "GL/gl.h" #include "GL/glext.h" #ifdef __cplusplus extern "C" { #endif +typedef unsigned short GLenum16; /* custom Mesa type to save space */ typedef int GLclampx; #ifndef GL_OES_EGL_image typedef void *GLeglImageOES; #endif #ifndef GL_OES_EGL_image_external #define GL_TEXTURE_EXTERNAL_OES 0x8D65 diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index fd5306c..078cf20 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -375,21 +375,21 @@ struct gl_light /** * Light model state. */ struct gl_lightmodel { GLfloat Ambient[4]; /**< ambient color */ GLboolean LocalViewer; /**< Local (or infinite) view point? */ GLboolean TwoSide; /**< Two (or one) sided lighting? */ - GLenum ColorControl;/**< either GL_SINGLE_COLOR + GLenum16 ColorControl; /**< either GL_SINGLE_COLOR *or GL_SEPARATE_SPECULAR_COLOR */ }; /** * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT) */ struct gl_accum_attrib { GLfloat ClearColor[4]; /**< Accumulation buffer clear color */ @@ -411,53 +411,53 @@ union gl_color_union /** * Color buffer attribute group (GL_COLOR_BUFFER_BIT). 
*/ struct gl_colorbuffer_attrib { GLuint ClearIndex; /**< Index for glClear */ union gl_color_union ClearColor;/**< Color for glClear, unclamped */ GLuint IndexMask; /**< Color index write mask */ GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; /**< Each flag is 0xff or 0x0 */ - GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */ + GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */ /** * \name alpha testing */ /*@{*/ GLboolean AlphaEnabled; /**< Alpha test enabled flag */ - GLenum AlphaFunc; /**< Alpha test function */ + GLenum16 AlphaFunc; /**< Alpha test function */ GLfloat AlphaRefUnclamped; GLclampf AlphaRef; /**< Alpha reference value */ /*@}*/ /** * \name Blending */ /*@{*/ GLbitfield BlendEnabled;/**< Per-buffer blend enable flags */ /* NOTE: this does _not_ depend on fragment clamping or any other clamping * control, only on the fixed-pointness of the
[Mesa-dev] [PATCH 3/6] radeonsi: clean up r600_surface
From: Marek Olšák216 -> 160 bytes --- src/gallium/drivers/radeon/r600_pipe_common.h | 37 --- src/gallium/drivers/radeon/r600_texture.c | 3 --- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 48501a1..43b11262 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -301,63 +301,48 @@ struct r600_surface { struct pipe_surface base; /* These can vary with block-compressed textures. */ unsigned width0; unsigned height0; bool color_initialized; bool depth_initialized; /* Misc. color flags. */ - bool alphatest_bypass; - bool export_16bpc; bool color_is_int8; bool color_is_int10; bool dcc_incompatible; /* Color registers. */ unsigned cb_color_info; - unsigned cb_color_base; unsigned cb_color_view; - unsigned cb_color_size; /* R600 only */ - unsigned cb_color_dim; /* EG only */ - unsigned cb_color_pitch;/* EG and later */ - unsigned cb_color_slice;/* EG and later */ - unsigned cb_color_attrib; /* EG and later */ + unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ unsigned cb_dcc_control;/* VI and later */ - unsigned cb_color_fmask;/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ - unsigned cb_color_fmask_slice; /* EG and later */ - unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */ - unsigned cb_color_mask; /* R600 only */ - unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ - unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ - unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ - unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ - struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ - struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ + unsigned spi_shader_col_format; /* no blending, no alpha-to-coverage. 
*/ + unsigned spi_shader_col_format_alpha; /* alpha-to-coverage */ + unsigned spi_shader_col_format_blend; /* blending without alpha. */ + unsigned spi_shader_col_format_blend_alpha; /* blending with alpha. */ /* DB registers. */ - uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ - uint64_t db_stencil_base; /* EG and later */ + uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */ + uint64_t db_stencil_base; uint64_t db_htile_data_base; - unsigned db_depth_info; /* R600 only, then SI and later */ - unsigned db_z_info; /* EG and later */ + unsigned db_depth_info; + unsigned db_z_info; unsigned db_z_info2;/* GFX9+ */ unsigned db_depth_view; unsigned db_depth_size; - unsigned db_depth_slice;/* EG and later */ - unsigned db_stencil_info; /* EG and later */ + unsigned db_depth_slice; + unsigned db_stencil_info; unsigned db_stencil_info2; /* GFX9+ */ - unsigned db_prefetch_limit; /* R600 only */ unsigned db_htile_surface; - unsigned db_preload_control;/* EG and later */ }; struct r600_mmio_counter { unsigned busy; unsigned idle; }; union r600_mmio_counters { struct { /* For global GPU load including SDMA. 
*/ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index a1b6ed4..8c33923 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1966,23 +1966,20 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, } return si_create_surface_custom(pipe, tex, templ, width0, height0, width, height); } static void r600_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surface) { - struct r600_surface *surf = (struct r600_surface*)surface; - r600_resource_reference(>cb_buffer_fmask, NULL); - r600_resource_reference(>cb_buffer_cmask, NULL); pipe_resource_reference(>texture, NULL); FREE(surface); } static void r600_clear_texture(struct pipe_context *pipe, struct pipe_resource *tex, unsigned level, const struct pipe_box *box,
[Mesa-dev] [PATCH 1/6] radeonsi: remove DBG_NO_DISCARD_RANGE
From: Marek Olšák--- src/gallium/drivers/radeon/r600_buffer_common.c | 2 -- src/gallium/drivers/radeon/r600_pipe_common.c | 2 -- src/gallium/drivers/radeon/r600_pipe_common.h | 1 - 3 files changed, 5 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 519e52e..f0cfd09 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -361,21 +361,20 @@ static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, } static void *r600_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **ptransfer) { struct r600_common_context *rctx = (struct r600_common_context*)ctx; - struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; struct r600_resource *rbuffer = r600_resource(resource); uint8_t *data; assert(box->x + box->width <= resource->width0); /* From GL_AMD_pinned_memory issues: * * 4) Is glMapBuffer on a shared buffer guaranteed to return the *same system address which was specified at creation time? * @@ -429,21 +428,20 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, if (r600_invalidate_buffer(rctx, rbuffer)) { /* At this point, the buffer is always idle. */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } else { /* Fall back to a temporary buffer. */ usage |= PIPE_TRANSFER_DISCARD_RANGE; } } if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && - !(rscreen->debug_flags & DBG(NO_DISCARD_RANGE)) && ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT)) && r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) || (rbuffer->flags & RADEON_FLAG_SPARSE))) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. 
*/ if (rbuffer->flags & RADEON_FLAG_SPARSE || force_discard_range || diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index aa72187..e0d50d0 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -821,22 +821,20 @@ static const struct debug_named_value common_debug_options[] = { { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." }, { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." }, { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." }, { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." }, { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, /* features */ { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" }, { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" }, - /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ - { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of INVALIDATE_RANGE map flags" }, { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" }, { "notiling", DBG(NO_TILING), "Disable tiling" }, { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." }, { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." }, { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." }, { "nowc", DBG(NO_WC), "Disable GTT write combining" }, { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." }, { "nodcc", DBG(NO_DCC), "Disable DCC." }, { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." }, { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." 
}, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index f803ee4..47306c6 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -94,21 +94,20 @@ enum { /* Information logging options: */ DBG_INFO, DBG_TEX, DBG_COMPUTE, DBG_VM, /* Driver options: */ DBG_FORCE_DMA, DBG_NO_ASYNC_DMA, - DBG_NO_DISCARD_RANGE, DBG_NO_WC, DBG_CHECK_VM,
[Mesa-dev] [PATCH 2/6] radeonsi: remove r600_texture::non_disp_tiling
From: Marek Olšák--- src/gallium/drivers/radeon/r600_pipe_common.h | 2 -- src/gallium/drivers/radeon/r600_texture.c | 7 --- 2 files changed, 9 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 47306c6..48501a1 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -256,22 +256,20 @@ struct r600_texture { /* Depth buffer compression and fast clear. */ uint64_thtile_offset; booltc_compatible_htile; booldepth_cleared; /* if it was cleared at least once */ float depth_clear_value; boolstencil_cleared; /* if it was cleared at least once */ uint8_t stencil_clear_value; boolupgraded_depth; /* upgraded from unorm to Z32_FLOAT */ - boolnon_disp_tiling; /* R600-Cayman only */ - /* Whether the texture is a displayable back buffer and needs DCC * decompression, which is expensive. Therefore, it's enabled only * if statistics suggest that it will pay off and it's allocated * separately. It can't be bound as a sampler by apps. Limited to * target == 2D and last_level == 0. If enabled, dcc_offset contains * the absolute GPUVM address, not the relative one. */ struct r600_resource*dcc_separate_buffer; /* When DCC is temporarily disabled, the separate buffer is here. 
*/ struct r600_resource*last_dcc_separate_buffer; diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 0800c0f..a1b6ed4 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -546,21 +546,20 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, rtex->can_sample_s = new_tex->can_sample_s; rtex->surface = new_tex->surface; rtex->fmask = new_tex->fmask; rtex->cmask = new_tex->cmask; rtex->cb_color_info = new_tex->cb_color_info; rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; rtex->htile_offset = new_tex->htile_offset; rtex->tc_compatible_htile = new_tex->tc_compatible_htile; rtex->depth_cleared = new_tex->depth_cleared; rtex->stencil_cleared = new_tex->stencil_cleared; - rtex->non_disp_tiling = new_tex->non_disp_tiling; rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics; rtex->framebuffers_bound = new_tex->framebuffers_bound; if (new_bind_flag == PIPE_BIND_LINEAR) { assert(!rtex->htile_offset); assert(!rtex->cmask.size); assert(!rtex->fmask.size); assert(!rtex->dcc_offset); assert(!rtex->is_depth); } @@ -1148,24 +1147,20 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->db_render_format = base->format; else { rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT && base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT; } } else { rtex->db_render_format = base->format; } - /* Tiled depth textures utilize the non-displayable tile order. -* This must be done after r600_setup_surface. -* Applies to R600-Cayman. */ - rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D; /* Applies to GCN. */ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; /* Disable separate DCC at the beginning. 
DRI2 doesn't reuse buffers * between frames, so the only thing that can enable separate DCC * with DRI2 is multiple slow clears within a frame. */ rtex->ps_draw_ratio = 0; if (rtex->is_depth) { @@ -1484,22 +1479,20 @@ bool si_init_flushed_depth_texture(struct pipe_context *ctx, resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH; if (staging) resource.flags |= R600_RESOURCE_FLAG_TRANSFER; *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, ); if (*flushed_depth_texture == NULL) { R600_ERR("failed to create temporary texture to hold flushed depth\n"); return false; } - - (*flushed_depth_texture)->non_disp_tiling = false; return true; } /** * Initialize the
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #10 from Dave Gilbert --- I believe I'm still seeing this: dg@hath:~/ocl2$ clinfo Number of platforms 1 Platform Name Clover Platform Vendor Mesa Platform Version OpenCL 1.1 Mesa 17.4.0-devel (git-a16dc04ad5) dg@hath:~/ocl2$ echo $LD_LIBRARY_PATH /home/dg/mesa/try/lib: so I *think* it's using my build, and I believe I'm still seeing it. Is my test valid or do I really need that finish? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)
On 11/08/2017 06:28 PM, Ian Romanick wrote: Any thoughts about my data using __attribute__((__packed__))? Sorry, I didn't have time to dig into it. I took a look this evening. I think the ENUM_8BIT idea will work for GCC and MSVC but only for C++ sources. MSVC doesn't like the sized enum syntax in C compilation units (unless there's some compiler flag I haven't found yet). So, we could use it in the GLSL compiler, but not over in the gallium headers. Does that matter to you? Could I address this issue in a follow-on after the current series? -Brian On 11/07/2017 04:07 PM, Brian Paul wrote: Declare glsl_type::sampled_type as glsl_base_type as we do for the base_type field. And make base_type a bitfield to save a few bytes. Update glsl_type constructor to take glsl_base_type instead of unsigned and pass GLSL_TYPE_VOID instead of zero. No Piglit regressions with llvmpipe. v2: - Declare both base_type and sampled_type as 8-bit fields - Use the new ASSERT_BITFIELD_SIZE() macro. --- src/compiler/glsl_types.cpp | 30 +++--- src/compiler/glsl_types.h | 28 +--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 704b63c..107a81f 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, unsigned vector_elements, unsigned matrix_columns, const char *name) : gl_type(gl_type), - base_type(base_type), + base_type(base_type), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0) { @@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type, glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, enum glsl_sampler_dim dim, bool shadow, bool array, - unsigned type, const char *name) : 
+ glsl_base_type type, const char *name) : gl_type(gl_type), - base_type(base_type), + base_type(base_type), sampled_type(type), sampler_dimensionality(dim), sampler_shadow(shadow), - sampler_array(array), sampled_type(type), interface_packing(0), + sampler_array(array), interface_packing(0), interface_row_major(0), length(0) { mtx_lock(_type::mem_mutex); @@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, const char *name) : gl_type(0), - base_type(GLSL_TYPE_STRUCT), + base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum glsl_interface_packing packing, bool row_major, const char *name) : gl_type(0), - base_type(GLSL_TYPE_INTERFACE), + base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing((unsigned) packing), + interface_packing((unsigned) packing), interface_row_major((unsigned) row_major), vector_elements(0), matrix_columns(0), length(num_fields) @@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, glsl_type::glsl_type(const glsl_type *return_type, const glsl_function_param *params, unsigned num_params) : gl_type(0), - base_type(GLSL_TYPE_FUNCTION), + base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(0), matrix_columns(0), length(num_params) { @@ -191,9 +191,9 @@ 
glsl_type::glsl_type(const glsl_type *return_type, glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), - base_type(GLSL_TYPE_SUBROUTINE), + base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampled_type(0), interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), vector_elements(1), matrix_columns(1), length(0) { @@ -442,9 +442,9 @@ _mesa_glsl_release_types(void) glsl_type::glsl_type(const
Re: [Mesa-dev] [PATCH 2/4] r600: use mysterious DX10_CLAMP bit in pixel shader setup
FWIW I'd really appreciate it if someone could shed some light on that mystery bit there... Roland Am 09.11.2017 um 03:58 schrieb srol...@vmware.com: > From: Roland Scheidegger> > I don't know what this bit really does. The docs are somewhere between > misleading and wrong however, as at least the newer ones (that bit exists with > GCN as well) imply all NaNs would get converted to zeros, which is definitely > NOT the case (and that would not be dx10 compliant neither), the r600 ones are > also talking about "dx10 style" vs "dx9 style" clamp, whatever that means for > dx9... Makes no difference at all with piglit's isinf-and-isnan tests, so very > obviously NaNs are still generated just fine. > radeonsi also seems to set this bit nowadays (the llvm amdgpu code comment > now says "Make clamp modifier on NaN input returns 0" instead of "Do not > clamp NAN to 0" since it was changed). > > This prevents misrenderings in This War of Mine since using ieee > muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use > clamped rcp opcode, which would also fix this. > > AMD, it would be really really nice if there would be useful/correct/accurate > information about this bit... > The bit can be set for all shader stages, and maybe it should be set but I > really have no idea... 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544 > --- > src/gallium/drivers/r600/evergreen_state.c | 1 + > src/gallium/drivers/r600/r600_state.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 96eb35a981..ddd59dc0b5 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context > *ctx, struct r600_pipe_shader > r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ >S_028844_NUM_GPRS(rshader->bc.ngpr) | >S_028844_PRIME_CACHE_ON_DRAW(1) | > + S_028844_DX10_CLAMP(1) | >S_028844_STACK_SIZE(rshader->bc.nstack)); > /* After that, the NOP relocation packet must be emitted (shader->bo, > RADEON_USAGE_READ). */ > > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index c21e8dabb1..c0d0b1667a 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -2548,6 +2548,7 @@ void r600_update_ps_state(struct pipe_context *ctx, > struct r600_pipe_shader *sha > r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2); > r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/ >S_028850_NUM_GPRS(rshader->bc.ngpr) | > + S_028850_DX10_CLAMP(1) | >S_028850_STACK_SIZE(rshader->bc.nstack) | >S_028850_UNCACHED_FIRST_INST(ufi)); > r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */ > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.
All on Juniper. But anyway, I've got another solution, with the only drawback that I don't really know what it does due to docs being lackluster/misleading there :-). But that would let us keep the ieee opcodes. And while I don't know what it does, I suspect it's a better idea regardless ;-). hw supported by nouveau drivers might just do the same without anyone even knowing with some luck... Roland Am 08.11.2017 um 07:20 schrieb Ilia Mirkin: > Actually cayman gets half of it - it gets the abs, but not clamped. I > wonder what happens if you go the other way -- use the IEEE version of > the op for RSQ() (presumably you're not testing this on cayman). > > On Wed, Nov 8, 2017 at 1:18 AM, Ilia Mirkinwrote: >> tgsi_rsq appears to ignore the passed-in op and always puts in >> ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on >> the RSQ() argument. This only happens for eg, not cayman. (Probably >> why only the rcp_clamped change appeared to be necessary.) >> >> This is odd though, because there's no clamping like that in other >> drivers. The trace you made looks fine on both nvc0 and nv50. >> >> On Tue, Nov 7, 2017 at 11:01 PM, wrote: >>> From: Roland Scheidegger >>> >>> r600 already used the clamped versions, but for some reason this was >>> different to eg/cayman. >>> (Note that it has been different since essentially forever, 7 years, since >>> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed >>> this for r600 but not eg (cayman wasn't supported back then, but probably >>> copied this from the eg part later). The commit does not mention any reason >>> why this difference should exist.) >>> This seems a bit unfortunate, since it would be nice to use ieee arithmetic, >>> I have no idea what this could potentially break and no idea if it really >>> makes sense going back to legacy-style rcp/rsq... 
>>> This however prevents misrenderings in This War of Mine since using ieee >>> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking >>> only rcp_clamped is necessary for this. It seems likely the root cause is >>> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it >>> apparently works with other drivers, not sure what's up with that... >>> >>> Bugzilla: >>> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D103544=DwIBaQ=uilaK90D4TOVoH58JNXRgQ=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0=FYMO5RZgIVWRluOu5DMEzW2xYcV3A5ZOOioWlZcdonw=b2fwORC2yAK7ezpjud4xeebdQRW-632-SZjTJGI-JHk= >>> --- >>> src/gallium/drivers/r600/r600_shader.c | 16 >>> 1 file changed, 12 insertions(+), 4 deletions(-) >>> >>> diff --git a/src/gallium/drivers/r600/r600_shader.c >>> b/src/gallium/drivers/r600/r600_shader.c >>> index 6a755bb3fd..62fc4da901 100644 >>> --- a/src/gallium/drivers/r600/r600_shader.c >>> +++ b/src/gallium/drivers/r600/r600_shader.c >>> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction >>> eg_shader_tgsi_instruction[] = >>> [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, >>> [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, >>> [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, >>> - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, >>> tgsi_trans_srcx_replicate}, >>> - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, >>> + /* XXX: >>> +* For state trackers other than OpenGL, we'll want to use >>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead. 
>>> +*/ >>> + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, >>> tgsi_trans_srcx_replicate}, >>> + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq}, >>> [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, >>> [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, >>> [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, >>> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction >>> cm_shader_tgsi_instruction[] = >>> [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, >>> [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, >>> [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, >>> - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, >>> cayman_emit_float_instr}, >>> - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, >>> cayman_emit_float_instr}, >>> + /* XXX: >>> +* For state trackers other than OpenGL, we'll want to use >>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead. >>> +*/ >>> + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, >>> cayman_emit_float_instr}, >>> + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_CLAMPED, >>> cayman_emit_float_instr}, >>> [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, >>> [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, >>> [TGSI_OPCODE_MUL] = {
Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.
Am 08.11.2017 um 07:18 schrieb Ilia Mirkin: > tgsi_rsq appears to ignore the passed-in op and always puts in > ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on > the RSQ() argument. This only happens for eg, not cayman. (Probably > why only the rcp_clamped change appeared to be necessary.) Right, I noticed that afterwards, it is using the clamped version for rsq for eg. > > This is odd though, because there's no clamping like that in other > drivers. The trace you made looks fine on both nvc0 and nv50. Yes, it also looks fine on llvmpipe, which adheres to strict ieee rules (or rather, strict dx10 rules for this, but they are mostly identical, with min/max returning the non-nan, which I'd nearly bet nvc0/nv50 do too). Roland > > On Tue, Nov 7, 2017 at 11:01 PM,wrote: >> From: Roland Scheidegger >> >> r600 already used the clamped versions, but for some reason this was >> different to eg/cayman. >> (Note that it has been different since essentially forever, 7 years, since >> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed >> this for r600 but not eg (cayman wasn't supported back then, but probably >> copied this from the eg part later). The commit does not mention any reason >> why this difference should exist.) >> This seems a bit unfortunate, since it would be nice to use ieee arithmetic, >> I have no idea what this could potentially break and no idea if it really >> makes sense going back to legacy-style rcp/rsq... >> This however prevents misrenderings in This War of Mine since using ieee >> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking >> only rcp_clamped is necessary for this. It seems likely the root cause is >> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it >> apparently works with other drivers, not sure what's up with that... 
>> >> Bugzilla: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D103544=DwIBaQ=uilaK90D4TOVoH58JNXRgQ=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0=O3sPiamq_x4GgPnGelOAW-6LG12mV9kyATKu7PI5o10=W1XEpaa39PmD6AFtLpI21D3QTrP7XYVwGWfFYCT0PRM= >> --- >> src/gallium/drivers/r600/r600_shader.c | 16 >> 1 file changed, 12 insertions(+), 4 deletions(-) >> >> diff --git a/src/gallium/drivers/r600/r600_shader.c >> b/src/gallium/drivers/r600/r600_shader.c >> index 6a755bb3fd..62fc4da901 100644 >> --- a/src/gallium/drivers/r600/r600_shader.c >> +++ b/src/gallium/drivers/r600/r600_shader.c >> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction >> eg_shader_tgsi_instruction[] = >> [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, >> [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, >> [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, >> - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, >> tgsi_trans_srcx_replicate}, >> - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, >> + /* XXX: >> +* For state trackers other than OpenGL, we'll want to use >> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead. >> +*/ >> + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, >> tgsi_trans_srcx_replicate}, >> + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq}, >> [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, >> [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, >> [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, >> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction >> cm_shader_tgsi_instruction[] = >> [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, >> [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, >> [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, >> - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, >> cayman_emit_float_instr}, >> - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, >> cayman_emit_float_instr}, >> + /* XXX: >> +* For state trackers other than OpenGL, we'll want to use >> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead. 
>> +*/ >> + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, >> cayman_emit_float_instr}, >> + [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_CLAMPED, >> cayman_emit_float_instr}, >> [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, >> [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, >> [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, >> -- >> 2.12.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev=DwIBaQ=uilaK90D4TOVoH58JNXRgQ=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0=O3sPiamq_x4GgPnGelOAW-6LG12mV9kyATKu7PI5o10=5mjPUHmN5M2pr4cVX5DmE7_sMJfXmAVL27FxPc55SLo=
[Mesa-dev] [PATCH 1/4] r600: use min_dx10/max_dx10 instead of min/max
From: Roland ScheideggerI believe this is the safe thing to do, especially ever since the driver actually generates NaNs for muls too. Albeit since the radeon ISA docs are inaccurate/wrong there, I'm not entirely sure what the non-dx10 versions do, but (as required by dx10) the dx10 versions should pick a non-nan source over a nan source. Other drivers presumably do the same (radeonsi, llvmpipe). This was shown to make some difference for bug 103544, albeit it is not required to fix it. --- src/gallium/drivers/r600/r600_shader.c | 12 ++-- src/gallium/drivers/r600/sb/sb_expr.cpp | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 188fbc9d47..6a755bb3fd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -8844,8 +8844,8 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9042,8 +9042,8 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { 
ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9265,8 +9265,8 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp index 3dd3a4815b..7a5d62c8e8 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) { n.bc.src[0].abs == n.bc.src[1].abs) { switch (n.bc.op) { case ALU_OP2_MIN: // (MIN x, x) => (MOV x) + case ALU_OP2_MIN_DX10: case ALU_OP2_MAX: + case ALU_OP2_MAX_DX10: convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); return fold_alu_op1(n); case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] r600: set the number type correctly for float rts in cb setup
From: Roland Scheidegger
Float rts were always set as unorm instead of float. Not sure of the consequences, but at least it looks like the blend clamp would have been enabled, which is against the rules (only eg really bothered to even attempt to specify this correctly, r600 always used clamp anyway). However, the r600 (not r700) setup still looks buggy to me, since it never sets BLEND_FLOAT32, which must be set according to the docs... Not sure if the hw really cares, no piglit change. --- src/gallium/drivers/r600/evergreen_state.c | 7 ++- src/gallium/drivers/r600/r600_state.c | 10 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index ddd59dc0b5..ba08f38f8c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1042,7 +1042,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, } } ntype = V_028C70_NUMBER_UNORM; - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { if (desc->channel[i].normalized) @@ -1054,7 +1054,10 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, ntype = V_028C70_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_028C70_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_028C70_NUMBER_FLOAT; } + pitch = (pitch / 8) - 1; color->pitch = S_028C64_PITCH_TILE_MAX(pitch); @@ -1180,6 +1183,8 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx, ntype = V_028C70_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_028C70_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_028C70_NUMBER_FLOAT; } if (R600_BIG_ENDIAN) diff --git a/src/gallium/drivers/r600/r600_state.c
b/src/gallium/drivers/r600/r600_state.c index c0d0b1667a..0bda8d5b3f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -817,7 +817,7 @@ static void r600_init_color_surface(struct r600_context *rctx, unsigned offset; const struct util_format_description *desc; int i; - bool blend_bypass = 0, blend_clamp = 1, do_endian_swap = FALSE; + bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE; if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) { r600_init_flushed_depth_texture(>b.b, surf->base.texture, NULL); @@ -869,6 +869,8 @@ static void r600_init_color_surface(struct r600_context *rctx, ntype = V_0280A0_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_0280A0_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_0280A0_NUMBER_FLOAT; } if (R600_BIG_ENDIAN) @@ -883,6 +885,11 @@ static void r600_init_color_surface(struct r600_context *rctx, endian = r600_colorformat_endian_swap(format, do_endian_swap); + /* blend clamp should be set for all NORM/SRGB types */ + if (ntype == V_0280A0_NUMBER_UNORM || ntype == V_0280A0_NUMBER_SNORM || + ntype == V_0280A0_NUMBER_SRGB) + blend_clamp = 1; + /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */ if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT || @@ -916,6 +923,7 @@ static void r600_init_color_surface(struct r600_context *rctx, ntype != V_0280A0_NUMBER_UINT && ntype != V_0280A0_NUMBER_SINT) && G_0280A0_BLEND_CLAMP(color_info) && + /* XXX this condition is always true since BLEND_FLOAT32 is never set (bug?). */ !G_0280A0_BLEND_FLOAT32(color_info)) { color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); surf->export_16bpc = true; -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] r600: use mysterious DX10_CLAMP bit in pixel shader setup
From: Roland Scheidegger
I don't know what this bit really does. The docs are somewhere between misleading and wrong however, as at least the newer ones (that bit exists with GCN as well) imply all NaNs would get converted to zeros, which is definitely NOT the case (and that would not be dx10 compliant either); the r600 ones also talk about "dx10 style" vs "dx9 style" clamp, whatever that means for dx9... Makes no difference at all with piglit's isinf-and-isnan tests, so very obviously NaNs are still generated just fine. radeonsi also seems to set this bit nowadays (the llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0" instead of "Do not clamp NAN to 0" since it was changed). This prevents misrenderings in This War of Mine since using ieee muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use the clamped rcp opcode, which would also fix this. AMD, it would be really really nice if there were useful/correct/accurate information about this bit... The bit can be set for all shader stages, and maybe it should be set, but I really have no idea... Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544 --- src/gallium/drivers/r600/evergreen_state.c | 1 + src/gallium/drivers/r600/r600_state.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 96eb35a981..ddd59dc0b5 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | +S_028844_DX10_CLAMP(1) | S_028844_STACK_SIZE(rshader->bc.nstack)); /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). 
*/ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index c21e8dabb1..c0d0b1667a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2548,6 +2548,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2); r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/ S_028850_NUM_GPRS(rshader->bc.ngpr) | +S_028850_DX10_CLAMP(1) | S_028850_STACK_SIZE(rshader->bc.nstack) | S_028850_UNCACHED_FIRST_INST(ufi)); r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */ -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] r600: use ieee version of rcp
From: Roland Scheideggerr600 used the clamped version for rcp, whereas both evergreen and cayman used the ieee version. I don't know why that discrepancy exists (it does so since day 1) but there does not seem to be a valid reason for this, so make it consistent. This seems now safer than before the previous commit (using the mystery dx10 clamp). Note that rsq still uses clamped version (as before even though the table may have suggested otherwise for evergreen) for r600/eg, but not for cayman. I just don't feel lucky enough to change this (it should also be noted r600 supports sqrt natively, which is always ieee, therefore might not really see rsqrt with glsl often presumably). Compile tested only... --- src/gallium/drivers/r600/r600_shader.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6a755bb3fd..628c33787e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -8830,11 +8830,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, - /* XXX: -* For state trackers other than OpenGL, we'll want to use -* _RECIP_IEEE instead. 
-*/ - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, @@ -9034,7 +9030,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, + [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/9] gallium/tgsi: start adding hw atomics (v3.1)
Just some naming trivia, not a proper review: On Wed, Nov 8, 2017 at 8:54 PM, Dave Airliewrote: > From: Dave Airlie > > This adds support for a hw atomic counters to TGSI. > > A new register file for storing atomic counters is added, > along with a new atomic counter semantic, along with docs > for both. > > v2: drop semantic, move hw counter to backend, > Ilia pointed out SSO would have busted my plan, and he > was right. > v3: drop BUFFER decls. (Marek) > v3.1: minor fixups for whitespace, set ureg error > if we overflow the hw atomic limits. (nha) > > Reviewed-by: Marek Olšák > Reviewed-by: Nicolai Hähnle > Signed-off-by: Dave Airlie > --- > src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 + > src/gallium/auxiliary/tgsi/tgsi_ureg.c | 81 > ++ > src/gallium/auxiliary/tgsi/tgsi_ureg.h | 7 +++ > src/gallium/docs/source/tgsi.rst | 33 ++-- > src/gallium/include/pipe/p_shader_tokens.h | 1 + > src/gallium/include/pipe/p_state.h | 1 + > 6 files changed, 121 insertions(+), 3 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c > b/src/gallium/auxiliary/tgsi/tgsi_strings.c > index 0872db9..4f28b49 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c > @@ -58,6 +58,7 @@ static const char *tgsi_file_names[] = > "BUFFER", > "MEMORY", > "CONSTBUF", > + "HWATOMIC", HW_ATOMIC? (or rename the file?) 
> }; > > const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = > diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c > b/src/gallium/auxiliary/tgsi/tgsi_ureg.c > index b26434c..4f3ac97 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c > @@ -80,6 +80,7 @@ struct ureg_tokens { > #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS > #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) > #define UREG_MAX_CONSTANT_RANGE 32 > +#define UREG_MAX_HW_ATOMIC_RANGE 32 > #define UREG_MAX_IMMEDIATE 4096 > #define UREG_MAX_ADDR 3 > #define UREG_MAX_ARRAY_TEMPS 256 > @@ -92,6 +93,15 @@ struct const_decl { > unsigned nr_constant_ranges; > }; > > +struct hw_atomic_decl { > + struct { > + unsigned first; > + unsigned last; > + unsigned array_id; > + } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE]; > + unsigned nr_hw_atomic_ranges; > +}; > + > #define DOMAIN_DECL 0 > #define DOMAIN_INSN 1 > > @@ -182,6 +192,8 @@ struct ureg_program > > struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS]; > > + struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS]; > + > unsigned properties[TGSI_PROPERTY_COUNT]; > > unsigned nr_addrs; > @@ -583,6 +595,30 @@ out: > return ureg_src_dimension(src, 0); > } > > + > +/* Returns a new hw atomic register. Keep track of which have been > + * referred to so that we can emit decls later. 
> + */ > +void > +ureg_DECL_hw_atomic(struct ureg_program *ureg, > +unsigned first, > +unsigned last, > +unsigned buffer_id, > +unsigned array_id) > +{ > + struct hw_atomic_decl *decl = >hw_atomic_decls[buffer_id]; > + > + if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) { > + uint i = decl->nr_hw_atomic_ranges++; > + > + decl->hw_atomic_range[i].first = first; > + decl->hw_atomic_range[i].last = last; > + decl->hw_atomic_range[i].array_id = array_id; > + } else { > + set_bad(ureg); > + } > +} > + > static struct ureg_dst alloc_temporary( struct ureg_program *ureg, > boolean local ) > { > @@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg, > } > } > > +static void > +emit_decl_atomic_2d(struct ureg_program *ureg, > +unsigned first, > +unsigned last, > +unsigned index2D, > +unsigned array_id) > +{ > + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : > 3); > + > + out[0].value = 0; > + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; > + out[0].decl.NrTokens = 3; > + out[0].decl.File = TGSI_FILE_HW_ATOMIC; > + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; > + out[0].decl.Dimension = 1; > + out[0].decl.Array = array_id != 0; > + > + out[1].value = 0; > + out[1].decl_range.First = first; > + out[1].decl_range.Last = last; > + > + out[2].value = 0; > + out[2].decl_dim.Index2D = index2D; > + > + if (array_id) { > + out[3].value = 0; > + out[3].array.ArrayID = array_id; > + } > +} > > static void > emit_decl_fs(struct ureg_program *ureg, > @@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg ) >} > } > > + for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) { > + struct hw_atomic_decl *decl =
[Mesa-dev] [PATCH 5/9] st/mesa: start adding support for hw atomics atom. (v2)
From: Dave AirlieThis adds a new atom that calls the new driver API to bind buffers containing hw atomics. v2: fixup bindings for sparse buffers. (mareko/nha) don't bind buffer atomics when hw atomics are enabled. use NewAtomicBuffer (mareko) Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_atom_atomicbuf.c | 41 ++-- src/mesa/state_tracker/st_atom_list.h| 2 ++ src/mesa/state_tracker/st_cb_bufferobjects.c | 2 +- src/mesa/state_tracker/st_context.c | 9 +- src/mesa/state_tracker/st_context.h | 1 + 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c index ee5944f..c502ba6 100644 --- a/src/mesa/state_tracker/st_atom_atomicbuf.c +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c @@ -46,7 +46,7 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog, { unsigned i; - if (!prog || !st->pipe->set_shader_buffers) + if (!prog || !st->pipe->set_shader_buffers || st->has_hw_atomics) return; for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) { @@ -63,7 +63,7 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog, sb.buffer_offset = binding->Offset; sb.buffer_size = st_obj->buffer->width0 - binding->Offset; -/* AutomaticSize is FALSE if the buffer was set with BindBufferRange. + /* AutomaticSize is FALSE if the buffer was set with BindBufferRange. * Take the minimum just to be sure. 
*/ if (!binding->AutomaticSize) @@ -128,3 +128,40 @@ st_bind_cs_atomics(struct st_context *st) st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE); } + +void +st_bind_hw_atomic_buffers(struct st_context *st) +{ + struct pipe_shader_buffer buffers[PIPE_MAX_HW_ATOMIC_BUFFERS]; + int i; + int num_buffers = 0; + + if (!st->has_hw_atomics) + return; + + for (i = 0; i < st->ctx->Const.MaxAtomicBufferBindings; i++) { + struct gl_buffer_binding *binding = >ctx->AtomicBufferBindings[i]; + struct st_buffer_object *st_obj = st_buffer_object(binding->BufferObject); + struct pipe_shader_buffer *sb = [num_buffers]; + + if (st_obj && st_obj->buffer) { +sb->buffer = st_obj->buffer; +sb->buffer_offset = binding->Offset; +sb->buffer_size = st_obj->buffer->width0 - binding->Offset; + +/* AutomaticSize is FALSE if the buffer was set with BindBufferRange. + * Take the minimum just to be sure. + */ +if (!binding->AutomaticSize) + sb->buffer_size = MIN2(sb->buffer_size, (unsigned) binding->Size); + } else { +sb->buffer = NULL; +sb->buffer_offset = 0; +sb->buffer_size = 0; + } + num_buffers++; + } + + st->pipe->set_hw_atomic_buffers(st->pipe, 0, num_buffers, + buffers); +} diff --git a/src/mesa/state_tracker/st_atom_list.h b/src/mesa/state_tracker/st_atom_list.h index b76854e..8f50a72 100644 --- a/src/mesa/state_tracker/st_atom_list.h +++ b/src/mesa/state_tracker/st_atom_list.h @@ -66,6 +66,8 @@ ST_STATE(ST_NEW_GS_SSBOS, st_bind_gs_ssbos) ST_STATE(ST_NEW_PIXEL_TRANSFER, st_update_pixel_transfer) ST_STATE(ST_NEW_TESS_STATE, st_update_tess) +ST_STATE(ST_NEW_HW_ATOMICS, st_bind_hw_atomic_buffers) + /* this must be done after the vertex program update */ ST_STATE(ST_NEW_VERTEX_ARRAYS, st_update_array) diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 86ebfc6..a9104a9 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -348,7 +348,7 @@ bufferobj_data(struct gl_context *ctx, if 
(st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) ctx->NewDriverState |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS; if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - ctx->NewDriverState |= ST_NEW_ATOMIC_BUFFER; + ctx->NewDriverState |= ctx->DriverFlags.NewAtomicBuffer; return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 5d8dd8b..e82090b 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -405,6 +405,10 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, st->has_multi_draw_indirect = screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT); + st->has_hw_atomics = + screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, + PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS) ? true : false; + /* GL limits and extensions */ st_init_limits(pipe->screen, >Const, >Extensions); st_init_extensions(pipe->screen, >Const, @@ -497,7 +501,10 @@ static void st_init_driver_flags(struct st_context *st) /* Shader resources */ f->NewTextureBuffer =
[Mesa-dev] [PATCH 8/9] r600: add support for hw atomic counters. (v3)
From: Dave AirlieThis adds support for the evergreen/cayman atomic counters. These are implemented using GDS append/consume counters. The values for each counter are loaded before drawing and saved after each draw using special CP packets. v2: move hw atomic assignment into driver. v3: fix messing up caps (Gert Wollny), only store ranges in driver, drop buffers. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 159 ++ src/gallium/drivers/r600/r600_pipe.c | 15 ++ src/gallium/drivers/r600/r600_pipe.h | 22 +++ src/gallium/drivers/r600/r600_shader.c | 239 --- src/gallium/drivers/r600/r600_shader.h | 19 +++ src/gallium/drivers/r600/r600_state_common.c | 46 ++ src/gallium/drivers/r600/r600d_common.h | 2 + 7 files changed, 480 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 96eb35a..634cd96 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3716,6 +3716,38 @@ static void evergreen_set_tess_state(struct pipe_context *ctx, rctx->tess_state_dirty = true; } +static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, + unsigned start_slot, + unsigned count, + const struct pipe_shader_buffer *buffers) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_atomic_buffer_state *astate; + int i, idx; + + astate = >atomic_buffer_state; + + /* we'd probably like to expand this to 8 later so put the logic in */ + for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { + const struct pipe_shader_buffer *buf; + struct pipe_shader_buffer *abuf; + + abuf = >buffer[i]; + + if (!buffers || !buffers[idx].buffer) { + pipe_resource_reference(>buffer, NULL); + astate->enabled_mask &= ~(1 << i); + continue; + } + buf = [idx]; + + pipe_resource_reference(>buffer, buf->buffer); + abuf->buffer_offset = buf->buffer_offset; + abuf->buffer_size = buf->buffer_size; + astate->enabled_mask 
|= (1 << i); + } +} + void evergreen_init_state_functions(struct r600_context *rctx) { unsigned id = 1; @@ -3801,6 +3833,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple; rctx->b.b.set_min_samples = evergreen_set_min_samples; rctx->b.b.set_tess_state = evergreen_set_tess_state; + rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers; if (rctx->b.chip_class == EVERGREEN) rctx->b.b.get_sample_position = evergreen_get_sample_position; else @@ -4107,3 +4140,129 @@ void eg_trace_emit(struct r600_context *rctx) radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id)); } + +bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p) +{ + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; + struct r600_atomic_buffer_state *astate = >atomic_buffer_state; + unsigned pkt_flags = 0; + uint8_t atomic_used_mask = 0; + int i, j, k; + + for (i = 0; i < EG_NUM_HW_STAGES; i++) { + uint8_t num_atomic_stage; + struct r600_pipe_shader *pshader; + + pshader = rctx->hw_shader_stages[i].shader; + if (!pshader) + continue; + + num_atomic_stage = pshader->shader.nhwatomic_ranges; + if (!num_atomic_stage) + continue; + + for (j = 0; j < num_atomic_stage; j++) { + struct r600_shader_atomic *atomic = >shader.atomics[j]; + int natomics = atomic->end - atomic->start + 1; + + for (k = 0; k < natomics; k++) { + /* seen this in a previous stage */ + if (atomic_used_mask & (1u << (atomic->hw_idx + k))) + continue; + + combined_atomics[atomic->hw_idx + k].hw_idx = atomic->hw_idx + k; + combined_atomics[atomic->hw_idx + k].buffer_id = atomic->buffer_id; + combined_atomics[atomic->hw_idx + k].start = atomic->start + k; +
[Mesa-dev] [PATCH 9/9] docs: update r600 atomic counter status.
From: Dave AirlieSigned-off-by: Dave Airlie --- docs/features.txt | 6 +++--- docs/relnotes/17.4.0.html | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 10ccf9d..86d07ba 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -150,7 +150,7 @@ GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, radeonsi GL_ARB_texture_compression_bptc DONE (i965, r600) GL_ARB_compressed_texture_pixel_storage DONE (all drivers) - GL_ARB_shader_atomic_counters DONE (i965, softpipe) + GL_ARB_shader_atomic_counters DONE (i965, r600, softpipe) GL_ARB_texture_storageDONE (all drivers) GL_ARB_transform_feedback_instanced DONE (i965, nv50, r600, llvmpipe, softpipe, swr) GL_ARB_base_instance DONE (i965, nv50, r600, llvmpipe, softpipe, swr) @@ -227,7 +227,7 @@ GL 4.6, GLSL 4.60 GL_ARB_indirect_parametersDONE (i965/gen7+, nvc0, radeonsi) GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, llvmpipe, softpipe, swr) GL_ARB_polygon_offset_clamp DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, swr) - GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe) + GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, r600, radeonsi, softpipe) GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi) GL_ARB_shader_group_vote DONE (i965, nvc0, radeonsi) GL_ARB_spirv_extensions in progress (Nicolai Hähnle, Ian Romanick) @@ -246,7 +246,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_framebuffer_no_attachments DONE (i965/gen7+, r600, softpipe) GL_ARB_program_interface_queryDONE (all drivers) - GL_ARB_shader_atomic_counters DONE (i965/gen7+, softpipe) + GL_ARB_shader_atomic_counters DONE (i965/gen7+, r600, softpipe) GL_ARB_shader_image_load_storeDONE (i965/gen7+, softpipe) GL_ARB_shader_image_size DONE (i965/gen7+, softpipe) GL_ARB_shader_storage_buffer_object DONE (i965/gen7+, softpipe) diff --git a/docs/relnotes/17.4.0.html 
b/docs/relnotes/17.4.0.html index f81b5bd..19e0c80 100644 --- a/docs/relnotes/17.4.0.html +++ b/docs/relnotes/17.4.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers. Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to "0" or "false" +GL_ARB_shader_atomic_counters and GL_ARB_shader_atomic_counter_ops on r600/evergreen+ Bug fixes -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/9] st/mesa: setup hw atomic limits. (v1.1)
From: Dave AirlieHW atomics need to use caps to set some limits, and some other limits may also need limiting. This fixes things up to work for evergreen hw, it may need more changes in the future if other hw wants to use this path. v1.1: fix indent. Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_extensions.c | 45 ++ 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index fa2d002..d4b8dc9 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -79,6 +79,7 @@ void st_init_limits(struct pipe_screen *screen, unsigned sh; boolean can_ubo = TRUE; int temp; + bool ssbo_atomic = true; c->MaxTextureLevels = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), @@ -243,11 +244,21 @@ void st_init_limits(struct pipe_screen *screen, c->MaxUniformBlockSize / 4 * pc->MaxUniformBlocks); - pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - pc->MaxAtomicBuffers = screen->get_shader_param( -screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2; - pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers; - + temp = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS); + if (temp) { + /* + * for separate atomic counters get the actual hw limits + * per stage on atomic counters and buffers + */ + ssbo_atomic = false; + pc->MaxAtomicCounters = temp; + pc->MaxAtomicBuffers = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS); + pc->MaxShaderStorageBlocks = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); + } else { + pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + pc->MaxAtomicBuffers = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2; + pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers; + } pc->MaxImageUniforms = screen->get_shader_param( screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES); 
@@ -407,14 +418,26 @@ void st_init_limits(struct pipe_screen *screen, screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); c->MaxAtomicBufferBindings = - c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; - c->MaxCombinedAtomicBuffers = + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + + if (!ssbo_atomic) { + /* for separate atomic buffers - there atomic buffer size will be + limited */ + c->MaxAtomicBufferSize = c->Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters * ATOMIC_COUNTER_SIZE; + /* on all HW with separate atomic (evergreen) the following + lines are true. not sure it's worth adding CAPs for this at this + stage. */ + c->MaxCombinedAtomicCounters = c->Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; + c->MaxCombinedAtomicBuffers = c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + } else { + c->MaxCombinedAtomicBuffers = c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers + c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers + c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers + c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers + c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; - assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS); + assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS); + } if (c->MaxCombinedAtomicBuffers > 0) { extensions->ARB_shader_atomic_counters = GL_TRUE; @@ -425,8 +448,10 @@ void st_init_limits(struct pipe_screen *screen, c->ShaderStorageBufferOffsetAlignment = screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT); if (c->ShaderStorageBufferOffsetAlignment) { - c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings = - c->MaxCombinedAtomicBuffers; + /* for hw atomic counters leaves these at default for now */ + if (ssbo_atomic) + c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings = +c->MaxCombinedAtomicBuffers; c->MaxCombinedShaderOutputResources += c->MaxCombinedShaderStorageBlocks; c->MaxShaderStorageBlockSize = 1 << 27; -- 2.9.5 ___ 
mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/9] gallium: add hw atomic buffer binding API.
From: Dave AirlieThis API binds atomic buffers for all bound shaders (as per the GL semantics). This is needed to support cross shader hw atomic counters. Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/gallium/docs/source/context.rst | 8 src/gallium/include/pipe/p_context.h | 16 2 files changed, 24 insertions(+) diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index ba7fef8..5898157 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -145,6 +145,14 @@ to the array index which is used for sampling. * ``sampler_view_destroy`` destroys a sampler view and releases its reference to associated texture. +Hardware Atomic buffers +^^^ + +Buffers containing hw atomics are required to support the feature +on some drivers. + +Drivers that require this need to fill the ``set_hw_atomic_buffers`` method. + Shader Resources diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 4609d4d..c2153f7 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -332,6 +332,22 @@ struct pipe_context { const struct pipe_shader_buffer *buffers); /** +* Bind an array of hw atomic buffers for use by all shaders. +* And buffers that were previously bound to the specified range +* will be unbound. +* +* \param start_slot first buffer slot to bind. +* \param count number of consecutive buffers to bind. +* \param buffersarray of pointers to the buffers to bind, it +* should contain at least \a count elements +* unless it's NULL, in which case no buffers will +* be bound. +*/ + void (*set_hw_atomic_buffers)(struct pipe_context *, + unsigned start_slot, unsigned count, + const struct pipe_shader_buffer *buffers); + + /** * Bind an array of images that will be used by a shader. * Any images that were previously bound to the specified range * will be unbound. 
-- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/9] st/mesa: add support for hw atomics to glsl->tgsi. (v4)
From: Dave AirlieThis adds support for creating the hw atomic tgsi from the glsl codepaths. v2: drop the atomic index and move to backend. v3: drop buffer decls. (Marek) v4: fix off by one (Gert) Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 100 - 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index fd9df61..472a6c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -152,6 +152,13 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) return GLSL_TYPE_ERROR; } +struct hwatomic_decl { + unsigned location; + unsigned binding; + unsigned size; + unsigned array_id; +}; + struct glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -176,6 +183,9 @@ public: unsigned num_outputs; unsigned num_output_arrays; + struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS]; + unsigned num_atomics; + unsigned num_atomic_arrays; int num_address_regs; uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; @@ -3206,24 +3216,64 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_dereference *deref = static_cast(param); ir_variable *location = deref->variable_referenced(); - - st_src_reg buffer( - PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); - + bool has_hw_atomics = st_context(ctx)->has_hw_atomics; /* Calculate the surface offset */ st_src_reg offset; unsigned array_size = 0, base = 0; uint16_t index = 0; + st_src_reg resource; get_deref_offsets(deref, _size, , , , false); - if (offset.file != PROGRAM_UNDEFINED) { - emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), - offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); - emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), - offset, 
st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); + if (has_hw_atomics) { + variable_storage *entry = find_variable_storage(location); + st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding); + + if (!entry) { + entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC, + num_atomics); + _mesa_hash_table_insert(this->variables, location, entry); + + atomic_info[num_atomics].location = location->data.location; + atomic_info[num_atomics].binding = location->data.binding; + atomic_info[num_atomics].size = location->type->arrays_of_arrays_size(); + atomic_info[num_atomics].array_id = 0; + num_atomics++; + } + + if (offset.file != PROGRAM_UNDEFINED) { + if (atomic_info[entry->index].array_id == 0) { +num_atomic_arrays++; +atomic_info[entry->index].array_id = num_atomic_arrays; + } + buffer.array_id = atomic_info[entry->index].array_id; + } + + buffer.index = index; + buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE; + buffer.has_index2 = true; + + if (offset.file != PROGRAM_UNDEFINED) { + buffer.reladdr = ralloc(mem_ctx, st_src_reg); + *buffer.reladdr = offset; + emit_arl(ir, sampler_reladdr, offset); + } + offset = st_src_reg_for_int(0); + + resource = buffer; } else { - offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); + st_src_reg buffer(PROGRAM_BUFFER, location->data.binding, +GLSL_TYPE_ATOMIC_UINT); + + if (offset.file != PROGRAM_UNDEFINED) { + emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), + offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), + offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); + } else { + offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); + } + resource = buffer; } ir->return_deref->accept(this); @@ -3286,7 +3336,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) inst = emit_asm(ir, opcode, dst, offset, 
data, data2); } - inst->resource = buffer; + inst->resource = resource; } void @@ -4388,6 +4438,8 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_outputs = 0; num_input_arrays = 0; num_output_arrays = 0; + num_atomics = 0; + num_atomic_arrays = 0; num_immediates = 0; num_address_regs = 0; samplers_used = 0; @@
[Mesa-dev] [PATCH 2/9] gallium/tgsi: start adding hw atomics (v3.1)
From: Dave AirlieThis adds support for a hw atomic counters to TGSI. A new register file for storing atomic counters is added, along with a new atomic counter semantic, along with docs for both. v2: drop semantic, move hw counter to backend, Ilia pointed out SSO would have busted my plan, and he was right. v3: drop BUFFER decls. (Marek) v3.1: minor fixups for whitespace, set ureg error if we overflow the hw atomic limits. (nha) Reviewed-by: Marek Olšák Reviewed-by: Nicolai Hähnle Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 + src/gallium/auxiliary/tgsi/tgsi_ureg.c | 81 ++ src/gallium/auxiliary/tgsi/tgsi_ureg.h | 7 +++ src/gallium/docs/source/tgsi.rst | 33 ++-- src/gallium/include/pipe/p_shader_tokens.h | 1 + src/gallium/include/pipe/p_state.h | 1 + 6 files changed, 121 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 0872db9..4f28b49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -58,6 +58,7 @@ static const char *tgsi_file_names[] = "BUFFER", "MEMORY", "CONSTBUF", + "HWATOMIC", }; const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index b26434c..4f3ac97 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -80,6 +80,7 @@ struct ureg_tokens { #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS) #define UREG_MAX_CONSTANT_RANGE 32 +#define UREG_MAX_HW_ATOMIC_RANGE 32 #define UREG_MAX_IMMEDIATE 4096 #define UREG_MAX_ADDR 3 #define UREG_MAX_ARRAY_TEMPS 256 @@ -92,6 +93,15 @@ struct const_decl { unsigned nr_constant_ranges; }; +struct hw_atomic_decl { + struct { + unsigned first; + unsigned last; + unsigned array_id; + } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE]; + unsigned nr_hw_atomic_ranges; +}; + #define 
DOMAIN_DECL 0 #define DOMAIN_INSN 1 @@ -182,6 +192,8 @@ struct ureg_program struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS]; + struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS]; + unsigned properties[TGSI_PROPERTY_COUNT]; unsigned nr_addrs; @@ -583,6 +595,30 @@ out: return ureg_src_dimension(src, 0); } + +/* Returns a new hw atomic register. Keep track of which have been + * referred to so that we can emit decls later. + */ +void +ureg_DECL_hw_atomic(struct ureg_program *ureg, +unsigned first, +unsigned last, +unsigned buffer_id, +unsigned array_id) +{ + struct hw_atomic_decl *decl = >hw_atomic_decls[buffer_id]; + + if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) { + uint i = decl->nr_hw_atomic_ranges++; + + decl->hw_atomic_range[i].first = first; + decl->hw_atomic_range[i].last = last; + decl->hw_atomic_range[i].array_id = array_id; + } else { + set_bad(ureg); + } +} + static struct ureg_dst alloc_temporary( struct ureg_program *ureg, boolean local ) { @@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg, } } +static void +emit_decl_atomic_2d(struct ureg_program *ureg, +unsigned first, +unsigned last, +unsigned index2D, +unsigned array_id) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 
4 : 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_HW_ATOMIC; + out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; + out[0].decl.Dimension = 1; + out[0].decl.Array = array_id != 0; + + out[1].value = 0; + out[1].decl_range.First = first; + out[1].decl_range.Last = last; + + out[2].value = 0; + out[2].decl_dim.Index2D = index2D; + + if (array_id) { + out[3].value = 0; + out[3].array.ArrayID = array_id; + } +} static void emit_decl_fs(struct ureg_program *ureg, @@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg ) } } + for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) { + struct hw_atomic_decl *decl = >hw_atomic_decls[i]; + + if (decl->nr_hw_atomic_ranges) { + uint j; + + for (j = 0; j < decl->nr_hw_atomic_ranges; j++) { +emit_decl_atomic_2d(ureg, +decl->hw_atomic_range[j].first, +decl->hw_atomic_range[j].last, +i, +decl->hw_atomic_range[j].array_id); + } + } + } +
[Mesa-dev] [PATCH 4/9] mesa/program: add hw atomic counter file
From: Dave Airlie This is needed for the GLSL->TGSI translation for hw atomic counters. Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/main/mtypes.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index d092630..b905a26 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2060,6 +2060,7 @@ typedef enum PROGRAM_BUFFER, /**< for shader buffers, compile-time only */ PROGRAM_MEMORY, /**< for shared, global and local memory */ PROGRAM_IMAGE, /**< for shader images, compile-time only */ + PROGRAM_HW_ATOMIC, /**< for hw atomic counters, compile-time only */ PROGRAM_FILE_MAX } gl_register_file; -- 2.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] gallium/r600 atomic - v4
Hopefully last pass, a few fixes in here, patch 5 is the only outstanding non-reviewed one, I think I've fixed the sparse buffer binding in it well enough, there is also a fix for Gert's off-by-one. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/9] gallium: add CAPs to support HW atomic counters. (v3)
From: Dave AirlieThis looks like an evergreen specific feature, but with atomic counters AMD have hw specific counters they use instead of operating on buffers directly. These are separate to the buffer atomics, so require different limits and code paths. I've left the CAP for atomic type extensible in case someone else has a variant on this sort of thing (freedreno maybe?) and needs to change it. This adds all the CAPs required to add support for those atomic counters, along with a related CAP for limiting the number of output resources. I'd like to land this and the st patch then I can start to upstream the evergreen support for these and other GL4.x features. v2: drop the ATOMIC_COUNTER_MODE cap, just use the return from the HW counters. If 0 we use the current mode. v3: fix some rebase errors (Gert Wollny) Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/gallium/auxiliary/gallivm/lp_bld_limits.h| 2 ++ src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 ++ src/gallium/docs/source/screen.rst | 5 - src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 ++ src/gallium/drivers/freedreno/freedreno_screen.c | 2 ++ src/gallium/drivers/nouveau/nv30/nv30_screen.c | 2 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 ++ src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 ++ src/gallium/drivers/r300/r300_screen.c | 2 ++ src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.c | 2 ++ src/gallium/drivers/svga/svga_screen.c | 4 src/gallium/drivers/vc4/vc4_screen.c | 2 ++ src/gallium/drivers/virgl/virgl_screen.c | 2 ++ src/gallium/include/pipe/p_defines.h | 2 ++ 15 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index ea320bb..c7755bf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -140,6 +140,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case 
PIPE_SHADER_CAP_MAX_SHADER_IMAGES: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 514c69e..ad920dc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -541,6 +541,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: return PIPE_MAX_SHADER_BUFFERS; diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 9f00059..519728f 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -519,7 +519,10 @@ MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0 * ``PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS``: Whether the merge registers TGSI pass is skipped. This might reduce code size and register pressure if the underlying driver has a real backend compiler. - +* ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS``: If atomic counters are separate, + how many HW counters are available for this stage. (0 uses SSBO atomics). +* ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS``: If atomic counters are + separate, how many atomic counter buffers are available for this stage. .. 
_pipe_compute_cap: diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index b0c4b7b..e3de442 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -459,6 +459,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index e5504b6..bc66dab 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -554,6 +554,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, return 32; case
Re: [Mesa-dev] [PATCH 2/4] st/mesa: use enum types instead of int/unsigned (v3)
On 11/08/2017 09:08 AM, Erik Faye-Lund wrote: > On Wed, Nov 8, 2017 at 1:07 AM, Brian Paulwrote: >> Use the proper enum types for various variables. Makes life in gdb >> a little nicer. Note that the size of enum bitfields must be one >> larger so the high bit is always zero (for MSVC). > > You *could* also do something like this on MSVC to get unsigned enum > values, thus not needing the extra bit: > > ---8<--- > #include > > #ifdef _MSC_VER > #define FORCE_UNSIGNED : unsigned > #else > #define FORCE_UNSIGNED > #endif > > enum Foo FORCE_UNSIGNED { Looking at https://stackoverflow.com/questions/837319/packing-enums-using-the-msvc-compiler, I think some variation of this may solve the attribute((packed)) problem too. See my other e-mail. Basically an enum can be forced to be a type of a particular size using this technique. Maybe a ENUM_8BITS that is ": unsigned char" on MSVC and "__attribute__((__packed__))" on GCC? >FOO_A = 1, >FOO_B = 255 > }; > > struct Bar { >Foo foo : 8; > }; > > int main() > { >Bar foo; >foo.foo = FOO_B; >printf("%d\n", foo.foo); >return 0; > } > ---8<--- > > This outputs 255 on MSVC. > > It's not beautiful, though. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)
Any thoughts about my data using __attribute__((__packed__))? On 11/07/2017 04:07 PM, Brian Paul wrote: > Declare glsl_type::sampled_type as glsl_base_type as we do for the > base_type field. And make base_type a bitfield to save a few bytes. > > Update glsl_type constructor to take glsl_base_type instead of unsigned > and pass GLSL_TYPE_VOID instead of zero. > > No Piglit regressions with llvmpipe. > > v2: > - Declare both base_type and sampled_type as 8-bit fields > - Use the new ASSERT_BITFIELD_SIZE() macro. > --- > src/compiler/glsl_types.cpp | 30 +++--- > src/compiler/glsl_types.h | 28 +--- > 2 files changed, 36 insertions(+), 22 deletions(-) > > diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp > index 704b63c..107a81f 100644 > --- a/src/compiler/glsl_types.cpp > +++ b/src/compiler/glsl_types.cpp > @@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type, > glsl_base_type base_type, unsigned vector_elements, > unsigned matrix_columns, const char *name) : > gl_type(gl_type), > - base_type(base_type), > + base_type(base_type), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - sampled_type(0), interface_packing(0), interface_row_major(0), > + interface_packing(0), interface_row_major(0), > vector_elements(vector_elements), matrix_columns(matrix_columns), > length(0) > { > @@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type, > > glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, > enum glsl_sampler_dim dim, bool shadow, bool array, > - unsigned type, const char *name) : > + glsl_base_type type, const char *name) : > gl_type(gl_type), > - base_type(base_type), > + base_type(base_type), sampled_type(type), > sampler_dimensionality(dim), sampler_shadow(shadow), > - sampler_array(array), sampled_type(type), interface_packing(0), > + sampler_array(array), interface_packing(0), > interface_row_major(0), length(0) > { > mtx_lock(_type::mem_mutex); > @@ -102,9 +102,9 @@ 
glsl_type::glsl_type(GLenum gl_type, glsl_base_type > base_type, > glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, > const char *name) : > gl_type(0), > - base_type(GLSL_TYPE_STRUCT), > + base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - sampled_type(0), interface_packing(0), interface_row_major(0), > + interface_packing(0), interface_row_major(0), > vector_elements(0), matrix_columns(0), > length(num_fields) > { > @@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, > unsigned num_fields, > enum glsl_interface_packing packing, > bool row_major, const char *name) : > gl_type(0), > - base_type(GLSL_TYPE_INTERFACE), > + base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - sampled_type(0), interface_packing((unsigned) packing), > + interface_packing((unsigned) packing), > interface_row_major((unsigned) row_major), > vector_elements(0), matrix_columns(0), > length(num_fields) > @@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, > unsigned num_fields, > glsl_type::glsl_type(const glsl_type *return_type, > const glsl_function_param *params, unsigned num_params) > : > gl_type(0), > - base_type(GLSL_TYPE_FUNCTION), > + base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - sampled_type(0), interface_packing(0), interface_row_major(0), > + interface_packing(0), interface_row_major(0), > vector_elements(0), matrix_columns(0), > length(num_params) > { > @@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type, > > glsl_type::glsl_type(const char *subroutine_name) : > gl_type(0), > - base_type(GLSL_TYPE_SUBROUTINE), > + base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - 
sampled_type(0), interface_packing(0), interface_row_major(0), > + interface_packing(0), interface_row_major(0), > vector_elements(1), matrix_columns(1), > length(0) > { > @@ -442,9 +442,9 @@ _mesa_glsl_release_types(void) > > > glsl_type::glsl_type(const glsl_type *array, unsigned length) : > - base_type(GLSL_TYPE_ARRAY), > + base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID), > sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), > - sampled_type(0), interface_packing(0), interface_row_major(0), > + interface_packing(0), interface_row_major(0), > vector_elements(0),
Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default
Reviewed-by: Timothy Arceri Mark may want to consider adding some of the once a day type CI runs for this. For example running the test suite for two consecutive runs on the same build so that the second run uses the shader cache and also a second run that uses MESA_GLSL=cache_fb to force testing of the cache fallback path. On 09/11/17 11:58, Jordan Justen wrote: f9d5a7add42af5a2e4410526d1480a08f41317ae along with a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one known regression with shader cache. (Deus Ex instability.) We should enable the shader cache by default to stabilize it before the next major Mesa release. Signed-off-by: Jordan Justen --- docs/relnotes/17.4.0.html | 2 +- src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html index f81b5bd62d3..48dcd5cce38 100644 --- a/docs/relnotes/17.4.0.html +++ b/docs/relnotes/17.4.0.html @@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers. -Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to "0" or "false" +Disk shader cache support for i965 Bug fixes diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index 853ea98af03..cd0524c5cbf 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -420,9 +420,6 @@ void brw_disk_cache_init(struct brw_context *brw) { #ifdef ENABLE_SHADER_CACHE - if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true)) - return; - char renderer[10]; MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x", brw->screen->deviceID); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] gallium/r600 hw atomic support (v3)
On 7 November 2017 at 20:45, Gert Wollny wrote: > Am Dienstag, den 07.11.2017, 16:30 +1000 schrieb Dave Airlie: >> This is the 3rd submission of the gallium/r600 hw atomic counter >> support. >> >> This is fixes some rebase artifacts, removes the BUFFER decls from >> the TGSI, and fixes some indirect crashes in the r600 backend, > > Well, I still get some crashes, i.e. piglits > > spec@arb_arrays_of_arrays@execution@atomic_counters@vs-indirect-index > spec@arb_arrays_of_arrays@execution@atomic_counters@fs-indirect-index > > abort with stack smashing reported. Backtrace for vs-indirect-index is Oops, off-by-one; fixed locally. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac/nir: add support for all intrinsics. (v2)
Reviewed-by: Bas NieuwenhuizenOn Thu, Nov 9, 2017 at 2:12 AM, Dave Airlie wrote: > From: Dave Airlie > > This is derived from tgsi/radeonsi code from the GLSL intrinsics. > > This should pre-fix radv for the upcoming spirv patches. > > v2: actually use wait_cnt, sleep deprived dad time! (Bas) > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_nir_to_llvm.c | 32 +++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 2ae656693fc..f922b32bf7b 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context > *ctx, >ctx->ac.voidt, args, 1, 0); > } > > +static void emit_membar(struct nir_to_llvm_context *ctx, > + const nir_intrinsic_instr *instr) > +{ > + unsigned waitcnt = NOOP_WAITCNT; > + > + switch (instr->intrinsic) { > + case nir_intrinsic_memory_barrier: > + case nir_intrinsic_group_memory_barrier: > + waitcnt &= VM_CNT & LGKM_CNT; > + break; > + case nir_intrinsic_memory_barrier_atomic_counter: > + case nir_intrinsic_memory_barrier_buffer: > + case nir_intrinsic_memory_barrier_image: > + waitcnt &= VM_CNT; > + break; > + case nir_intrinsic_memory_barrier_shared: > + waitcnt &= LGKM_CNT; > + break; > + default: > + break; > + } > + if (waitcnt != NOOP_WAITCNT) > + emit_waitcnt(ctx, waitcnt); > +} > + > static void emit_barrier(struct nir_to_llvm_context *ctx) > { > /* SI only (thanks to a hw bug workaround): > @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, > emit_discard_if(ctx, instr); > break; > case nir_intrinsic_memory_barrier: > - emit_waitcnt(ctx->nctx, VM_CNT); > + case nir_intrinsic_group_memory_barrier: > + case nir_intrinsic_memory_barrier_atomic_counter: > + case nir_intrinsic_memory_barrier_buffer: > + case nir_intrinsic_memory_barrier_image: > + case nir_intrinsic_memory_barrier_shared: > + emit_membar(ctx->nctx, instr); > 
break; > case nir_intrinsic_barrier: > emit_barrier(ctx->nctx); > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] ac/nir: add support for all intrinsics. (v2)
From: Dave AirlieThis is derived from tgsi/radeonsi code from the GLSL intrinsics. This should pre-fix radv for the upcoming spirv patches. v2: actually use wait_cnt, sleep deprived dad time! (Bas) Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 32 +++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 2ae656693fc..f922b32bf7b 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx, ctx->ac.voidt, args, 1, 0); } +static void emit_membar(struct nir_to_llvm_context *ctx, + const nir_intrinsic_instr *instr) +{ + unsigned waitcnt = NOOP_WAITCNT; + + switch (instr->intrinsic) { + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + waitcnt &= VM_CNT & LGKM_CNT; + break; + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + waitcnt &= VM_CNT; + break; + case nir_intrinsic_memory_barrier_shared: + waitcnt &= LGKM_CNT; + break; + default: + break; + } + if (waitcnt != NOOP_WAITCNT) + emit_waitcnt(ctx, waitcnt); +} + static void emit_barrier(struct nir_to_llvm_context *ctx) { /* SI only (thanks to a hw bug workaround): @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, emit_discard_if(ctx, instr); break; case nir_intrinsic_memory_barrier: - emit_waitcnt(ctx->nctx, VM_CNT); + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_membar(ctx->nctx, instr); break; case nir_intrinsic_barrier: emit_barrier(ctx->nctx); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac/nir: add support for all intrinsics.
On Thu, Nov 9, 2017 at 2:04 AM, Dave Airliewrote: > From: Dave Airlie > > This is derived from tgsi/radeonsi code from the GLSL intrinsics. > > This should pre-fix radv for the upcoming spirv patches. > > Signed-off-by: Dave Airlie > --- > src/amd/common/ac_nir_to_llvm.c | 32 +++- > 1 file changed, 31 insertions(+), 1 deletion(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 2ae656693fc..405581f61da 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context > *ctx, >ctx->ac.voidt, args, 1, 0); > } > > +static void emit_membar(struct nir_to_llvm_context *ctx, > + const nir_intrinsic_instr *instr) > +{ > + unsigned waitcnt = NOOP_WAITCNT; > + > + switch (instr->intrinsic) { > + case nir_intrinsic_memory_barrier: > + case nir_intrinsic_group_memory_barrier: > + waitcnt &= VM_CNT & LGKM_CNT; > + break; > + case nir_intrinsic_memory_barrier_atomic_counter: > + case nir_intrinsic_memory_barrier_buffer: > + case nir_intrinsic_memory_barrier_image: > + waitcnt &= VM_CNT; > + break; > + case nir_intrinsic_memory_barrier_shared: > + waitcnt &= LGKM_CNT; > + break; > + default: > + break; > + } > + if (waitcnt != NOOP_WAITCNT) > + emit_waitcnt(ctx, VM_CNT); Why VM_CNT instead of waitcnt? 
> +} > + > static void emit_barrier(struct nir_to_llvm_context *ctx) > { > /* SI only (thanks to a hw bug workaround): > @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, > emit_discard_if(ctx, instr); > break; > case nir_intrinsic_memory_barrier: > - emit_waitcnt(ctx->nctx, VM_CNT); > + case nir_intrinsic_group_memory_barrier: > + case nir_intrinsic_memory_barrier_atomic_counter: > + case nir_intrinsic_memory_barrier_buffer: > + case nir_intrinsic_memory_barrier_image: > + case nir_intrinsic_memory_barrier_shared: > + emit_membar(ctx->nctx, instr); > break; > case nir_intrinsic_barrier: > emit_barrier(ctx->nctx); > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] ac/nir: add support for all intrinsics.
From: Dave AirlieThis is derived from tgsi/radeonsi code from the GLSL intrinsics. This should pre-fix radv for the upcoming spirv patches. Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 32 +++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 2ae656693fc..405581f61da 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx, ctx->ac.voidt, args, 1, 0); } +static void emit_membar(struct nir_to_llvm_context *ctx, + const nir_intrinsic_instr *instr) +{ + unsigned waitcnt = NOOP_WAITCNT; + + switch (instr->intrinsic) { + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + waitcnt &= VM_CNT & LGKM_CNT; + break; + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + waitcnt &= VM_CNT; + break; + case nir_intrinsic_memory_barrier_shared: + waitcnt &= LGKM_CNT; + break; + default: + break; + } + if (waitcnt != NOOP_WAITCNT) + emit_waitcnt(ctx, VM_CNT); +} + static void emit_barrier(struct nir_to_llvm_context *ctx) { /* SI only (thanks to a hw bug workaround): @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, emit_discard_if(ctx, instr); break; case nir_intrinsic_memory_barrier: - emit_waitcnt(ctx->nctx, VM_CNT); + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_membar(ctx->nctx, instr); break; case nir_intrinsic_barrier: emit_barrier(ctx->nctx); -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Enable disk shader cache by default
f9d5a7add42af5a2e4410526d1480a08f41317ae along with a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one known regression with shader cache. (Deus Ex instability.) We should enable the shader cache by default to stabilize it before the next major Mesa release. Signed-off-by: Jordan Justen--- docs/relnotes/17.4.0.html | 2 +- src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html index f81b5bd62d3..48dcd5cce38 100644 --- a/docs/relnotes/17.4.0.html +++ b/docs/relnotes/17.4.0.html @@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers. -Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to "0" or "false" +Disk shader cache support for i965 Bug fixes diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index 853ea98af03..cd0524c5cbf 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -420,9 +420,6 @@ void brw_disk_cache_init(struct brw_context *brw) { #ifdef ENABLE_SHADER_CACHE - if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true)) - return; - char renderer[10]; MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x", brw->screen->deviceID); -- 2.15.0.rc2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] swr: Fixed an uncommon freed-memory access during state validation
State validation is performed during clear and draw calls. Validation during clear was still accessing vertex buffer state. When the currently set vertex buffers are client arrays, this could lead to accessing freed memory. Such is the case with the VMD application. Previously, vertex buffer validation depended on a dirty bit or the draw info indicating an indexed draw. This required special handling for clears. But, vertex buffer validation still occurred which was unnecessary and wrong. Now, only minimal validation is performed during clear, deferring the remainder to the next draw. And, by setting the dirty bit in swr_draw_vbo for indexed draws, vertex buffer validation is only dependent upon a single dirty bit. This fixes a bug exposed by the VMD application when changing models. --- src/gallium/drivers/swr/swr_draw.cpp | 7 ++- src/gallium/drivers/swr/swr_state.cpp | 35 +++ 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index 57660c7464..a94cdd6da0 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -52,7 +52,12 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) return; } - /* Update derived state, pass draw info to update function */ + /* If indexed draw, force vertex validation since index buffer comes +* from draw info. */ + if (info->index_size) + ctx->dirty |= SWR_NEW_VERTEX; + + /* Update derived state, pass draw info to update function. 
*/ swr_update_derived(pipe, info); swr_update_draw_context(ctx); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index c6da4fcb8e..4530d377ee 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1204,11 +1204,6 @@ swr_update_derived(struct pipe_context *pipe, ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState); } - /* Scissor */ - if (ctx->dirty & SWR_NEW_SCISSOR) { - ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, >swr_scissor); - } - /* Viewport */ if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER | SWR_NEW_RASTERIZER)) { @@ -1249,18 +1244,26 @@ swr_update_derived(struct pipe_context *pipe, ctx->api.pfnSwrSetViewports(ctx->swrContext, 1, vp, vpm); } - /* Set vertex & index buffers -* (using draw info if called by swr_draw_vbo) -* If indexed draw, revalidate since index buffer comes from -* pipe_draw_info. -*/ - if (ctx->dirty & SWR_NEW_VERTEX || - (p_draw_info && p_draw_info->index_size)) { + /* When called from swr_clear (p_draw_info = null), render targets, +* rasterState and viewports (dependent on render targets) are the only +* necessary validation. Defer remaining validation by setting +* post_update_dirty_flags and clear all dirty flags. 
BackendState is +* still unconditionally validated below */ + if (!p_draw_info) { + post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER | + SWR_NEW_RASTERIZER | + SWR_NEW_VIEWPORT); + ctx->dirty = 0; + } + + /* Scissor */ + if (ctx->dirty & SWR_NEW_SCISSOR) { + ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, >swr_scissor); + } - /* If being called by swr_draw_vbo, copy draw details */ - struct pipe_draw_info info = {0}; - if (p_draw_info) - info = *p_draw_info; + /* Set vertex & index buffers */ + if (ctx->dirty & SWR_NEW_VERTEX) { + const struct pipe_draw_info = *p_draw_info; /* vertex buffers */ SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 102891] [radv] glitches on rpcs3 emulator (green zones)
https://bugs.freedesktop.org/show_bug.cgi?id=102891 --- Comment #6 from Dave Airlie --- Did someone already try RADV_DEBUG=zerovram to see if it helps? The trace replays badly on amdgpu-pro, which suggests the bad stuff is in RAM before recording. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: implement (un)mapImage
Ah yes you are right, my mistake. I will update the patch after some more testing. Thx. On 8 November 2017 at 17:21, Chris Wilsonwrote: > Quoting Julien Isorce (2017-11-08 16:55:05) > > v2: add early return if (flag & MAP_INTERNAL_MASK) > > > > Already implemented for Gallium drivers. > > > > Useful for gbm_bo_(un)map. > > > > Tested by porting wayland/weston/clients/simple-dmabuf-drm.c to GBM. > > > > Signed-off-by: Julien Isorce > > --- > > src/mesa/drivers/dri/i965/intel_screen.c | 51 > ++-- > > 1 file changed, 49 insertions(+), 2 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > b/src/mesa/drivers/dri/i965/intel_screen.c > > index cdc36ad..b7b0e25 100644 > > --- a/src/mesa/drivers/dri/i965/intel_screen.c > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > > @@ -755,6 +755,53 @@ intel_create_image(__DRIscreen *dri_screen, > > loaderPrivate); > > } > > > > +static void * > > +intel_map_image(__DRIcontext *context, __DRIimage *image, > > +int x0, int y0, int width, int height, > > +unsigned int flags, int *stride, void **map_info) > > +{ > > + struct brw_context *brw = NULL; > > + struct brw_bo *bo = NULL; > > + void *raw_data = NULL; > > + > > + if (!context || !image || !stride || !map_info || *map_info) > > + return NULL; > > + > > + if (flags & MAP_INTERNAL_MASK) > > + return NULL; > > + > > + brw = context->driverPrivate; > > + bo = image->bo; > > + > > + assert(brw); > > + assert(bo); > > + > > + /* DRI flags and GL_MAP.*_BIT flags are the same, so just pass them > on. */ > > + raw_data = brw_bo_map(brw, bo, flags); > > + > > + if (raw_data) { > > +*map_info = raw_data; > > +*stride = image->pitch; > > + } > > + > > + return raw_data; > > Did you not say the returned address is to pixel0 of the (x,y)x(w,h) rect > within the image? So raw_data + y0*image->pitch + x0*image->cpp? 
> Or something more like raw_data + > y0/util_format_get_blockheight(image->format) > * image->pitch + > x0/util_format_get_blockwidth(image->format) * util_format_get_blocksize( > image->format); > -Chris > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json
On Wed, Nov 8, 2017 at 3:42 PM, Chad Versacewrote: > On Wed 08 Nov 2017, Jason Ekstrand wrote: > > On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace <[1] > chadvers...@chromium.org> > > wrote: > > > > On Tue 07 Nov 2017, Dylan Baker wrote: > > > Quoting Eric Engestrom (2017-11-07 07:25:53) > > > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote: > > > > > I tested this in a setup where the builddir was outside of the > > srcdir. > > > > > --- > > > > > src/intel/vulkan/meson.build | 12 > > > > > 1 file changed, 12 insertions(+) > > > > > > > > > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/ > > meson.build > > > > > index ff24e304ef5..e8b7f407507 100644 > > > > > --- a/src/intel/vulkan/meson.build > > > > > +++ b/src/intel/vulkan/meson.build > > > > > @@ -48,6 +48,18 @@ intel_icd = custom_target( > > > > >install : true, > > > > > ) > > > > > > > > > > +dev_icd = custom_target( > > > > > + 'dev_icd', > > > > > + input : 'anv_icd.py', > > > > > + output : 'dev_icd.@0@.json'.format(target_machine.cpu()), > > > > > > > > Strictly speaking, shouldn't that be `host_machine` [1] ? > > > > I don't see how one would do a canadian build of mesa though, so > > > > host == target should always be true. > > > > > > That's my fault. There are (or were) a number of cases where I used > > target > > > instead of host, that can also be a follow up. > > > > > > In any case: > > > Acked-by: Dylan Baker <[2]dy...@pnwbakers.com> > > > > I build Mesa (with autotools) where host == x86_64 but target == > armv7a. > > > > > > You're using dev_icd with a cross-compile? Yikes! I mean, it can work, > but > > that's not what I would have expected. Also, why are you building anv > and > > targetting armv7a Does that even work? > > Two many interacting topics! > > Eric said: "I don't see how one would do a canadian build of mesa > though, so host == target should always be true". Mesa, not anvil. > I replied that I cross-compile Mesa for ARM. 
> Ok, that makes more sense. > On host-vs-target: My host machine and target machine are rarely the > same. But, for Anvil, of course, the two machines do always have the > same architecture. > > Regardless, the icd filename should always contain the target > architecture, because run the icd *on the target*. > Fair enough. Pedantry in these areas is perfectly reasonable. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json
On Wed 08 Nov 2017, Jason Ekstrand wrote: > On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace <[1]chadvers...@chromium.org> > wrote: > > On Tue 07 Nov 2017, Dylan Baker wrote: > > Quoting Eric Engestrom (2017-11-07 07:25:53) > > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote: > > > > I tested this in a setup where the builddir was outside of the > srcdir. > > > > --- > > > > src/intel/vulkan/meson.build | 12 > > > > 1 file changed, 12 insertions(+) > > > > > > > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/ > meson.build > > > > index ff24e304ef5..e8b7f407507 100644 > > > > --- a/src/intel/vulkan/meson.build > > > > +++ b/src/intel/vulkan/meson.build > > > > @@ -48,6 +48,18 @@ intel_icd = custom_target( > > > > install : true, > > > > ) > > > > > > > > +dev_icd = custom_target( > > > > + 'dev_icd', > > > > + input : 'anv_icd.py', > > > > + output : 'dev_icd.@0@.json'.format(target_machine.cpu()), > > > > > > Strictly speaking, shouldn't that be `host_machine` [1] ? > > > I don't see how one would do a canadian build of mesa though, so > > > host == target should always be true. > > > > That's my fault. There are (or were) a number of cases where I used > target > > instead of host, that can also be a follow up. > > > > In any case: > > Acked-by: Dylan Baker <[2]dy...@pnwbakers.com> > > I build Mesa (with autotools) where host == x86_64 but target == armv7a. > > > You're using dev_icd with a cross-compile? Yikes! I mean, it can work, but > that's not what I would have expected. Also, why are you building anv and > targetting armv7a Does that even work? Too many interacting topics! Eric said: "I don't see how one would do a canadian build of mesa though, so host == target should always be true". Mesa, not anvil. I replied that I cross-compile Mesa for ARM. On host-vs-target: My host machine and target machine are rarely the same. But, for Anvil, of course, the two machines do always have the same architecture. 
Regardless, the icd filename should always contain the target architecture, because you run the icd *on the target*. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Mesa 17.2.5 release candidate
Hello list, The candidate for the Mesa 17.2.5 is now available. Currently we have: - 30 queued - 16 nominated (outstanding) - and 3 rejected patches In the current queue we have: In Mesa Core a GL error related to the ARB_ES3_1_compatibility spec noticed with the GFXBench 5 Aztec Ruins has been corrected. The GLSL compiler is not giving a linker error for mismatching uniform precision with GLSL ES 1.00 any more. This enables, especially, several Android applications which violate this rule, like Forge of Empires, for example. The SPIR-V compiler has corrected an assert triggered when support for the simple memory model was claimed. NIR has also received a correction related to gl_SubGroupG{e,t}MaskARB. The Intel drivers, especially i965, have received several fixes, including a plug for a memory leak and another one in the compiler to avoid GPU hangs on Broxton. The gallium i915g driver for Intel has also seen an important correction. AMD drivers, mostly radv, have also received several fixes, including a correction for a DCC corruption that was visible with Rust, breaking an endless loop in r600, avoiding a GPU hang with vulkan dota2 in VR mode and a plug for a memory leak. Broadcom's vc4 has gotten a fix to compile out some non-debug code that shouldn't be there for the release build. Take a look at section "Mesa stable queue" for more information. Testing reports/general approval Any testing reports (or general approval of the state of the branch) will be greatly appreciated. The plan is to have 17.2.5 next Friday (10th of November), around or shortly after 23:00 GMT. If you have any questions or suggestions - be that about the current patch queue or otherwise, please go ahead. 
Trivial merge conflicts --- commit e7c9892810b65bda232a00f798aa4a258fc8 Author: Jason Ekstrand i965/miptree: Take an isl_format in render_aux_usage (cherry picked from commit 39c5c12f8fbee9eec26a627f247d1f3ef7d4bf39) commit bd2037da82ec8abc1db8834f67496b4b3cd74504 Author: Bas Nieuwenhuizen radv: Don't expose heaps with 0 memory. (cherry picked from commit 806721429afa090380bf39a4958fe4e21c63816c) commit 23eaeeb88ad787b0e3253807fb6f7180a0cb0711 Author: Dave Airlie radv: free attachments on end command buffer. (cherry picked from commit f0ae06a13c1a60f58de77401f705eaf620b5b822) commit 9ba45e7d33bb91d7b0fc65ecae9cbc8a2ba68593 Author: Bas Nieuwenhuizen radv: Don't use vgpr indexing for outputs on GFX9. (cherry picked from commit 6ce550453f1df64caeb956f215d32da96b89f2b1) commit 6a73458510c124856ade7e5a7e805fb08ae13671 Author: Bas Nieuwenhuizen radv: Disallow indirect outputs for GS on GFX9 as well. (cherry picked from commit c07d719e8b683e1bf78f187dd17fe4716f4e5e9c) Cheers, Andres Mesa stable queue - Nominated (16) = Emil Velikov (1): targets/opencl: don't hardcode the icd file install to /etc/... 
Jason Ekstrand (15): intel/fs: Use a pure vertical stride for large register strides intel/fs: Pass builders instead of blocks into emit_[un]zip intel/fs: Be more explicit about our placement of [un]zip intel/fs: Use ANY/ALL32 predicates in SIMD32 intel/fs: Don't stomp f0.1 in SIMD16 ballot intel/fs: Use an explicit D type for vote any/all/eq intrinsics intel/fs: Use a pair of 1-wide MOVs instead of SEL for any/all intel/eu/reg: Add a subscript() helper intel/fs: Fix MOV_INDIRECT for 64-bit values on little-core intel/fs: Fix integer multiplication lowering for src/dst hazards intel/fs: Use the original destination region for int MUL lowering intel/fs: Mark 64-bit values as being contiguous intel/fs: Rework zero-length URB write handling intel/nir: Add a helper for getting the NoIndirect mask intel/nir: Break the linking code into a helper in brw_nir.c Queued (30) === Andres Gomez (7): docs: add sha256 checksums for 17.2.4 cherry-ignore: radv: copy indirect lowering settings from radeonsi cherry-ignore: i965: fix blorp stage_prog_data->param leak cherry-ignore: etnaviv: don't do resolve-in-place without valid TS cherry-ignore: intel/fs: Alloc pull constants off mem_ctx cherry-ignore: added 17.3 nominations. cherry-ignore: automake: include git_sha1.h.in in release tarball Bas Nieuwenhuizen (3): radv: Don't expose heaps with 0 memory. radv: Don't use vgpr indexing for outputs on GFX9. radv: Disallow indirect outputs for GS on GFX9 as well. Dave Airlie (3): i915g: make gears run again. radv: free attachments on end command buffer. radv: add initial copy descriptor support. (v2) Eric Engestrom (1): vc4: fix release build Gert Wollny
[Mesa-dev] [PATCH 4/4] i965: Use prepare_external instead of make_shareable in setTexBuffer2
The setTexBuffer2 hook from GLX is used to implement glXBindTexImageEXT which has tighter restrictions than just "it's shared". In particular, it says that any rendering to the image while it is bound causes the contents to become undefined. The GLX_EXT_texture_from_pixmap extension provides us with an acquire and release in the form of glXBindTexImageEXT and glXReleaseTexImageEXT. The extension spec says, "Rendering to the drawable while it is bound to a texture will leave the contents of the texture in an undefined state. However, no synchronization between rendering and texturing is done by GLX. It is the application's responsibility to implement any synchronization required." From the EGL 1.4 spec for eglBindTexImage: "After eglBindTexImage is called, the specified surface is no longer available for reading or writing. Any read operation, such as glReadPixels or eglCopyBuffers, which reads values from any of the surface’s color buffers or ancillary buffers will produce indeterminate results. In addition, draw operations that are done to the surface before its color buffer is released from the texture produce indeterminate results." In other words, between the bind and release calls, we effectively own those pixels and can assume, so long as we don't crash, that no one else is reading from/writing to the surface. The GLX and EGL implementations call the setTexBuffer2 and releaseTexBuffer function pointers that the driver can hook. In theory, this means that, between BindTexImage and ReleaseTexImage, we own the pixels and it should be safe to track aux usage so we can avoid redundant resolves so long as we start off with the right assumption at the start of the bind/release pair. In practice, however, X11 has slightly different expectations. It's expected that the server may be drawing to the image at the same time as the compositor is texturing from it. 
In that case, the worst expected outcome should be tearing or partial rendering and not random corruption like we see when rendering races with scanout with CCS. Fortunately, the GEM rules about texture/render dependencies save us here. If X11 submits work to write to a pixmap after the compositor has submitted work to texture from it, GEM inserts a dependency between the compositor and X11. If X11 is using a high-priority context, this will cause the compositor to get a temporarily boosted priority while the batch from X11 is waiting on it. This means that we will never have an actual race between X11 and the compositor so no corruption can happen. Unfortunately, however, this means that X11 will likely be rendering to it between the compositor's BindTexImage and ReleaseTexImage calls. If we want to avoid strange issues, we need to be a bit careful about resolves because we can't really transition it away from the "default" aux usage. The only case where this would practically be a problem is with image_load_store where we have to do a full resolve in order to use the image via the data port. Even there it would only be a problem if batches were split such that X11's rendering happens between the resolve and the use of it as a storage image. However, the chances of this happening are very slim so we just emit a warning and hope for the best. This commit adds a new helper intel_miptree_finish_external which resets all aux state to whatever ISL says is the right worst-case "default" for the given modifier. It feels a little awkward to call it "finish" because it's actually an acquire from the perspective of the driver, but it matches the semantics of the other prepare/finish functions. This new helper gets called in intelSetTexBuffer2 instead of make_shareable. We also add an intelReleaseTexBuffer (we passed NULL to releaseTexBuffer before) and call intel_miptree_prepare_external in it. 
This probably does nothing most of the time but it means that the prepare/finish calls are properly matched. Cc: "17.3"Cc: Chad Versace Cc: Daniel Stone Cc: Louis-Francis Ratté-Boulianne Cc: Adam Jackson Cc: Chris Wilson Cc: Keith Packard Cc: Eric Anholt --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 3 ++ src/mesa/drivers/dri/i965/intel_screen.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.h | 2 + src/mesa/drivers/dri/i965/intel_tex_image.c | 61 ++- 5 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 47cfccc..a95b67c 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2791,6 +2791,24 @@ intel_miptree_prepare_external(struct
[Mesa-dev] [PATCH 2/4] i965/tex_image: Reference the renderbuffer miptree in setTexBuffer2
The old code made a new miptree that referenced the same BO as the renderbuffer and just trusted in the memory aliasing to work. There are only two ways in which the new miptree is liable to differ from the one in the renderbuffer and neither of them matter: 1) It may have a different target. The only targets that we can ever see in intelSetTexBuffer2 are GL_TEXTURE_2D and GL_TEXTURE_RECTANGLE and the difference between the two doesn't matter as far as the miptree is concerned; genX(update_sampler_state) only looks at the gl_texture_object and not the miptree when determining whether or not to use normalized coordinates. 2) It may have a very slightly different format. Again, this doesn't matter because we've supported texture views for quite some time so we always look at the gl_texture_object format instead of the miptree format for hardware setup anyway. On the other hand, because we were recreating the miptree, we were using intel_miptree_create_for_bo which doesn't understand modifiers. We really want this function to work without doing a resolve so long as you have modifiers so we need to fix that. 
Cc: "17.3"--- src/mesa/drivers/dri/i965/intel_tex_image.c | 21 +++-- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 37c8e24..c52992a 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -405,6 +405,7 @@ static void intel_set_texture_image_mt(struct brw_context *brw, struct gl_texture_image *image, GLenum internal_format, + mesa_format format, struct intel_mipmap_tree *mt) { @@ -415,7 +416,7 @@ intel_set_texture_image_mt(struct brw_context *brw, _mesa_init_teximage_fields(>ctx, image, mt->surf.logical_level0_px.width, mt->surf.logical_level0_px.height, 1, - 0, internal_format, mt->format); + 0, internal_format, format); brw->ctx.Driver.FreeTextureImageBuffer(>ctx, image); @@ -442,7 +443,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, struct gl_texture_object *texObj; struct gl_texture_image *texImage; mesa_format texFormat = MESA_FORMAT_NONE; - struct intel_mipmap_tree *mt; GLenum internal_format = 0; texObj = _mesa_get_current_tex_object(ctx, target); @@ -464,31 +464,24 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, if (rb->mt->cpp == 4) { if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { internal_format = GL_RGB; - texFormat = MESA_FORMAT_B8G8R8X8_UNORM; + texFormat = MESA_FORMAT_B8G8R8A8_UNORM; } else { internal_format = GL_RGBA; texFormat = MESA_FORMAT_B8G8R8A8_UNORM; } } else if (rb->mt->cpp == 2) { + /* This is 565 */ internal_format = GL_RGB; texFormat = MESA_FORMAT_B5G6R5_UNORM; } intel_miptree_make_shareable(brw, rb->mt); - mt = intel_miptree_create_for_bo(brw, rb->mt->bo, texFormat, 0, -rb->Base.Base.Width, -rb->Base.Base.Height, -1, rb->mt->surf.row_pitch, -MIPTREE_CREATE_DEFAULT); - if (mt == NULL) - return; - mt->target = target; _mesa_lock_texture(>ctx, texObj); texImage = _mesa_get_tex_image(ctx, texObj, target, 0); - intel_set_texture_image_mt(brw, texImage, 
internal_format, mt); - intel_miptree_release(); + intel_set_texture_image_mt(brw, texImage, internal_format, + texFormat, rb->mt); _mesa_unlock_texture(>ctx, texObj); } @@ -581,7 +574,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, const GLenum internal_format = image->internal_format != 0 ? image->internal_format : _mesa_get_format_base_format(mt->format); - intel_set_texture_image_mt(brw, texImage, internal_format, mt); + intel_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt); intel_miptree_release(); } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] i965/tex_image: Pull the tex format from the renderbuffer in intelSetTexBuffer2
Cc: "17.3"--- src/mesa/drivers/dri/i965/intel_tex_image.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index c52992a..28800f6 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -462,20 +462,23 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, return; if (rb->mt->cpp == 4) { - if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) internal_format = GL_RGB; - texFormat = MESA_FORMAT_B8G8R8A8_UNORM; - } - else { + else internal_format = GL_RGBA; - texFormat = MESA_FORMAT_B8G8R8A8_UNORM; - } } else if (rb->mt->cpp == 2) { /* This is 565 */ internal_format = GL_RGB; - texFormat = MESA_FORMAT_B5G6R5_UNORM; } + /* The interactions between GLX_EXT_texture_from_pixmap and sRGB are not +* defined at all. However, since X has classically assumed that your data +* is just bits and sRGB rendering was added on, the assumption is that the +* the result of glXBindTexImageEXT will be a texture with a linear format +* even if it was rendered with sRGB encoding enabled. +*/ + texFormat = _mesa_get_srgb_format_linear(intel_rb_format(rb)); + intel_miptree_make_shareable(brw, rb->mt); _mesa_lock_texture(>ctx, texObj); -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] i965/miptree: Loosen the format check in miptree_match_image
This function is used to determine when we need to re-allocate a miptree. Since we do nothing different in miptree allocation for sRGB vs. linear, loosening this should be safe and may lead to less copying and reallocating in some odd cases. Cc: "17.3"Cc: Kenneth Graunke --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 -- src/mesa/drivers/dri/i965/intel_tex.c | 2 +- src/mesa/drivers/dri/i965/intel_tex_obj.h | 4 ++-- src/mesa/drivers/dri/i965/intel_tex_validate.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 82f5a81..47cfccc 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1298,7 +1298,8 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, if (mt->etc_format != MESA_FORMAT_NONE) mt_format = mt->etc_format; - if (image->TexFormat != mt_format) + if (_mesa_get_srgb_format_linear(image->TexFormat) != + _mesa_get_srgb_format_linear(mt_format)) return false; intel_get_image_dims(image, , , ); @@ -1537,7 +1538,8 @@ intel_miptree_copy_slice(struct brw_context *brw, assert(src_layer < get_num_phys_layers(_mt->surf, src_level - src_mt->first_level)); - assert(src_mt->format == dst_mt->format); + assert(_mesa_get_srgb_format_linear(src_mt->format) == + _mesa_get_srgb_format_linear(dst_mt->format)); if (dst_mt->compressed) { unsigned int i, j; diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 65a1cb3..0650b6e 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -176,7 +176,7 @@ intel_alloc_texture_storage(struct gl_context *ctx, intel_texobj->needs_validate = false; intel_texobj->validated_first_level = 0; intel_texobj->validated_last_level = levels - 1; - intel_texobj->_Format = intel_texobj->mt->format; + intel_texobj->_Format = first_image->TexFormat; return true; } diff --git 
a/src/mesa/drivers/dri/i965/intel_tex_obj.h b/src/mesa/drivers/dri/i965/intel_tex_obj.h index 27c18b7..526f5ce 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_obj.h +++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h @@ -57,8 +57,8 @@ struct intel_texture_object bool needs_validate; /* Mesa format for the validated texture object. For non-views this -* will always be the same as mt->format. For views, it may differ -* since the mt is shared across views with differing formats. +* will always be the same as texObj->Image[0][0].TexFormat. For views, it +* may differ since the mt is shared across views with differing formats. */ mesa_format _Format; diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 2b7798c..ef7f907 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -174,7 +174,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit) intelObj->validated_first_level = validate_first_level; intelObj->validated_last_level = validate_last_level; - intelObj->_Format = intelObj->mt->format; + intelObj->_Format = firstImage->base.Base.TexFormat, intelObj->needs_validate = false; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.
Dylan Baker writes: > [ Unknown signature status ] > Quoting Eric Anholt (2017-11-08 14:14:57) >> --- >> meson.build | 5 +++-- >> src/gallium/drivers/vc4/meson.build | 13 + >> 2 files changed, 16 insertions(+), 2 deletions(-) >> >> diff --git a/meson.build b/meson.build >> index 0118c9a7c5ef..189c9be5b59c 100644 >> --- a/meson.build >> +++ b/meson.build >> @@ -485,8 +485,9 @@ endif >> >> # FIXME: enable asm when cross compiler >> # This is doable (autotools does it), but it's not of immediate concern >> -if meson.is_cross_build() >> - message('Cross compiling, disabling asm') >> +if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or >> + host_machine.cpu_family() == 'x86_64') > > How about: > if meson.is_cross_build() and host_machine.cpu_family().startswith('x86') > > Other than that, for the series: > Reviewed-by: Dylan Baker I like it. Thanks! signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/4] meson: vc4 ARMHF cross-build support
On 09/11/17 09:14, Eric Anholt wrote: Timothy Arceri noted that vc4 didn't seem to have the NEON stuff hooked up, so I worked on getting vc4 cross builds working for me finally. I haven't tested the result on HW quite yet. I can now build vc4 with asm enabled with this series so: Tested-by: Timothy Arceri Eric Anholt (4): meson: Leave dep_llvm empty if !with_llvm meson: Drop stale comment about making valgrind conditional. meson: Always link libgallium_dri.so against dep_thread. meson: Enable VC4's NEON assembly support. meson.build | 13 +++-- src/gallium/drivers/vc4/meson.build | 13 + src/gallium/targets/dri/meson.build | 1 + 3 files changed, 21 insertions(+), 6 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #9 from Dave Gilbert--- (In reply to Jan Vesely from comment #8) > (In reply to Dave Gilbert from comment #6) > > (In reply to Jan Vesely from comment #5) > > > (In reply to Dave Gilbert from comment #4) > > > > Created attachment 135313 [details] > > > > foo.link-0.ll > > > > > > > > That's all 3 of the debug files it produced. > > > > (I wasn't sure which were the llvm and which the isa dumps; I guess the > > > > asm > > > > is the isa? and the ll's are both llvm dumps?) > > > > > > yes. the first .ll is from compilation step, the other one is from linking > > > step. > > > > > > .ll dump looks correct. > > > .asm also looks correct. > > > > > > you can try producing multiple asm dumps for working and non-working runs. > > > But I don't think that the llvm is the culprit here. > > > > > > Can you try waiting for the kernel execution to complete explicitly before > > > mapping the buffer? > > > Ideally call clFinish() on line 63. > > > > Since I'm on the C++ binding (probably a mistake) I used: > > queue.finish(); > > > > and it seems to be working. > > > > (This also corresponds possibly to what I'm seeing on a more complex kernel; > > with a more complex kernel I'm seeing on a whole pile of data on the last > > few Z slices as being bogus suggesting it's not finished). > > > > Dave > > thanks for testing. I see you are using mesa 17.2. > > there were few changes to blocking call synchronization that went to mesa > 17.3: > 02f8ac6b70033a1b240d497c4664c359d2398cc3 (clover: Wrap event::wait_count in > a method taking care of the required locking.) > bc4000ee40c78efe1e5e8a6244d4bb55389d8418 (clover: Run the associated action > before an event is signalled.) > 3a5b69c09ba355c616c274b0c7f5aba3bd21fd54 (clover: Wait for requested > operation if blocking flag is set) > > which might help address the issue. Can you test mesa 17.3? Yeh, I'll figure out how to get 17.3 built on this box. 
-- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] glx: Prepare the DRI backends for GLX_EXT_no_config_context
Adam Jackson writes: > This should be safe as these backends already support the EGL version of > this extension. DRI1 is not affected because it does not support > GLX_ARB_create_context anyway. DRI-Windows is not prepared to implement > this as there's no equivalent WGL extension, and wglCreateContextAttribs > seems to really want the HDC's pixel format to be set. Patch 1-2 are: Reviewed-by: Eric Anholt
Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version
On Wed, Nov 8, 2017 at 4:13 AM, Nicolai Hähnlewrote: > On 08.11.2017 09:53, Michel Dänzer wrote: >> >> On 07/11/17 10:58 PM, Marek Olšák wrote: >>> >>> On Tue, Nov 7, 2017 at 9:01 PM, Nicolai Hähnle >>> wrote: On 07.11.2017 18:35, Michel Dänzer wrote: > > > On 07/11/17 06:28 PM, Marek Olšák wrote: >> >> >> Hi, >> >> This patch is too large for the mailing list: >> >> >> >> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib=0e0f044268d3c1af2e78f161aaa2d92c30167cc1 > > > > From the commit log: > >> I just overwrote all Mesa files with internal addrlib and discarded >> hunks that we should probably keep, but I might have missed something. > > > > FWIW, if a separate branch was used for importing addrlib changes, Git > could keep track of our changes to it in the Mesa tree. I concur in principle. In practice, I explored doing that, but the commit discipline on the internal addrlib repository is pretty crappy, so we'd end up having to massage commits anyway. Maybe we can find a sweet spot somewhere by updating slightly more regularly, perhaps once a month. >>> >>> >>> That's too much time-consuming work with no benefit. I used to do >>> that, but it sucked. I prefer 1 commit with everything - easy conflict >>> resolution, not having to rebase 60 commits that don't make sense. >> >> >> FWIW, I didn't mean importing individual commits of the addrlib >> repository into Mesa. Just having a separate branch[0] where addrlib >> snapshots are imported and which is then merged to master. That way Git >> will keep track of changes in both repositories and automatically merge >> them as much as possible. Just using Git for what it was made for. :) > > > What do you mean precisely? I did some experiments with a structure like > this: > > Mesa master o--o--o--o--o--o--o > // > addrlibo--o--o--oo > > where addrlib is a branch that *only* contains addrlib and has a completely > separate initial commit. 
This works somewhat reasonably, except I was > worried that it might break bisecting Mesa by trying some of the commits > that only exist in the addrlib branch. > > Though now that I think about it again, maybe bisecting is fine because none > of the addrlib commits are ever in the "future cone" of any Mesa master > commit. If you want to avoid some of the merge pain without creating a totally separate universe, why not do something like addrlib ooo / \\ Mesa master o--o--o--o--o--o--o--o--o Just a thought. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version
Nicolai Hähnlewrites: > On 08.11.2017 09:53, Michel Dänzer wrote: >> On 07/11/17 10:58 PM, Marek Olšák wrote: >>> On Tue, Nov 7, 2017 at 9:01 PM, Nicolai Hähnle wrote: On 07.11.2017 18:35, Michel Dänzer wrote: > > On 07/11/17 06:28 PM, Marek Olšák wrote: >> >> Hi, >> >> This patch is too large for the mailing list: >> >> >> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib=0e0f044268d3c1af2e78f161aaa2d92c30167cc1 > > > From the commit log: > >> I just overwrote all Mesa files with internal addrlib and discarded >> hunks that we should probably keep, but I might have missed something. > > > FWIW, if a separate branch was used for importing addrlib changes, Git > could keep track of our changes to it in the Mesa tree. I concur in principle. In practice, I explored doing that, but the commit discipline on the internal addrlib repository is pretty crappy, so we'd end up having to massage commits anyway. Maybe we can find a sweet spot somewhere by updating slightly more regularly, perhaps once a month. >>> >>> That's too much time-consuming work with no benefit. I used to do >>> that, but it sucked. I prefer 1 commit with everything - easy conflict >>> resolution, not having to rebase 60 commits that don't make sense. >> >> FWIW, I didn't mean importing individual commits of the addrlib >> repository into Mesa. Just having a separate branch[0] where addrlib >> snapshots are imported and which is then merged to master. That way Git >> will keep track of changes in both repositories and automatically merge >> them as much as possible. Just using Git for what it was made for. :) > > What do you mean precisely? I did some experiments with a structure like > this: > > Mesa master o--o--o--o--o--o--o >// > addrlibo--o--o--oo > > where addrlib is a branch that *only* contains addrlib and has a > completely separate initial commit. 
This works somewhat reasonably, > except I was worried that it might break bisecting Mesa by trying some > of the commits that only exist in the addrlib branch. > > Though now that I think about it again, maybe bisecting is fine because > none of the addrlib commits are ever in the "future cone" of any Mesa > master commit. I don't believe bisect will pick commits on addrlib if good and bad were both on master. I've used this git structure for maintaining GL xml files in libepoxy, and it's really nice. signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #8 from Jan Vesely--- (In reply to Dave Gilbert from comment #6) > (In reply to Jan Vesely from comment #5) > > (In reply to Dave Gilbert from comment #4) > > > Created attachment 135313 [details] > > > foo.link-0.ll > > > > > > That's all 3 of the debug files it produced. > > > (I wasn't sure which were the llvm and which the isa dumps; I guess the > > > asm > > > is the isa? and the ll's are both llvm dumps?) > > > > yes. the first .ll is from compilation step, the other one is from linking > > step. > > > > .ll dump looks correct. > > .asm also looks correct. > > > > you can try producing multiple asm dumps for working and non-working runs. > > But I don't think that the llvm is the culprit here. > > > > Can you try waiting for the kernel execution to complete explicitly before > > mapping the buffer? > > Ideally call clFinish() on line 63. > > Since I'm on the C++ binding (probably a mistake) I used: > queue.finish(); > > and it seems to be working. > > (This also corresponds possibly to what I'm seeing on a more complex kernel; > with a more complex kernel I'm seeing on a whole pile of data on the last > few Z slices as being bogus suggesting it's not finished). > > Dave thanks for testing. I see you are using mesa 17.2. there were few changes to blocking call synchronization that went to mesa 17.3: 02f8ac6b70033a1b240d497c4664c359d2398cc3 (clover: Wrap event::wait_count in a method taking care of the required locking.) bc4000ee40c78efe1e5e8a6244d4bb55389d8418 (clover: Run the associated action before an event is signalled.) 3a5b69c09ba355c616c274b0c7f5aba3bd21fd54 (clover: Wait for requested operation if blocking flag is set) which might help address the issue. Can you test mesa 17.3? -- You are receiving this mail because: You are the QA Contact for the bug. 
You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.
Quoting Eric Anholt (2017-11-08 14:14:57) > --- > meson.build | 5 +++-- > src/gallium/drivers/vc4/meson.build | 13 + > 2 files changed, 16 insertions(+), 2 deletions(-) > > diff --git a/meson.build b/meson.build > index 0118c9a7c5ef..189c9be5b59c 100644 > --- a/meson.build > +++ b/meson.build > @@ -485,8 +485,9 @@ endif > > # FIXME: enable asm when cross compiler > # This is doable (autotools does it), but it's not of immediate concern > -if meson.is_cross_build() > - message('Cross compiling, disabling asm') > +if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or > + host_machine.cpu_family() == 'x86_64') How about: if meson.is_cross_build() and host_machine.cpu_family().startswith('x86') Other than that, for the series: Reviewed-by: Dylan Baker> + message('Cross compiling, disabling x86/x86_64 asm') >with_asm = false > endif > > diff --git a/src/gallium/drivers/vc4/meson.build > b/src/gallium/drivers/vc4/meson.build > index 38b47fbdd977..572d4b4fa9aa 100644 > --- a/src/gallium/drivers/vc4/meson.build > +++ b/src/gallium/drivers/vc4/meson.build > @@ -81,6 +81,18 @@ files_libvc4 = files( >'vc4_uniforms.c', > ) > > +libvc4_neon = [] > +if with_asm_arch == 'arm' > + libvc4_neon = static_library( > +'vc4_neon', > +'vc4_tiling_lt_neon.c', > +include_directories : [ > + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom > +], > +c_args : '-mfpu=neon', > + ) > +endif > + > simpenrose_c_args = [] > dep_simpenrose = dependency('simpenrose', required : false) > if dep_simpenrose.found() > @@ -94,6 +106,7 @@ libvc4 = static_library( > inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, > inc_gallium_drivers, inc_drm_uapi, >], > + link_with: libvc4_neon, >c_args : [c_vis_args, simpenrose_c_args], >cpp_args : [cpp_vis_args], >dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind], > -- > 2.15.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc 
Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] meson: Drop stale comment about making valgrind conditional.
It was fixed in 5c2ff5773a707519f6a773126f201c4e1e8a42d7. --- meson.build | 1 - 1 file changed, 1 deletion(-) diff --git a/meson.build b/meson.build index 117ed7c087f4..0118c9a7c5ef 100644 --- a/meson.build +++ b/meson.build @@ -691,7 +691,6 @@ if with_glvnd pre_args += '-DUSE_LIBGLVND=1' endif -# TODO: make this conditional if with_valgrind != 'false' dep_valgrind = dependency('valgrind', required : with_valgrind == 'true') if dep_valgrind.found() -- 2.15.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] meson: vc4 ARMHF cross-build support
Timothy Arceri noted that vc4 didn't seem to have the NEON stuff hooked up, so I worked on getting vc4 cross builds working for me finally. I haven't tested the result on HW quite yet. Eric Anholt (4): meson: Leave dep_llvm empty if !with_llvm meson: Drop stale comment about making valgrind conditional. meson: Always link libgallium_dri.so against dep_thread. meson: Enable VC4's NEON assembly support. meson.build | 13 +++-- src/gallium/drivers/vc4/meson.build | 13 + src/gallium/targets/dri/meson.build | 1 + 3 files changed, 21 insertions(+), 6 deletions(-) -- 2.15.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] meson: Leave dep_llvm empty if !with_llvm
The gallium auxiliary build would link against llvm, for the gallivm code that it didn't build. This broke the build on my armhf cross, where libLLVM-3.9.so is not multiarch and thus points to x86-64 libs. --- meson.build | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 44e062e01e9f..117ed7c087f4 100644 --- a/meson.build +++ b/meson.build @@ -657,10 +657,11 @@ llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 'mcjit'] if with_amd_vk llvm_modules += ['amdgpu', 'bitreader', 'ipo'] endif -dep_llvm = dependency( - 'llvm', version : '>= 3.9.0', required : with_amd_vk, modules : llvm_modules, -) +dep_llvm = [] if with_llvm + dep_llvm = dependency( +'llvm', version : '>= 3.9.0', required : with_amd_vk, modules : llvm_modules, + ) if dep_llvm.found() _llvm_version = dep_llvm.version().split('.') # Development versions of LLVM have an 'svn' suffix, we don't want that for -- 2.15.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.
--- meson.build | 5 +++-- src/gallium/drivers/vc4/meson.build | 13 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 0118c9a7c5ef..189c9be5b59c 100644 --- a/meson.build +++ b/meson.build @@ -485,8 +485,9 @@ endif # FIXME: enable asm when cross compiler # This is doable (autotools does it), but it's not of immediate concern -if meson.is_cross_build() - message('Cross compiling, disabling asm') +if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or + host_machine.cpu_family() == 'x86_64') + message('Cross compiling, disabling x86/x86_64 asm') with_asm = false endif diff --git a/src/gallium/drivers/vc4/meson.build b/src/gallium/drivers/vc4/meson.build index 38b47fbdd977..572d4b4fa9aa 100644 --- a/src/gallium/drivers/vc4/meson.build +++ b/src/gallium/drivers/vc4/meson.build @@ -81,6 +81,18 @@ files_libvc4 = files( 'vc4_uniforms.c', ) +libvc4_neon = [] +if with_asm_arch == 'arm' + libvc4_neon = static_library( +'vc4_neon', +'vc4_tiling_lt_neon.c', +include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom +], +c_args : '-mfpu=neon', + ) +endif + simpenrose_c_args = [] dep_simpenrose = dependency('simpenrose', required : false) if dep_simpenrose.found() @@ -94,6 +106,7 @@ libvc4 = static_library( inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, inc_gallium_drivers, inc_drm_uapi, ], + link_with: libvc4_neon, c_args : [c_vis_args, simpenrose_c_args], cpp_args : [cpp_vis_args], dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind], -- 2.15.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] meson: Always link libgallium_dri.so against dep_thread.
Somehow on my cross build the -pthread is getting lost. All the other deps seem to work out fine. --- src/gallium/targets/dri/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index 0ce088e1aca6..c591b75d0379 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -134,6 +134,7 @@ libgallium_dri = shared_library( ], dependencies : [ gallium_dri_depends, dep_selinux, dep_expat, dep_libdrm, dep_llvm, +dep_thread, ], ) -- 2.15.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] egl/wayland: Support for KHR_partial_update
Hi Emil, Emil Velikovwrites: > On 27 October 2017 at 05:54, Harish Krupo wrote: >> Hi Eric, >> >> Eric Engestrom writes: >> >>> On Monday, 2017-10-23 16:20:54 +0530, Harish Krupo wrote: This passes 33/37 deqp tests related to partial_update, 4 are not supported. Tests not supported: dEQP-EGL.functional.negative_partial_update.not_postable_surface dEQP-EGL.functional.negative_partial_update.not_current_surface dEQP-EGL.functional.negative_partial_update.buffer_preserved dEQP-EGL.functional.negative_partial_update.not_current_surface2 Reason: No matching egl config found. v2: Remove unnecessary return statement. Keep function names consistent. (Emil Velikov) Add not supported list to commit message. (Eric Engestrom) v3: Remove explicit with_damage variable. (Eric Engestrom) Signed-off-by: Harish Krupo --- src/egl/drivers/dri2/platform_wayland.c | 54 ++--- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index b38eb1c335..8846099d57 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -790,27 +790,44 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy, return ret; } +/** + * Called via eglSetDamageRegionKHR(), drv->API.SetDamageRegion(). 
+ */ static EGLBoolean -try_damage_buffer(struct dri2_egl_surface *dri2_surf, - const EGLint *rects, - EGLint n_rects) +dri2_wl_set_damage_region(_EGLDriver *drv, + _EGLDisplay *dpy, + _EGLSurface *surf, + const EGLint *rects, + EGLint n_rects) { - if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_surface_wrapper) - < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) - return EGL_FALSE; + struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); - for (int i = 0; i < n_rects; i++) { - const int *rect = [i * 4]; + /* The spec doesn't mention what should be returned in case of +* failure in setting the damage buffer with the window system, so +* setting the damage to maximum surface area +*/ + if (!n_rects || + wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_surface_wrapper) + < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) { + wl_surface_damage(dri2_surf->wl_surface_wrapper, +0, 0, INT32_MAX, INT32_MAX); + } else { >>> >>> I know Emil suggested you remove the `return` in an earlier version, but >>> if you add it back here you can drop the else, and the diff will look >>> much cleaner, keeping only the version check getting an `|| !n_rects` >>> and `return false` becoming `damage(everything)`. >>> >>> Other than that, it looks good to me. Thanks :) >>> >> >> Ok, will do that change. >> It would be something like this: >> if (!n_rects || >>wl_proxy_get_version((struct wl_proxy *) >> dri2_surf->wl_surface_wrapper) >>< WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) { >> wl_surface_damage(dri2_surf->wl_surface_wrapper, >> 0, 0, INT32_MAX, INT32_MAX); >> if (!n_rects) >> return EGL_TRUE; >> >> return EGL_FALSE; >> } >> >> I have a small confusion though: >> As per spec [1]: >> * If eglSetDamageRegionKHR has already been called on since the >> most recent frame boundary, an EGL_BAD_ACCESS error is generated >> >> The "already been called" part is confusing. 
Should it be interpreted >> as already been called and the previous call returned a true value or it >> has already been called irrespective of the previous return value? >> >> AFAICT from deqp [2]: it expects true on the first call, false on the >> second and expects EGL_BAD_ACCESS (it follows the 2nd approach where >> irrespective of the return value, calling eglSetDamageRegionKHR twice is >> an error). But in the current implementation the SetDamageRegionCalled >> variable will be set only when we are successful in setting the damage >> with the window system. In my case I always get a false return value (I >> am testing on gnome wayland). Thus it ends up not returning >> EGL_BAD_ACCESS and the test fails. >> >> To avoid this problem in the previous patch I set the return value to >> true and set the damage region to full when version doesn't match. :) >> >> One way to fix this would be to set SetDamageRegionCalled to true >> irrespective of the return value. >> >> Is this okay? I am still trying to see if this would cause
Re: [Mesa-dev] [PATCH] meson: Add script to use VERSION file for getting version
Quoting Eric Engestrom (2017-11-08 12:38:26) > > > On 8 November 2017 19:32:22 GMT, Dylan Bakerwrote: > > Quoting Eric Engestrom (2017-11-08 04:21:41) > > > On Wednesday, 2017-11-01 11:58:16 -0700, Dylan Baker wrote: > > > > Meson has up until this point set it's version in the root > > meson.build > > > > script. While there are other build systems them creates "one more > > > > thing" to duplicate between meson and every other build system, > > this > > > > script is a simple "read, strip, print" sort of deal to allow > > meson to > > > > read the VERSION file. > > > > > > > > I chose to implement this in python since python is portable, and > > to > > > > keep the meson.build script clean. This is also complicated by the > > fact > > > > that the project() call *must* be the first non-comment,non-blank > > in the > > > > toplevel meson.build script. > > > > > > > > Signed-off-by: Dylan Baker > > > > --- > > > > meson.build | 2 +- > > > > scripts/meson_get_version.py | 35 > > +++ > > > > 2 files changed, 36 insertions(+), 1 deletion(-) > > > > create mode 100644 scripts/meson_get_version.py > > > > > > > > diff --git a/meson.build b/meson.build > > > > index 6ad8c8bbf4b..3f77380f7df 100644 > > > > --- a/meson.build > > > > +++ b/meson.build > > > > @@ -21,7 +21,7 @@ > > > > project( > > > >'mesa', > > > >['c', 'cpp'], > > > > - version : '17.3.0-devel', > > > > + version : > > run_command(find_program('scripts/meson_get_version.py')).stdout(), > > > > And actually, thinking about windows, this should be: > > run_command( > > [find_program('python', 'python2', 'python3'), > > 'bin/meson_get_version.py'] > > ).stdout(), > > > > Since windows doesn't support shabangs link nix does. > > Indeed; r-b still stands with this change :) > > Speaking of, did you manage to get mesa building with meson on windows? Not yet, it's on my list of things to do soon, but I'd like to get as much of the Linux/unix-like stuff building first as possible. 
macOS is my first adventure outside of the comfortable Linux/BSD world. signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json
On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace wrote: > On Tue 07 Nov 2017, Dylan Baker wrote: > > Quoting Eric Engestrom (2017-11-07 07:25:53) > > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote: > > > > I tested this in a setup where the builddir was outside of the > srcdir. > > > > --- > > > > src/intel/vulkan/meson.build | 12 > > > > 1 file changed, 12 insertions(+) > > > > > > > > diff --git a/src/intel/vulkan/meson.build > b/src/intel/vulkan/meson.build > > > > index ff24e304ef5..e8b7f407507 100644 > > > > --- a/src/intel/vulkan/meson.build > > > > +++ b/src/intel/vulkan/meson.build > > > > @@ -48,6 +48,18 @@ intel_icd = custom_target( > > > >install : true, > > > > ) > > > > > > > > +dev_icd = custom_target( > > > > + 'dev_icd', > > > > + input : 'anv_icd.py', > > > > + output : 'dev_icd.@0@.json'.format(target_machine.cpu()), > > > > > > Strictly speaking, shouldn't that be `host_machine` [1] ? > > > I don't see how one would do a canadian build of mesa though, so > > > host == target should always be true. > > > > That's my fault. There are (or were) a number of cases where I used > target > > instead of host, that can also be a follow up. > > > > In any case: > > Acked-by: Dylan Baker > > I build Mesa (with autotools) where host == x86_64 but target == armv7a. > You're using dev_icd with a cross-compile? Yikes! I mean, it can work, but that's not what I would have expected. Also, why are you building anv and targeting armv7a? Does that even work? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH v1 00/30] anv: dma-buf and DRM format modifiers
On Tue 07 Nov 2017, Jason Ekstrand wrote: > All of the pre-work patches have been reviewed by myself and Lionel. I've > also > read through the rest of the series and things look pretty good to me. I did > make some scattered comments but they shouldn't be a big deal. > > My primary concern with the series is the lack of CCS support. Getting that > working correctly is clearly the biggest question mark in all of modifiers so > I'm hesitant to pass judgment on this as a patch series (I think the spec is > ok) with that piece still missing. That's reasonable. I began implementing CCS support, but removed it from the series due to lack of tests. Any CCS code I write, I need to test by sharing the VkImage with KMS before I feel confident in it. I'll do that and resend. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Adding support for EXT_sRGB for Opengl ES
Hi Emil, Emil Velikovwrites: > Hi Harish, > > This seems to have fallen through the cracks, right? Thanks for bringing this up again :) > Keep in mind that I've not checked all the existing code paths - just > skimming through the patch itself. > > s/Adding support for EXT_sRGB for Opengl ES/mesa: add support for GL_EXT_sRGB/ > This was my first patch in mesa, so the rookie mistakes. :) > On 7 April 2017 at 05:20, Harish Krupo wrote: >> This addes support for the GL_EXT_sRGB extension for OpengGL ES 1.0 and >> above. >> With this patch this test passes in dEQP: >> dEQP-GLES2.capability.extensions.uncompressed_texture_formats.GL_EXT_sRGB >> > Having a more comprehensive list [as mentioned by Tapani] would be great here. > X tests, A pass, B fail, C not supported. > >> Signed-off-by: Harish Krupo >> --- >> src/mapi/glapi/gen/es_EXT.xml| 8 >> src/mesa/drivers/dri/i965/intel_extensions.c | 1 + >> src/mesa/main/extensions_table.h | 1 + >> src/mesa/main/fbobject.c | 2 +- >> src/mesa/main/genmipmap.c| 3 +++ >> src/mesa/main/mtypes.h | 1 + >> 6 files changed, 15 insertions(+), 1 deletion(-) >> >> diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml >> index 3e705eb409..a6fd7c755a 100644 >> --- a/src/mapi/glapi/gen/es_EXT.xml >> +++ b/src/mapi/glapi/gen/es_EXT.xml >> @@ -795,6 +795,14 @@ >> >> >> >> + >> + > Extension number seems to be 105 > > >> --- a/src/mesa/main/genmipmap.c >> +++ b/src/mesa/main/genmipmap.c >> @@ -96,6 +96,9 @@ >> _mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx, >> (_mesa_is_es3_color_renderable(internalformat) && >>_mesa_is_es3_texture_filterable(ctx, internalformat)); >> } >> + else if (!_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_sRGB && >> (internalformat == GL_SRGB)) { >> + return GL_INVALID_OPERATION; >> + } > I'm not sure how this would trigger - the function returns bool, thus > the GL_INVALID_OPERATION will be promoted to true. > Yes, this is wrong, will look into it. 
IIRC, there was some other bigger issue with this extension due to which I was not able to proceed. I will gather all the related information and will ask here. I am sure somebody will be able to help. Thank you Regards Harish Krupo > Hence the caller (generate_texture_mipmap) will continue instead of > flagging an error. > > HTH > Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json
On Tue 07 Nov 2017, Dylan Baker wrote: > Quoting Eric Engestrom (2017-11-07 07:25:53) > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote: > > > I tested this in a setup where the builddir was outside of the srcdir. > > > --- > > > src/intel/vulkan/meson.build | 12 > > > 1 file changed, 12 insertions(+) > > > > > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build > > > index ff24e304ef5..e8b7f407507 100644 > > > --- a/src/intel/vulkan/meson.build > > > +++ b/src/intel/vulkan/meson.build > > > @@ -48,6 +48,18 @@ intel_icd = custom_target( > > >install : true, > > > ) > > > > > > +dev_icd = custom_target( > > > + 'dev_icd', > > > + input : 'anv_icd.py', > > > + output : 'dev_icd.@0@.json'.format(target_machine.cpu()), > > > > Strictly speaking, shouldn't that be `host_machine` [1] ? > > I don't see how one would do a canadian build of mesa though, so > > host == target should always be true. > > That's my fault. There are (or were) a number of cases where I used target > instead of host, that can also be a follow up. > > In any case: > Acked-by: Dylan BakerI build Mesa (with autotools) where host == x86_64 but target == armv7a. The icd filename should have the same architecture as the driver it loads, and that's the target_machine. You never need to access the dev_icd.*.json on the host machine (that is, unless your target machine and host machine are the same machine). ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] meson: build gallium-xlib based glx
Dylan Bakerwrites: > Signed-off-by: Dylan Baker > --- > meson.build | 16 +++--- > src/gallium/meson.build | 11 +++- > src/gallium/state_trackers/glx/xlib/meson.build | 27 ++ > src/gallium/targets/libgl-xlib/meson.build | 68 > + > src/gallium/winsys/sw/xlib/meson.build | 27 ++ > 5 files changed, 139 insertions(+), 10 deletions(-) > create mode 100644 src/gallium/state_trackers/glx/xlib/meson.build > create mode 100644 src/gallium/targets/libgl-xlib/meson.build > create mode 100644 src/gallium/winsys/sw/xlib/meson.build > > diff --git a/src/gallium/targets/libgl-xlib/meson.build > b/src/gallium/targets/libgl-xlib/meson.build > new file mode 100644 > index 000..825b7b4a0f1 > --- /dev/null > +++ b/src/gallium/targets/libgl-xlib/meson.build > @@ -0,0 +1,68 @@ > +# Copyright © 2017 Intel Corporation > + > +# Permission is hereby granted, free of charge, to any person obtaining a > copy > +# of this software and associated documentation files (the "Software"), to > deal > +# in the Software without restriction, including without limitation the > rights > +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > +# copies of the Software, and to permit persons to whom the Software is > +# furnished to do so, subject to the following conditions: > + > +# The above copyright notice and this permission notice shall be included in > +# all copies or substantial portions of the Software. > + > +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > THE > +# SOFTWARE. 
> + > +# TODO: support non-static targets > +# Static targets are always enabled in autotools (unless you modify > +# configure.ac) > + > +gallium_xlib_c_args = [ > + '-DGALLIUM_SOFTPIPE', > + '-DGALLIUM_RBUG', > + '-DGALLIUME_TRACE', > +] > +gallium_xlib_ld_args = [] > +gallium_xlib_link_with = [] > +gallium_xlib_depends = [] > +gallium_xlib_link_depends = [] > +gallium_xlib_xlibvers = [] > +gallium_xlib_sources = [] > + > +if with_ld_version_script > + gallium_xlib_ld_args += ['-Wl,--version-script', > join_paths(meson.current_source_dir(), 'libgl-xlib.sym')] > + gallium_xlib_link_depends += files('libgl-xlib.sym') > +endif > +if with_shared_glapi > + gallium_xlib_link_with += libglapi > +endif > +if with_llvm > + gallium_xlib_c_args += '-DGALLIUM_LLVMPIPE' > + gallium_xlib_depends += dep_llvm > + gallium_xlib_link_with += libllvmpipe > +endif > +# TODO: SWR > + > +libgl = shared_library( > + 'GL', > + files('xlib.c'), > + include_directories : [ > +inc_common, inc_gallium_winsys, inc_gallium_drivers, > +include_directories('../../state_trackers/glx/xlib'), > + ], > + c_args : [c_vis_args, gallium_xlib_c_args], > + link_args : [ld_args_bsymbolic, ld_args_gc_sections, gallium_xlib_ld_args], > + link_depends : gallium_xlib_link_depends, > + link_with : [ > +libxlib, libws_xlib, libsoftpipe, libtrace, librbug, libglapi_static, > +libgallium, libmesa_util, libmesa_gallium, gallium_xlib_link_with, > + ], > + dependencies : [dep_clock, dep_unwind, gallium_xlib_depends], We shouldn't have to manually specify most of these deps, I think, since they should be transitively pulled in by the static libraries using them, right? It's fine either way, though. > + install : true, > + version : '1.5.0', Looks like this drops the MESA_MAJOR/MINOR/TINY version handling of the automake version. 
Other than this, and needing the build fix in patch 3, the series is: Reviewed-by: Eric Anholt signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #7 from Jan Vesely --- Created attachment 135318 --> https://bugs.freedesktop.org/attachment.cgi?id=135318&action=edit annotated asm dump -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #6 from Dave Gilbert--- (In reply to Jan Vesely from comment #5) > (In reply to Dave Gilbert from comment #4) > > Created attachment 135313 [details] > > foo.link-0.ll > > > > That's all 3 of the debug files it produced. > > (I wasn't sure which were the llvm and which the isa dumps; I guess the asm > > is the isa? and the ll's are both llvm dumps?) > > yes. the first .ll is from compilation step, the other one is from linking > step. > > .ll dump looks correct. > .asm also looks correct. > > you can try producing multiple asm dumps for working and non-working runs. > But I don't think that the llvm is the culprit here. > > Can you try waiting for the kernel execution to complete explicitly before > mapping the buffer? > Ideally call clFinish() on line 63. Since I'm on the C++ binding (probably a mistake) I used: queue.finish(); and it seems to be working. (This also corresponds possibly to what I'm seeing on a more complex kernel; with a more complex kernel I'm seeing on a whole pile of data on the last few Z slices as being bogus suggesting it's not finished). Dave -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)
https://bugs.freedesktop.org/show_bug.cgi?id=103586 --- Comment #5 from Jan Vesely--- (In reply to Dave Gilbert from comment #4) > Created attachment 135313 [details] > foo.link-0.ll > > That's all 3 of the debug files it produced. > (I wasn't sure which were the llvm and which the isa dumps; I guess the asm > is the isa? and the ll's are both llvm dumps?) yes. the first .ll is from compilation step, the other one is from linking step. .ll dump looks correct. .asm also looks correct. you can try producing multiple asm dumps for working and non-working runs. But I don't think that the llvm is the culprit here. Can you try waiting for the kernel execution to complete explicitly before mapping the buffer? Ideally call clFinish() on line 63. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meson: Add script to use VERSION file for getting version
On 8 November 2017 19:32:22 GMT, Dylan Bakerwrote: > Quoting Eric Engestrom (2017-11-08 04:21:41) > > On Wednesday, 2017-11-01 11:58:16 -0700, Dylan Baker wrote: > > > Meson has up until this point set it's version in the root > meson.build > > > script. While there are other build systems them creates "one more > > > thing" to duplicate between meson and every other build system, > this > > > script is a simple "read, strip, print" sort of deal to allow > meson to > > > read the VERSION file. > > > > > > I chose to implement this in python since python is portable, and > to > > > keep the meson.build script clean. This is also complicated by the > fact > > > that the project() call *must* be the first non-comment,non-blank > in the > > > toplevel meson.build script. > > > > > > Signed-off-by: Dylan Baker > > > --- > > > meson.build | 2 +- > > > scripts/meson_get_version.py | 35 > +++ > > > 2 files changed, 36 insertions(+), 1 deletion(-) > > > create mode 100644 scripts/meson_get_version.py > > > > > > diff --git a/meson.build b/meson.build > > > index 6ad8c8bbf4b..3f77380f7df 100644 > > > --- a/meson.build > > > +++ b/meson.build > > > @@ -21,7 +21,7 @@ > > > project( > > >'mesa', > > >['c', 'cpp'], > > > - version : '17.3.0-devel', > > > + version : > run_command(find_program('scripts/meson_get_version.py')).stdout(), > > And actually, thinking about windows, this should be: > run_command( > [find_program('python', 'python2', 'python3'), > 'bin/meson_get_version.py'] > ).stdout(), > > Since windows doesn't support shabangs link nix does. Indeed; r-b still stands with this change :) Speaking of, did you manage to get mesa building with meson on windows? 
> > > >license : 'MIT', > > >meson_version : '>= 0.42', > > >default_options : ['c_std=c99', 'cpp_std=c++11'] > > > diff --git a/scripts/meson_get_version.py > b/scripts/meson_get_version.py > > > new file mode 100644 > > > index 000..a221e26f250 > > > --- /dev/null > > > +++ b/scripts/meson_get_version.py > > > > get_reviewers.pl is the only script in `scripts/`, everything else > is in > > `bin/`. I would suggest putting your script in `bin/` for now, we > might > > merge `bin/` into `scripts/` later. > > > > > @@ -0,0 +1,35 @@ > > > +#!/usr/bin/env python > > > +# encoding=utf-8 > > > +# Copyright © 2017 Intel Corporation > > > + > > > +# Permission is hereby granted, free of charge, to any person > obtaining a copy > > > +# of this software and associated documentation files (the > "Software"), to deal > > > +# in the Software without restriction, including without > limitation the rights > > > +# to use, copy, modify, merge, publish, distribute, sublicense, > and/or sell > > > +# copies of the Software, and to permit persons to whom the > Software is > > > +# furnished to do so, subject to the following conditions: > > > + > > > +# The above copyright notice and this permission notice shall be > included in > > > +# all copies or substantial portions of the Software. > > > + > > > +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > > > +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > > +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > > > +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES > OR OTHER > > > +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING FROM, > > > +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS IN THE > > > +# SOFTWARE. 
> > > + > > > +from __future__ import print_function > > > +import os > > > + > > > + > > > +def main(): > > > +filename = os.path.join(os.environ['MESON_SOURCE_ROOT'], > 'VERSION') > > > +with open(filename) as f: > > > +version = f.read().strip() > > > +print(version, end='') > > > + > > > + > > > +if __name__ == '__main__': > > > +main() > > > > Seems like overkill, but why not. Change `main()` to > `print_mesa_version()` > > or something though, if the idea is to have it available as a > module? > > > > Reviewed-by: Eric Engestrom > > > > > -- > > > 2.14.3 > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/18] radeon/vcn: add vcn encode interface
Am 08.11.2017 um 19:08 schrieb boyuan.zh...@amd.com: From: Boyuan Zhang Add a new header file for vcn encode interface Signed-off-by: Boyuan Zhang Only briefly skimmed over it, but what I saw looks mostly sane. It might be nice to have the code for encoding of the SPS/PPS and slice header not in the driver, but in some helpers in /src/gallium/vl. But that is only a nice-to-have feature. Apart from that the series is Acked-by: Christian König. Regards, Christian. --- src/gallium/drivers/radeon/radeon_vcn_enc.h | 325 1 file changed, 325 insertions(+) create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.h diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h new file mode 100644 index 000..f9fa168 --- /dev/null +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h @@ -0,0 +1,325 @@ +/** + * + * Copyright 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **/ + +#ifndef _RADEON_VCN_ENC_H +#define _RADEON_VCN_ENC_H + +#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1 +#define RENCODE_FW_INTERFACE_MINOR_VERSION 2 + +#define RENCODE_IB_PARAM_SESSION_INFO 0x0001 +#define RENCODE_IB_PARAM_TASK_INFO 0x0002 +#define RENCODE_IB_PARAM_SESSION_INIT 0x0003 +#define RENCODE_IB_PARAM_LAYER_CONTROL 0x0004 +#define RENCODE_IB_PARAM_LAYER_SELECT 0x0005 +#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x0006 +#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x0007 +#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0008 +#define RENCODE_IB_PARAM_QUALITY_PARAMS 0x0009 +#define RENCODE_IB_PARAM_SLICE_HEADER 0x000a +#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x000b +#define RENCODE_IB_PARAM_INTRA_REFRESH 0x000c +#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x000d +#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER 0x000e +#define RENCODE_IB_PARAM_FEEDBACK_BUFFER 0x0010 +#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x0020 + +#define RENCODE_H264_IB_PARAM_SLICE_CONTROL 0x0021 +#define RENCODE_H264_IB_PARAM_SPEC_MISC 0x0022 +#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS 0x0023 +#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER 0x0024 + +#define RENCODE_IB_OP_INITIALIZE 0x0101 +#define RENCODE_IB_OP_CLOSE_SESSION 0x0102 +#define RENCODE_IB_OP_ENCODE 0x0103 +#define RENCODE_IB_OP_INIT_RC 0x0104 +#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x0105 +#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE 0x0106 +#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE 0x0107 +#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE 0x0108 + +#define RENCODE_IF_MAJOR_VERSION_MASK 0xffff0000 +#define RENCODE_IF_MAJOR_VERSION_SHIFT 16 +#define RENCODE_IF_MINOR_VERSION_MASK 0x0000ffff +#define RENCODE_IF_MINOR_VERSION_SHIFT 0 + 
+#define RENCODE_ENCODE_STANDARD_H264 1 + +#define
Re: [Mesa-dev] [PATCH] docs: add documentation for building with meson
Quoting Ian Romanick (2017-11-08 11:05:24) > On 11/08/2017 10:59 AM, Ian Romanick wrote: > > Is there a way to get a list of options before having any success? I > > want to disable using LLVM, but I can't get the list of options to do so > > because I don't have libelf (required for LLVM... which I don't want): > > > > Dependency libelf found: NO > > > > Meson encountered an error in file meson.build, line 628, column 2: > > C library 'elf' not found > > I guess the answer is 'less meson-options.txt'. :) Should probably > document that for us n00bs. That's in the v5 :) > > > On 11/07/2017 09:28 AM, Dylan Baker wrote: > >> v2: - Add information about CC, CXX, CFLAGS, and CXXFLAGS (Nicolai) > >> - Add message at top that meson for mesa is still a work in progress > >> - Add trailing "/" to directories (Eric E.) > >> - Fix a number of spelling/grammar/style suggestions from Eric E. > >> - Make a number of changes as suggested by Emil. > >> v3: - Fix order of commands in example (Eric E.) > >> - Add documentation for overriding LLVM version (Eric E.) 
> >> v4: - Rebase on master > >> - update default buildtype > >> - add note about b_ndebug > >> - Clarify meson configure a bit > >> > >> Signed-off-by: Dylan Baker > >> Reviewed-by: Eric Engestrom (v3) > >> --- > >> docs/contents.html | 1 + > >> docs/meson.html | 151 > >> + > >> 2 files changed, 152 insertions(+) > >> create mode 100644 docs/meson.html > >> > >> diff --git a/docs/contents.html b/docs/contents.html > >> index d5455421091..9a86019e2f6 100644 > >> --- a/docs/contents.html > >> +++ b/docs/contents.html > >> @@ -43,6 +43,7 @@ > >> Compiling / Installing > >> > >> Autoconf > >> +Meson > >> > >> > >> Precompiled Libraries > >> diff --git a/docs/meson.html b/docs/meson.html > >> new file mode 100644 > >> index 000..ee505b1d5ee > >> --- /dev/null > >> +++ b/docs/meson.html > >> @@ -0,0 +1,151 @@ > >> + >> "http://www.w3.org/TR/html4/loose.dtd;> > >> + > >> + > >> + > >> + Compilation and Installation using Meson > >> + > >> + > >> + > >> + > >> + > >> + The Mesa 3D Graphics Library > >> + > >> + > >> + > >> + > >> + > >> +Compilation and Installation using Meson > >> + > >> +1. Basic Usage > >> + > >> +The Meson build system for Mesa is still under active > >> development, > >> +and should not be used in production environments. > >> + > >> +The meson build is currently only tested on Linux, and is known to not > >> work > >> +on macOS, Windows, and Haiku. This will be fixed. > >> + > >> + > >> +The meson program is used to configure the source directory and generate > >> +either a ninja build file, or Visual Studio® build files. The latter > >> must > >> +be enabled via the --backend switch, as ninja is always the default. > >> Meson only > >> +supports out-of-tree builds, and must be passed a directory to put built > >> and > >> +generated sources into. We'll call that directory "build" for examples. 
> >> + > >> + > >> + > >> +meson build/ > >> + > >> + > >> + > >> +To see a description of your options you can run "meson configure" along > >> with the > >> +build directory whose selected options you want to view. This will show your > >> meson > >> +global arguments and project arguments, along with their defaults and your > >> +local settings. > >> + > >> + > >> + > >> +meson configure build/ > >> + > >> + > >> + > >> +With additional arguments "meson configure" is used to change options on an > >> +already configured build directory. All options passed to this command > >> are in > >> +the form -D"command"="value". > >> + > >> + > >> + > >> +meson configure build/ -Dprefix=/tmp/install -Dglx=true > >> + > >> + > >> + > >> +Once you've run meson successfully you can use your configured backend to > >> build > >> +the project. With ninja, the -C option can be used to point at a > >> directory > >> +to build. > >> + > >> + > >> + > >> +ninja -C build/ > >> + > >> + > >> + > >> +Without arguments, it will produce libGL.so and/or several other libraries > >> +depending on the options you have chosen. Later, if you want to rebuild > >> for a > >> +different configuration, you should run ninja clean before > >> +rebuilding, or create a new out-of-tree build directory (meson supports an > >> +unlimited number of them) for each configuration you want to build. > >> + > >> + > >> +CC, CFLAGS, CXX, CXXFLAGS > >> +These environment variables > >> +control the C and C++ compilers used during the build. The default > >> compilers > >> +depend on your operating system. Meson supports most of the popular > >> compilers; > >> +a complete list is available > >> + >> href="http://mesonbuild.com/Reference-tables.html#compiler-ids;>here. > >> + > >> +These arguments are consumed and stored by meson when it is initialized or > >> +re-initialized. Therefore passing