Re: [Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop
On 15/06/17 15:34, Jason Ekstrand wrote: On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri> wrote: If all the swizzles are inside the src channels range than we can just grab the srcs we need rather than converting everything. perf report convert_float() going from ~10% -> ~7% for the when running the following glean test: glean -o -v -v -v -t +pointAtten Cc: Jason Ekstrand > --- Hi Jason, I've only perf tested the above glean test. What did you use to benchmark this when you wrote it? The teximage-colors test has a benchmark flag which I added at the time. I trust that a lot more than some random glean test. :-) Cool thanks :) I'm seeing upto x5 improvement in some tests otherwise largely unchanged :) --Jason Thanks, Tim src/mesa/main/format_utils.c | 84 +--- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 65e65d4..1649ac0 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst, * * \param DST_TYPEthe C datatype of the destination * \param DST_CHANS the number of destination channels * \param SRC_TYPEthe C datatype of the source * \param SRC_CHANS the number of source channels * \param CONVan expression for converting from the source data, * storred in the variable "src", to the destination * format */ #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \ - do { \ - int s, j; \ - for (s = 0; s < count; ++s) { \ - for (j = 0; j < SRC_CHANS; ++j) {\ -SRC_TYPE src = typed_src[j]; \ -tmp[j] = CONV;\ - }\ - \ - typed_dst[0] = tmp[swizzle_x]; \ - if (DST_CHANS > 1) { \ -typed_dst[1] = tmp[swizzle_y];\ -if (DST_CHANS > 2) { \ - typed_dst[2] = tmp[swizzle_z]; \ - if (DST_CHANS > 3) { \ - typed_dst[3] = tmp[swizzle_w]; \ - } \ -} \ - }\ - typed_src += SRC_CHANS; \ - typed_dst += DST_CHANS; \ - } \ + do { \ + bool fast_path = false;\ + if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) \ + 
fast_path = true; \ + if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS) \ + fast_path = true; \ + if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\ + fast_path = true; \ + if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS && \ + swizzle_z < SRC_CHANS && \ + swizzle_w < SRC_CHANS) \ + fast_path = true; \ + \ + /* The fast path avoids copying/converting srcs we \ + * will never use. \ + */\ + if (fast_path) { \ + for (int s = 0; s < count; ++s) { \ +SRC_TYPE src = typed_src[swizzle_x]; \ +tmp[swizzle_x] = CONV; \ +typed_dst[0] = tmp[swizzle_x]; \ +if (DST_CHANS > 1) { \ + SRC_TYPE src = typed_src[swizzle_y]; \ + tmp[swizzle_y] = CONV;\ + typed_dst[1] = tmp[swizzle_y];\ + if (DST_CHANS > 2) { \ + SRC_TYPE src = typed_src[swizzle_z]; \
Re: [Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop
On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceriwrote: > If all the swizzles are inside the src channels range than we can just > grab the srcs we need rather than converting everything. > > perf report convert_float() going from ~10% -> ~7% for the when > running the following glean test: > > glean -o -v -v -v -t +pointAtten > > Cc: Jason Ekstrand > --- > > Hi Jason, > > I've only perf tested the above glean test. What did you use to benchmark > this when you wrote it? > The teximage-colors test has a benchmark flag which I added at the time. I trust that a lot more than some random glean test. :-) --Jason > Thanks, > Tim > > src/mesa/main/format_utils.c | 84 ++ > +++--- > 1 file changed, 63 insertions(+), 21 deletions(-) > > diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c > index 65e65d4..1649ac0 100644 > --- a/src/mesa/main/format_utils.c > +++ b/src/mesa/main/format_utils.c > @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst, > * > * \param DST_TYPEthe C datatype of the destination > * \param DST_CHANS the number of destination channels > * \param SRC_TYPEthe C datatype of the source > * \param SRC_CHANS the number of source channels > * \param CONVan expression for converting from the source data, > * storred in the variable "src", to the destination > * format > */ > #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, > CONV) \ > - do { \ > - int s, j; \ > - for (s = 0; s < count; ++s) { \ > - for (j = 0; j < SRC_CHANS; ++j) {\ > -SRC_TYPE src = typed_src[j]; \ > -tmp[j] = CONV;\ > - }\ > - \ > - typed_dst[0] = tmp[swizzle_x]; \ > - if (DST_CHANS > 1) { \ > -typed_dst[1] = tmp[swizzle_y];\ > -if (DST_CHANS > 2) { \ > - typed_dst[2] = tmp[swizzle_z]; \ > - if (DST_CHANS > 3) { \ > - typed_dst[3] = tmp[swizzle_w]; \ > - } \ > -} \ > - }\ > - typed_src += SRC_CHANS; \ > - typed_dst += DST_CHANS; \ > - } \ > + do { \ > + bool fast_path = false;\ > + if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) \ > + fast_path = 
true; \ > + if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS) \ > + fast_path = true; \ > + if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\ > + fast_path = true; \ > + if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \ > + swizzle_y < SRC_CHANS && \ > + swizzle_z < SRC_CHANS && \ > + swizzle_w < SRC_CHANS) \ > + fast_path = true; \ > + \ > + /* The fast path avoids copying/converting srcs we \ > + * will never use. \ > + */\ > + if (fast_path) { \ > + for (int s = 0; s < count; ++s) { \ > +SRC_TYPE src = typed_src[swizzle_x]; \ > +tmp[swizzle_x] = CONV; \ > +typed_dst[0] = tmp[swizzle_x]; \ > +if (DST_CHANS > 1) { \ > + SRC_TYPE src = typed_src[swizzle_y]; \ > + tmp[swizzle_y] = CONV;\ > + typed_dst[1] = tmp[swizzle_y];\ > + if (DST_CHANS > 2) { \ > + SRC_TYPE src = typed_src[swizzle_z]; \ > + tmp[swizzle_z] = CONV; \ > + typed_dst[2] = tmp[swizzle_z]; \ > + if (DST_CHANS > 3) { \ > + SRC_TYPE src = typed_src[swizzle_w];\ > + tmp[swizzle_w] = CONV; \ > + typed_dst[3] = tmp[swizzle_w]; \ > +
[Mesa-dev] [PATCH 1/2] mesa: make _mesa_swizzle_and_convert() static
--- src/mesa/main/format_utils.c | 91 src/mesa/main/format_utils.h | 9 - 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index d16d69c..65e65d4 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -33,20 +33,27 @@ const mesa_array_format RGBA32_FLOAT = const mesa_array_format RGBA8_UBYTE = MESA_ARRAY_FORMAT(1, 0, 0, 1, 4, 0, 1, 2, 3); const mesa_array_format RGBA32_UINT = MESA_ARRAY_FORMAT(4, 0, 0, 0, 4, 0, 1, 2, 3); const mesa_array_format RGBA32_INT = MESA_ARRAY_FORMAT(4, 1, 0, 0, 4, 0, 1, 2, 3); static void +swizzle_and_convert(void *void_dst, enum mesa_array_format_datatype dst_type, +int num_dst_channels, const void *void_src, +enum mesa_array_format_datatype src_type, +int num_src_channels, const uint8_t swizzle[4], +bool normalized, int count); + +static void invert_swizzle(uint8_t dst[4], const uint8_t src[4]) { int i, j; dst[0] = MESA_FORMAT_SWIZZLE_NONE; dst[1] = MESA_FORMAT_SWIZZLE_NONE; dst[2] = MESA_FORMAT_SWIZZLE_NONE; dst[3] = MESA_FORMAT_SWIZZLE_NONE; for (i = 0; i < 4; ++i) @@ -408,23 +415,23 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, } if (src_array_format && dst_array_format) { assert(_mesa_array_format_is_normalized(src_array_format) == _mesa_array_format_is_normalized(dst_array_format)); compute_src2dst_component_mapping(src2rgba, rgba2dst, rebase_swizzle, src2dst); for (row = 0; row < height; ++row) { - _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels, - src, src_type, src_num_channels, - src2dst, normalized, width); + swizzle_and_convert(dst, dst_type, dst_num_channels, + src, src_type, src_num_channels, + src2dst, normalized, width); src += src_stride; dst += dst_stride; } return; } /* At this point, we're fresh out of fast-paths and we need to convert * to float, uint32, or, if we're lucky, uint8. 
*/ dst_integer = false; @@ -497,134 +504,134 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, * of the packed formats are unsigned, so we can just always use * _mesa_swizzle_and_convert for signed formats, which is aware of the * truncation problem. */ common_type = is_signed ? MESA_ARRAY_FORMAT_TYPE_INT : MESA_ARRAY_FORMAT_TYPE_UINT; if (src_array_format) { compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle, rebased_src2rgba); for (row = 0; row < height; ++row) { -_mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 4, - src, src_type, src_num_channels, - rebased_src2rgba, normalized, width); +swizzle_and_convert(tmp_uint + row * width, common_type, 4, +src, src_type, src_num_channels, +rebased_src2rgba, normalized, width); src += src_stride; } } else { for (row = 0; row < height; ++row) { _mesa_unpack_uint_rgba_row(src_format, width, src, tmp_uint + row * width); if (rebase_swizzle) - _mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 4, - tmp_uint + row * width, common_type, 4, - rebase_swizzle, false, width); + swizzle_and_convert(tmp_uint + row * width, common_type, 4, + tmp_uint + row * width, common_type, 4, + rebase_swizzle, false, width); src += src_stride; } } /* At this point, we have already done the truncation if the source is * signed but the destination is unsigned, so no need to force the * _mesa_swizzle_and_convert path. */ if (dst_format_is_mesa_array_format) { for (row = 0; row < height; ++row) { -_mesa_swizzle_and_convert(dst, dst_type, dst_num_channels, - tmp_uint + row * width, common_type, 4, - rgba2dst, normalized, width); +swizzle_and_convert(dst, dst_type, dst_num_channels, +tmp_uint + row * width, common_type, 4, +rgba2dst, normalized, width); dst +=
[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop
If all the swizzles are inside the src channels range than we can just grab the srcs we need rather than converting everything. perf report convert_float() going from ~10% -> ~7% for the when running the following glean test: glean -o -v -v -v -t +pointAtten Cc: Jason Ekstrand--- Hi Jason, I've only perf tested the above glean test. What did you use to benchmark this when you wrote it? Thanks, Tim src/mesa/main/format_utils.c | 84 +--- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 65e65d4..1649ac0 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst, * * \param DST_TYPEthe C datatype of the destination * \param DST_CHANS the number of destination channels * \param SRC_TYPEthe C datatype of the source * \param SRC_CHANS the number of source channels * \param CONVan expression for converting from the source data, * storred in the variable "src", to the destination * format */ #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \ - do { \ - int s, j; \ - for (s = 0; s < count; ++s) { \ - for (j = 0; j < SRC_CHANS; ++j) {\ -SRC_TYPE src = typed_src[j]; \ -tmp[j] = CONV;\ - }\ - \ - typed_dst[0] = tmp[swizzle_x]; \ - if (DST_CHANS > 1) { \ -typed_dst[1] = tmp[swizzle_y];\ -if (DST_CHANS > 2) { \ - typed_dst[2] = tmp[swizzle_z]; \ - if (DST_CHANS > 3) { \ - typed_dst[3] = tmp[swizzle_w]; \ - } \ -} \ - }\ - typed_src += SRC_CHANS; \ - typed_dst += DST_CHANS; \ - } \ + do { \ + bool fast_path = false;\ + if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) \ + fast_path = true; \ + if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS) \ + fast_path = true; \ + if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\ + fast_path = true; \ + if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \ + swizzle_y < SRC_CHANS && \ + swizzle_z < 
SRC_CHANS && \ + swizzle_w < SRC_CHANS) \ + fast_path = true; \ + \ + /* The fast path avoids copying/converting srcs we \ + * will never use. \ + */\ + if (fast_path) { \ + for (int s = 0; s < count; ++s) { \ +SRC_TYPE src = typed_src[swizzle_x]; \ +tmp[swizzle_x] = CONV; \ +typed_dst[0] = tmp[swizzle_x]; \ +if (DST_CHANS > 1) { \ + SRC_TYPE src = typed_src[swizzle_y]; \ + tmp[swizzle_y] = CONV;\ + typed_dst[1] = tmp[swizzle_y];\ + if (DST_CHANS > 2) { \ + SRC_TYPE src = typed_src[swizzle_z]; \ + tmp[swizzle_z] = CONV; \ + typed_dst[2] = tmp[swizzle_z]; \ + if (DST_CHANS > 3) { \ + SRC_TYPE src = typed_src[swizzle_w];\ + tmp[swizzle_w] = CONV; \ + typed_dst[3] = tmp[swizzle_w]; \ + } \ + } \ +}\ +typed_src += SRC_CHANS; \ +typed_dst += DST_CHANS; \ + } \ + } else { \ +
[Mesa-dev] [PATCH] automake: increase the MESA_GIT_SHA1 hash id length from 7 to 10 digits
The SCons build has been using 10 digits of the git hash id for the MESA_GIT_SHA1 string in git_sha1.h for about a year now. I bumped it up after running into a case where a 7-digit hash ID was ambiguous. This patch makes the same change for the autotools build. The command "git log | grep "^commit" | cut -b 8-14 | sort | uniq -d" shows there are currently 17 cases where 7 digits of hash id are ambiguous on master (probably quite a few more if we'd consider other branches). Instead of using "git log -n 1 --oneline" use "git rev-parse --short=10 HEAD" to get the HEAD hash id. --- src/Makefile.am | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index aa5f8aa..401e632 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,9 +28,9 @@ git_sha1.h.tmp: @touch git_sha1.h.tmp @if test -e $(top_srcdir)/.git; then \ if which git > /dev/null; then \ - git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \ - sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \ - > git_sha1.h.tmp ; \ + git --git-dir=$(top_srcdir)/.git rev-parse --short=10 HEAD | \ + sed 's/^\(.*\)/#define MESA_GIT_SHA1 "git-\1"/' \ + > git_sha1.h.tmp ; \ fi \ fi -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] r600: remove unnecessary NULL check in r600_shader_select
r600_shader_select is always called through the macro SELECT_SHADER_OR_FAIL, which never passes NULL pointers as parameter 'dirty'. --- src/gallium/drivers/r600/r600_state_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 8ace7793f0..51c4c6dc30 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -803,8 +803,7 @@ static int r600_shader_select(struct pipe_context *ctx, sel->num_shaders++; } - if (dirty) - *dirty = true; + *dirty = true; shader->next_variant = sel->current; sel->current = shader; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH V2] mesa: stop assigning unused storage for non-bindless opaque types
The storage was once used by get_sampler_uniform_value() but that was fixed long ago to use the uniform storage assigned by the linker. By not assigning storage for images/samplers the constant buffer for gallium drivers will be reduced which could result in small perf improvements. V2: rebase on ARB_bindless_texture --- src/mesa/program/ir_to_mesa.cpp | 42 ++--- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 775211c..54b848a 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2440,62 +2440,32 @@ private: } /* anonymous namespace */ void add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, bool /* row_major */, const glsl_type * /* record_type */, const enum glsl_interface_packing, bool /* last_field */) { - /* atomics don't get real storage */ - if (type->contains_atomic()) + /* opaque types don't use storage in the param list unless they are +* bindless textures or images. +*/ + if (type->contains_opaque() && !var->data.bindless) return; - gl_register_file file; - if (type->without_array()->is_sampler() && !var->data.bindless) { - file = PROGRAM_SAMPLER; - } else { - file = PROGRAM_UNIFORM; - } - int index = _mesa_lookup_parameter_index(params, name); if (index < 0) { unsigned size = type_size(type) * 4; - index = _mesa_add_parameter(params, file, name, size, type->gl_type, - NULL, NULL); - - /* Sampler uniform values are stored in prog->SamplerUnits, - * and the entry in that array is selected by this index we - * store in ParameterValues[]. 
- */ - if (file == PROGRAM_SAMPLER) { -unsigned location; -const bool found = - this->shader_program->UniformHash->get(location, - params->Parameters[index].Name); -assert(found); - -if (!found) - return; - -struct gl_uniform_storage *storage = ->shader_program->data->UniformStorage[location]; - - assert(storage->type->is_sampler() && -storage->opaque[shader_type].active); - -for (unsigned int j = 0; j < size / 4; j++) -params->ParameterValues[index + j][0].f = - storage->opaque[shader_type].index + j; - } + index = _mesa_add_parameter(params, PROGRAM_UNIFORM, name, size, + type->gl_type, NULL, NULL); } /* The first part of the uniform that's processed determines the base * location of the whole uniform (for structures). */ if (this->idx < 0) this->idx = index; } /** -- 2.9.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] i965: Only do depth resolves prior to clearing when needed
--- src/mesa/drivers/dri/i965/brw_clear.c | 35 +-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index f5a990d..99ddc4e 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -164,8 +164,39 @@ brw_fast_clear_depth(struct gl_context *ctx) * flags out of the HiZ buffer into the real depth buffer. */ if (mt->fast_clear_color.f32[0] != ctx->Depth.Clear) { - intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, true, false); + for (uint32_t level = mt->first_level; level <= mt->last_level; level++) { + if (!intel_miptree_level_has_hiz(mt, level)) +continue; + + for (uint32_t layer = 0; layer < mt->level[level].depth; layer++) { +if (level == depth_irb->mt_level && +layer >= depth_irb->mt_layer && +layer < depth_irb->mt_layer + num_layers) { + /* We're going to clear this layer anyway. Leave it alone. */ + continue; +} + +enum isl_aux_state aux_state = + intel_miptree_get_aux_state(mt, level, layer); + +if (aux_state != ISL_AUX_STATE_CLEAR && +aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) { + /* This slice doesn't have any fast-cleared bits. */ + continue; +} + +/* If we got here, then the level may have fast-clear bits that + * use the old clear value. We need to do a depth resolve to get + * rid of their use of the clear value before we can change it. + * Fortunately, few applications ever change their depth clear + * value so this shouldn't happen often. + */ +intel_hiz_exec(brw, mt, level, layer, 1, + BLORP_HIZ_OP_DEPTH_RESOLVE); +intel_miptree_set_aux_state(brw, mt, level, layer, 1, +ISL_AUX_STATE_RESOLVED); + } + } mt->fast_clear_color.f32[0] = ctx->Depth.Clear; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] i965: Move surface resolves back to draw/dispatch time
This is effectively a revert of 388f02729bbf88ba104f4f8ee1fdf005a240969c though much code has been added since. Kristian initially moved it to try and avoid locking problems with meta-based resolves. Now that meta is gone from the resolve path (for good this time, we hope), we can move it back. The problem with having it in intel_update_state was that the UpdateState hook gets called by core mesa directly and all sorts of things will cause a UpdateState to get called which may trigger resolves at inopportune times. In particular, it gets called by _mesa_Clear and, if we have a HiZ buffer in the INVALID_AUX state, causes a HiZ resolve right before the clear which is pointless. By moving it back to try_draw_prims time, we know it will only get called right before a draw which is where we want it. --- src/mesa/drivers/dri/i965/brw_compute.c | 2 + src/mesa/drivers/dri/i965/brw_context.c | 123 src/mesa/drivers/dri/i965/brw_context.h | 2 + src/mesa/drivers/dri/i965/brw_draw.c| 139 4 files changed, 143 insertions(+), 123 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 8046153..2867a14 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -188,6 +188,8 @@ brw_dispatch_compute_common(struct gl_context *ctx) brw_validate_textures(brw); + brw_predraw_resolve_inputs(brw); + const int sampler_state_size = 16; /* 16 bytes */ estimated_buffer_space_needed = 512; /* batchbuffer commands */ estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT * diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 5433f90..b8db9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -168,140 +168,17 @@ intel_update_framebuffer(struct gl_context *ctx, fb->DefaultGeometry.NumSamples); } -static bool -intel_disable_rb_aux_buffer(struct brw_context *brw, const struct brw_bo *bo) -{ - const struct 
gl_framebuffer *fb = brw->ctx.DrawBuffer; - bool found = false; - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - const struct intel_renderbuffer *irb = - intel_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb && irb->mt->bo == bo) { - found = brw->draw_aux_buffer_disabled[i] = true; - } - } - - return found; -} - static void intel_update_state(struct gl_context * ctx) { GLuint new_state = ctx->NewState; struct brw_context *brw = brw_context(ctx); - struct intel_texture_object *tex_obj; - struct intel_renderbuffer *depth_irb; if (ctx->swrast_context) _swrast_InvalidateState(ctx, new_state); brw->NewGLState |= new_state; - _mesa_unlock_context_textures(ctx); - - intel_prepare_render(brw); - - /* Resolve the depth buffer's HiZ buffer. */ - depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (depth_irb && depth_irb->mt) { - intel_miptree_prepare_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, - depth_irb->layer_count); - } - - memset(brw->draw_aux_buffer_disabled, 0, - sizeof(brw->draw_aux_buffer_disabled)); - - /* Resolve depth buffer and render cache of each enabled texture. 
*/ - int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) -continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) -continue; - - /* We need inte_texture_object::_Format to be valid */ - intel_finalize_mipmap_tree(brw, i); - - bool aux_supported; - intel_miptree_prepare_texture(brw, tex_obj->mt, tex_obj->_Format, -_supported); - - if (!aux_supported && brw->gen >= 9 && - intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) { - perf_debug("Sampling renderbuffer with non-compressible format - " -"turning off compression"); - } - - brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); - - if (tex_obj->base.StencilSampling || - tex_obj->mt->format == MESA_FORMAT_S_UINT8) { - intel_update_r8stencil(brw, tex_obj->mt); - } - } - - /* Resolve color for each active shader image. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const struct gl_program *prog = ctx->_Shader->CurrentProgram[i]; - - if (unlikely(prog && prog->info.num_images)) { - for (unsigned j = 0; j < prog->info.num_images; j++) { -struct gl_image_unit *u = - >ImageUnits[prog->sh.ImageUnits[j]]; -tex_obj = intel_texture_object(u->TexObj); - -
[Mesa-dev] [PATCH 3/5] i965: Simplify HiZ clears a bit
No need for all that switching when we can just assign a nice little variable with the number of layers. --- src/mesa/drivers/dri/i965/brw_clear.c | 25 - 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 138997d..f5a990d 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -158,6 +158,8 @@ brw_fast_clear_depth(struct gl_context *ctx) break; } + const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1; + /* If we're clearing to a new clear value, then we need to resolve any clear * flags out of the HiZ buffer into the real depth buffer. */ @@ -167,27 +169,16 @@ brw_fast_clear_depth(struct gl_context *ctx) mt->fast_clear_color.f32[0] = ctx->Depth.Clear; } - if (depth_att->Layered) { - intel_hiz_exec(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, depth_irb->layer_count, - BLORP_HIZ_OP_DEPTH_CLEAR); - } else { - intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer, 1, - BLORP_HIZ_OP_DEPTH_CLEAR); - } + intel_hiz_exec(brw, mt, depth_irb->mt_level, + depth_irb->mt_layer, num_layers, + BLORP_HIZ_OP_DEPTH_CLEAR); /* Now, the HiZ buffer contains data that needs to be resolved to the depth * buffer. */ - if (depth_att->Layered) { - intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, depth_irb->layer_count, - ISL_AUX_STATE_CLEAR); - } else { - intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, 1, - ISL_AUX_STATE_CLEAR); - } + intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level, + depth_irb->mt_layer, num_layers, + ISL_AUX_STATE_CLEAR); return true; } -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] i965/clear: Don't perform redundant depth clears
We already have this little optimization for color clears. Now that we're actually tracking whether or not a slice has any fast-clear blocks, It's easy enough to add for depth clears too. --- src/mesa/drivers/dri/i965/brw_clear.c | 34 --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 99ddc4e..5fa4ae7 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -200,9 +200,37 @@ brw_fast_clear_depth(struct gl_context *ctx) mt->fast_clear_color.f32[0] = ctx->Depth.Clear; } - intel_hiz_exec(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, num_layers, - BLORP_HIZ_OP_DEPTH_CLEAR); + bool need_clear = false; + for (unsigned a = 0; a < num_layers; a++) { + enum isl_aux_state aux_state = + intel_miptree_get_aux_state(mt, depth_irb->mt_level, + depth_irb->mt_layer + a); + + if (aux_state != ISL_AUX_STATE_CLEAR) { + need_clear = true; + break; + } + } + + if (!need_clear) { + /* If all of the layers we intend to clear are already in the clear + * state then simply updating the miptree fast clear value is sufficient + * to change their clear value. + */ + return true; + } + + for (unsigned a = 0; a < num_layers; a++) { + enum isl_aux_state aux_state = + intel_miptree_get_aux_state(mt, depth_irb->mt_level, + depth_irb->mt_layer + a); + + if (aux_state != ISL_AUX_STATE_CLEAR) { + intel_hiz_exec(brw, mt, depth_irb->mt_level, +depth_irb->mt_layer + a, 1, +BLORP_HIZ_OP_DEPTH_CLEAR); + } + } /* Now, the HiZ buffer contains data that needs to be resolved to the depth * buffer. 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index c19d2d5..8b893dd 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2217,8 +2217,20 @@ intel_miptree_prepare_hiz_access(struct brw_context *brw, uint32_t level, uint32_t layer, bool hiz_supported, bool fast_clear_supported) { + enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, layer); + + /* On Sandy Bridge, any usage of depth with HiZ enabled is liable to flush +* out clear color blocks. If the slice is in the clear state, it should +* now be considered to be in the compressed with clear state. +*/ + if (brw->gen == 6 && aux_state == ISL_AUX_STATE_CLEAR && hiz_supported) { + assert(fast_clear_supported); + intel_miptree_set_aux_state(brw, mt, level, layer, 1, + ISL_AUX_STATE_COMPRESSED_CLEAR); + } + enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE; - switch (intel_miptree_get_aux_state(mt, level, layer)) { + switch (aux_state) { case ISL_AUX_STATE_CLEAR: case ISL_AUX_STATE_COMPRESSED_CLEAR: if (!hiz_supported || !fast_clear_supported) -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] i965: Enable non-CCS_E fast-clears on gen9+
Sky Lake and above can fast-clear exactly the same set of formats as older hardware. The only restriction is that you can't *texture* from it unless the format supports CCS_E but you can fast-clear and render to it just fine. All of the code exists and now that we have sane resolves, we can trivially turn it on. Reviewed-by: Topi Pohjolainen--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 -- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 02e74ca..c19d2d5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -207,13 +207,7 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (!brw->format_supported_as_render_target[mt->format]) return false; - if (brw->gen >= 9) { - mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); - const enum isl_format isl_format = - brw_isl_format_for_mesa_format(linear_format); - return isl_format_supports_ccs_e(>screen->devinfo, isl_format); - } else - return true; + return true; } /* On Gen9 support for color buffer compression was extended to single @@ -257,16 +251,12 @@ intel_miptree_supports_lossless_compressed(struct brw_context *brw, if (_mesa_get_format_datatype(mt->format) == GL_FLOAT) return false; - /* Fast clear mechanism and lossless compression go hand in hand. */ + /* Fast clear support is a pre-requisite for lossless compression */ if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt)) return false; - /* Fast clear can be also used to clear srgb surfaces by using equivalent -* linear format. This trick, however, can't be extended to be used with -* lossless compression and therefore a check is needed to see if the format -* really is linear. 
-*/ - return _mesa_get_srgb_format_linear(mt->format) == mt->format; + enum isl_format isl_format = brw_isl_format_for_mesa_format(mt->format); + return isl_format_supports_ccs_e(>screen->devinfo, isl_format); } /** -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101432] make check DispatchSanity_test.GL31_CORE regression
https://bugs.freedesktop.org/show_bug.cgi?id=101432 Michel Dänzer changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Michel Dänzer --- Thanks for the report, fixed in Git: Module: Mesa Branch: master Commit: 1c00af4264d795bf1fb3d13b7a966722a5984c4a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c00af4264d795bf1fb3d13b7a966722a5984c4a Author: Samuel Pitoiset Date: Wed Jun 14 18:08:09 2017 +0200 mesa: fix 'make check' by moving bindless functions at the right place -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place
On 15/06/17 01:08 AM, Samuel Pitoiset wrote: > Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points") > Reported-by: Mark Janes> Signed-off-by: Samuel Pitoiset Pushed, thanks! -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 101432] make check DispatchSanity_test.GL31_CORE regression
https://bugs.freedesktop.org/show_bug.cgi?id=101432 Bug ID: 101432 Summary: make check DispatchSanity_test.GL31_CORE regression Product: Mesa Version: git Hardware: x86-64 (AMD64) OS: All Status: NEW Keywords: regression Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: v...@freedesktop.org QA Contact: mesa-dev@lists.freedesktop.org mesa: b6d56c747cbce7b9ca297be1c6f2c2b7ca91842d (master 17.2.0-devel) -- You are receiving this mail because: You are the assignee for the bug. You are the QA Contact for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] anv/i965: drop libdrm_intel dependency completely
On Wed, Jun 14, 2017 at 5:22 PM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > With Ken's work to drop the library dependency on libdrm_intel, we now > only depend on libdrm for the kernel uapi headers it provides. It > seems like we're better off just embedding those headers ourselves, > making the lives of people developing new features tightly > integrated with the kernel a tiny bit easier. > > This change also makes it a bit more obvious what cflags/libs are > required by the i915 drivers vs i965, by renaming INTEL_CFLAGS/LIBS > into I915_CFLAGS/LIBS. > > Headers were generated from drm-tip on the following commit : > >commit cafd1e4df1e6e039268c4e4b1a55c88915d21f2e >Author: Rodrigo Vivi >Date: Wed Jun 14 12:56:57 2017 -0700 > >drm-tip: 2017y-06m-14d-19h-56m-24s UTC integration manifest > > v2: Use installed files from the kernel (Daniel Vetter) > I'm not even going to pretend to review the build system patches. However, I do think that this is something we should do. 
Both are Acked-by: Jason Ekstrand > > Signed-off-by: Lionel Landwerlin > --- > configure.ac|6 +- > src/gallium/drivers/i915/Automake.inc |2 +- > src/gallium/targets/pipe-loader/Makefile.am |2 +- > src/gallium/winsys/i915/drm/Makefile.am |2 +- > src/intel/Makefile.drm.am | 22 + > src/intel/Makefile.sources |6 + > src/intel/Makefile.tools.am |3 +- > src/intel/Makefile.vulkan.am|4 +- > src/intel/drm/drm.h | 925 + > src/intel/drm/drm_fourcc.h | 358 +++ > src/intel/drm/drm_mode.h| 739 ++ > src/intel/drm/i915_drm.h| 1459 > +++ > src/mesa/drivers/dri/i915/Makefile.am |4 +- > src/mesa/drivers/dri/i965/Makefile.am |2 +- > 14 files changed, 3520 insertions(+), 14 deletions(-) > create mode 100644 src/intel/Makefile.drm.am > create mode 100644 src/intel/drm/drm.h > create mode 100644 src/intel/drm/drm_fourcc.h > create mode 100644 src/intel/drm/drm_mode.h > create mode 100644 src/intel/drm/i915_drm.h > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10.5/24] st/mesa: don't set sampler states for TBOs
From: Marek Olšák--- This prevents a crash later in the series, and it's generally a good thing to do. src/mesa/state_tracker/st_atom_sampler.c | 11 +-- src/mesa/state_tracker/st_texture.c | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 21af5ef..7a85a55 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -227,20 +227,21 @@ void st_convert_sampler_from_unit(const struct st_context *st, struct pipe_sampler_state *sampler, GLuint texUnit) { const struct gl_texture_object *texobj; struct gl_context *ctx = st->ctx; const struct gl_sampler_object *msamp; texobj = ctx->Texture.Unit[texUnit]._Current; assert(texobj); + assert(texobj->Target != GL_TEXTURE_BUFFER); msamp = _mesa_get_samplerobj(ctx, texUnit); st_convert_sampler(st, texobj, msamp, sampler); sampler->lod_bias += ctx->Texture.Unit[texUnit].LodBias; sampler->seamless_cube_map |= ctx->Texture.CubeMapSeamless; } @@ -248,37 +249,43 @@ st_convert_sampler_from_unit(const struct st_context *st, * Update the gallium driver's sampler state for fragment, vertex or * geometry shader stage. 
*/ static void update_shader_samplers(struct st_context *st, enum pipe_shader_type shader_stage, const struct gl_program *prog, struct pipe_sampler_state *samplers, unsigned *out_num_samplers) { + struct gl_context *ctx = st->ctx; GLbitfield samplers_used = prog->SamplersUsed; GLbitfield free_slots = ~prog->SamplersUsed; GLbitfield external_samplers_used = prog->ExternalSamplersUsed; unsigned unit, num_samplers; const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS]; if (samplers_used == 0x0) return; num_samplers = util_last_bit(samplers_used); /* loop over sampler units (aka tex image units) */ for (unit = 0; samplers_used; unit++, samplers_used >>= 1) { struct pipe_sampler_state *sampler = samplers + unit; + unsigned tex_unit = prog->SamplerUnits[unit]; - if (samplers_used & 1) { - st_convert_sampler_from_unit(st, sampler, prog->SamplerUnits[unit]); + /* Don't update the sampler for TBOs. cso_context will not bind sampler + * states that are NULL. + */ + if (samplers_used & 1 && + ctx->Texture.Unit[tex_unit]._Current->Target != GL_TEXTURE_BUFFER) { + st_convert_sampler_from_unit(st, sampler, tex_unit); states[unit] = sampler; } else { states[unit] = NULL; } } /* For any external samplers with multiplaner YUV, stuff the additional * sampler states we need at the end. * * Just re-use the existing sampler-state from the primary slot. diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 9de3b9a..07c3844 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -505,26 +505,27 @@ st_destroy_bound_image_handles(struct st_context *st) * Create a texture handle from a texture unit. 
*/ static GLuint64 st_create_texture_handle_from_unit(struct st_context *st, struct gl_program *prog, GLuint texUnit) { struct gl_context *ctx = st->ctx; struct gl_texture_object *texObj; struct pipe_context *pipe = st->pipe; struct pipe_sampler_view *view; - struct pipe_sampler_state sampler; + struct pipe_sampler_state sampler = {0}; if (!st_update_single_texture(st, , texUnit, prog->sh.data->Version)) return 0; - st_convert_sampler_from_unit(st, , texUnit); + if (view->target != PIPE_BUFFER) + st_convert_sampler_from_unit(st, , texUnit); texObj = ctx->Texture.Unit[texUnit]._Current; assert(texObj); return pipe->create_texture_handle(pipe, view, ); } /** * Create an image handle from an image unit. -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/10] i965: perf: keep on reading reports until delimiting timestamp
Due to an underlying hardware race condition, we have no guarantee that all the reports coming from the OA buffer related to the workload we're trying to measure have landed in memory by the time all the work submitted has completed. That means we need to keep on reading the OA stream until we read a report with a timestamp older than the timestamp recorded by the MI_REPORT_PERF_COUNT at the end of the performance query. v2: fix uninitialized offset variable to 0 (Lionel) v3: rework the reading to avoid blocking the user of the API unless requested (Rob) v4: fix a bug that makes the i965 driver read the perf stream when not necessary, leading to very long counter accumulation times (Lionel) Signed-off-by: Lionel Landwerlin Cc: Robert Bragg --- src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++ 1 file changed, 113 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d10141bf07a..d11784c0352 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -219,6 +219,7 @@ struct brw_oa_sample_buf { int refcount; int len; uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10]; + uint32_t last_timestamp; }; /** @@ -244,6 +245,11 @@ struct brw_perf_query_object struct brw_bo *bo; /** + * Address of mapped of @bo + */ + void *map; + + /** * The MI_REPORT_PERF_COUNT command lets us specify a unique * ID that will be reflected in the resulting OA report * that's written by the GPU. 
This is the ID we're expecting @@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw) } } -static bool -read_oa_samples(struct brw_context *brw) +enum OaReadStatus { + OA_READ_STATUS_ERROR, + OA_READ_STATUS_UNFINISHED, + OA_READ_STATUS_FINISHED, +}; + +static enum OaReadStatus +read_oa_samples_until(struct brw_context *brw, + uint32_t start_timestamp, + uint32_t end_timestamp) { + struct exec_node *tail_node = + exec_list_get_tail(>perfquery.sample_buffers); + struct brw_oa_sample_buf *tail_buf = + exec_node_data(struct brw_oa_sample_buf, tail_node, link); + uint32_t last_timestamp = tail_buf->last_timestamp; + while (1) { struct brw_oa_sample_buf *buf = get_free_sample_buf(brw); + uint32_t offset; int len; while ((len = read(brw->perfquery.oa_stream_fd, buf->buf, @@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw) if (len < 0) { if (errno == EAGAIN) - return true; + return ((last_timestamp - start_timestamp) >= + (end_timestamp - start_timestamp)) ? + OA_READ_STATUS_FINISHED : + OA_READ_STATUS_UNFINISHED; else { DBG("Error reading i915 perf samples: %m\n"); - return false; } - } else { + } else DBG("Spurious EOF reading i915 perf samples\n"); -return false; - } + + return OA_READ_STATUS_ERROR; } buf->len = len; exec_list_push_tail(>perfquery.sample_buffers, >link); + + /* Go through the reports and update the last timestamp. */ + offset = 0; + while (offset < buf->len) { + const struct drm_i915_perf_record_header *header = +(const struct drm_i915_perf_record_header *) >buf[offset]; + uint32_t *report = (uint32_t *) (header + 1); + + if (header->type == DRM_I915_PERF_RECORD_SAMPLE) +last_timestamp = report[1]; + + offset += header->size; + } + + buf->last_timestamp = last_timestamp; } unreachable("not reached"); + return OA_READ_STATUS_ERROR; +} + +/** + * Try to read all the reports until either the delimiting timestamp + * or an error arises. 
+ */ +static bool +read_oa_samples_for_query(struct brw_context *brw, + struct brw_perf_query_object *obj) +{ + uint32_t *start; + uint32_t *last; + uint32_t *end; + + /* We need the MI_REPORT_PERF_COUNT to land before we can start +* accumulate. */ + assert(!brw_batch_references(>batch, obj->oa.bo) && + !brw_bo_busy(obj->oa.bo)); + + /* Map the BO once here and let accumulate_oa_reports() unmap +* it. */ + if (obj->oa.map == NULL) + obj->oa.map = brw_bo_map(brw, obj->oa.bo, MAP_READ); + + start = last = obj->oa.map; + end = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES; + + if (start[0] != obj->oa.begin_report_id) { + DBG("Spurious start report id=%"PRIu32"\n", start[0]); + return true; + } + if (end[0] != (obj->oa.begin_report_id + 1)) { + DBG("Spurious end report id=%"PRIu32"\n", end[0]); +
[Mesa-dev] [PATCH 07/10] i965: ensure isolated timer reports while idle don't confuse filtering
From: Robert Bragg From experimentation in IGT, we found that the OA unit might label some report as "idle" (using an invalid context ID), right after a report for a given context. Deltas generated by those reports actually belong to the previous context, even though they're not labelled as such. This change ensures that, while reading OA reports, we only consider the GPU actually idle after 2 reports with an invalid context ID. Signed-off-by: Lionel Landwerlin --- src/mesa/drivers/dri/i965/brw_performance_query.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d11784c0352..c6574df302a 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -863,6 +863,7 @@ accumulate_oa_reports(struct brw_context *brw, struct exec_node *first_samples_node; bool in_ctx = true; uint32_t ctx_id; + int out_duration = 0; assert(o->Ready); assert(obj->oa.map != NULL); @@ -937,10 +938,16 @@ accumulate_oa_reports(struct brw_context *brw, if (in_ctx && report[2] != ctx_id) { DBG("i915 perf: Switch AWAY (observed by ID change)\n"); in_ctx = false; + out_duration = 0; } else if (in_ctx == false && report[2] == ctx_id) { DBG("i915 perf: Switch TO\n"); in_ctx = true; - add = false; + + /* We didn't *really* Switch AWAY in the case that we + * e.g. saw a single periodic report while idle... + */ + if (out_duration >= 1) + add = false; } else if (in_ctx) { assert(report[2] == ctx_id); DBG("i915 perf: Continuation IN\n"); @@ -948,6 +955,7 @@ accumulate_oa_reports(struct brw_context *brw, assert(report[2] != ctx_id); DBG("i915 perf: Continuation OUT\n"); add = false; + out_duration++; } } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/10] i965: use gen_device_info rather than brw_context
Signed-off-by: Lionel Landwerlin--- src/mesa/drivers/dri/i965/brw_performance_query.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index c6574df302a..45be9b1a988 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -503,9 +503,11 @@ emit_mi_report_perf_count(struct brw_context *brw, uint32_t offset_in_bytes, uint32_t report_id) { + const struct gen_device_info *devinfo = >screen->devinfo; + assert(offset_in_bytes % 64 == 0); - if (brw->gen < 8) { + if (devinfo->gen < 8) { BEGIN_BATCH(3); OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, @@ -856,6 +858,7 @@ static void accumulate_oa_reports(struct brw_context *brw, struct brw_perf_query_object *obj) { + const struct gen_device_info *devinfo = >screen->devinfo; struct gl_perf_query_object *o = >base; uint32_t *start; uint32_t *last; @@ -934,7 +937,7 @@ accumulate_oa_reports(struct brw_context *brw, * For Haswell we can rely on the HW to stop the progress * of OA counters while any other context is acctive. 
*/ -if (brw->gen >= 8) { +if (devinfo->gen >= 8) { if (in_ctx && report[2] != ctx_id) { DBG("i915 perf: Switch AWAY (observed by ID change)\n"); in_ctx = false; @@ -1603,6 +1606,7 @@ add_basic_stat_reg(struct brw_perf_query_info *query, static void init_pipeline_statistic_query_registers(struct brw_context *brw) { + const struct gen_device_info *devinfo = >screen->devinfo; struct brw_perf_query_info *query = append_query_info(brw); query->kind = PIPELINE_STATS; @@ -1618,7 +1622,7 @@ init_pipeline_statistic_query_registers(struct brw_context *brw) add_basic_stat_reg(query, VS_INVOCATION_COUNT, "N vertex shader invocations"); - if (brw->gen == 6) { + if (devinfo->gen == 6) { add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, "SO_PRIM_STORAGE_NEEDED", "N geometry shader stream-out primitives (total)"); @@ -1667,7 +1671,7 @@ init_pipeline_statistic_query_registers(struct brw_context *brw) add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, "N primitives leaving clipping"); - if (brw->is_haswell || brw->gen == 8) + if (devinfo->is_haswell || devinfo->gen == 8) add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations", "N fragment shader invocations"); @@ -1677,7 +1681,7 @@ init_pipeline_statistic_query_registers(struct brw_context *brw) add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments"); - if (brw->gen >= 7) + if (devinfo->gen >= 7) add_basic_stat_reg(query, CS_INVOCATION_COUNT, "N compute shader invocations"); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/10] i965: Add Gen8+ INTEL_performance_query support
From: Robert BraggEnables access to OA unit metrics on Gen8+ via INTEL_performance_query. Signed-off-by: Robert Bragg --- src/mesa/drivers/dri/i965/Makefile.am | 8 +- src/mesa/drivers/dri/i965/brw_defines.h | 6 + src/mesa/drivers/dri/i965/brw_performance_query.c | 276 -- 3 files changed, 266 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 31ba460b1f5..3a749cb6d74 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -116,7 +116,7 @@ EXTRA_DIST = \ # .c and .h files in one go so we don't hit problems with parallel # make and multiple invocations of the same script trying to write # to the same files. -brw_oa_hsw.h: brw_oa.py brw_oa_hsw.xml - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --header=$(builddir)/brw_oa_hsw.h --chipset=hsw $(srcdir)/brw_oa_hsw.xml -brw_oa_hsw.c: brw_oa.py brw_oa_hsw.xml - $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/brw_oa_hsw.c --chipset=hsw $(srcdir)/brw_oa_hsw.xml +brw_oa_%.h: brw_oa.py brw_oa_%.xml Makefile.am + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --header=$(builddir)/brw_oa_$(*).h --chipset=$(*) $(srcdir)/brw_oa_$(*).xml +brw_oa_%.c: brw_oa.py brw_oa_%.xml Makefile.am + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py --code=$(builddir)/brw_oa_$(*).c --chipset=$(*) $(srcdir)/brw_oa_$(*).xml diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 312dddafd77..c98f4a699ce 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1350,6 +1350,12 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2)) +#define GEN8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2)) + +/* Bitfields for the URB_WRITE message, DW2 of message header: */ +#define URB_WRITE_PRIM_END 0x1 +#define URB_WRITE_PRIM_START 0x2 +#define URB_WRITE_PRIM_TYPE_SHIFT 2 /* Maximum number of 
entries that can be addressed using a binding table * pointer of type SURFTYPE_BUFFER diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 1c9ddf52ea3..d10141bf07a 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -72,16 +72,33 @@ #include "brw_defines.h" #include "brw_performance_query.h" #include "brw_oa_hsw.h" +#include "brw_oa_bdw.h" +#include "brw_oa_chv.h" +#include "brw_oa_sklgt2.h" +#include "brw_oa_sklgt3.h" +#include "brw_oa_sklgt4.h" +#include "brw_oa_bxt.h" #include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_PERFMON /* - * The largest OA format we can use on Haswell includes: - * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. + * The largest OA formats we can use include: + * For Haswell: + * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. + * For Gen8+ + * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters */ #define MAX_OA_REPORT_COUNTERS 62 +#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_SHIFT 19 +#define OAREPORT_REASON_TIMER (1<<0) +#define OAREPORT_REASON_TRIGGER1 (1<<1) +#define OAREPORT_REASON_TRIGGER2 (1<<2) +#define OAREPORT_REASON_CTX_SWITCH (1<<3) +#define OAREPORT_REASON_GO_TRANSITION (1<<4) + #define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ 256) /* OA counter report */ @@ -482,12 +499,21 @@ emit_mi_report_perf_count(struct brw_context *brw, { assert(offset_in_bytes % 64 == 0); - BEGIN_BATCH(3); - OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset_in_bytes); - OUT_BATCH(report_id); - ADVANCE_BATCH(); + if (brw->gen < 8) { + BEGIN_BATCH(3); + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, +offset_in_bytes); + OUT_BATCH(report_id); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + 
OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT); + OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + offset_in_bytes); + OUT_BATCH(report_id); + ADVANCE_BATCH(); + } } /** @@ -571,6 +597,28 @@ accumulate_uint32(const uint32_t *report0, *accumulator += (uint32_t)(*report1 - *report0); } +static void +accumulate_uint40(int a_index, + const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); + const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); +
[Mesa-dev] [PATCH 03/10] i965: perf: fix codegen with single operand equation
We did support single value operand equations, but not single variable operand ones. In particular we were failing on "$Sampler0Bottleneck". Signed-off-by: Lionel Landwerlin--- src/mesa/drivers/dri/i965/brw_oa.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index bf950b140da..254c512a7da 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -214,7 +214,9 @@ def output_rpn_equation_code(set, counter, equation, counter_vars): value = stack[-1] if value in hw_vars: -value = hw_vars[value]; +value = hw_vars[value] +if value in counter_vars: +value = read_funcs[value[1:]] + "(brw, query, accumulator)" c("\nreturn " + value + ";") -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/10] i965: Add Gen8+ sys_vars for generated OA code
From: Robert Bragg In preparation for adding XML OA metric set descriptions for Gen 8 and 9, which will result in auto-generated code that depends on a number of new system variables ($EuSubslicesTotalCount, $EuThreadsCount and $SliceMask), this adds corresponding members to brw->perf.sys_vars. Signed-off-by: Robert Bragg --- src/mesa/drivers/dri/i965/brw_context.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c15c0193584..6cce2e536ef 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1088,6 +1088,9 @@ struct brw_context uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ uint64_t n_eus; /** $EuCoresTotalCount */ uint64_t n_eu_slices; /** $EuSlicesTotalCount */ + uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ + uint64_t eu_threads_count;/** $EuThreadsCount */ + uint64_t slice_mask; /** $SliceMask */ uint64_t subslice_mask; /** $SubsliceMask */ uint64_t gt_min_freq; /** $GpuMinFrequency */ uint64_t gt_max_freq; /** $GpuMaxFrequency */ -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/10] i965: Add performance query OA support on Gen 8 & 9
Hi, This series has been used by a few people for some time already. Now that the kernel patches are making their way to upstream, here are the userspace side ones. This series depends on [1] which includes kernel header files to the tree, so we don't have to depend on a specific version of libdrm to have this compile. Cheers, [1] : https://patchwork.freedesktop.org/series/25801/ Lionel Landwerlin (6): intel: common: add flag to identify platforms by name i965: perf: fix codegen with single operand equation i965: perf: keep on reading reports until delimiting timestamp i965: use gen_device_info rather then brw_context i965: perf: add support for Kabylake i965: perf: add support for Geminilake Robert Bragg (4): i965: Add Gen8+ sys_vars for generated OA code i965: Add XML OA metric sets for Gen8+ i965: Add Gen8+ INTEL_performance_query support i965: ensure isolated timer reports while idle don't confuse filtering src/intel/common/gen_device_info.c|23 +- src/intel/common/gen_device_info.h| 3 + src/mesa/drivers/dri/i965/Makefile.am |17 +- src/mesa/drivers/dri/i965/Makefile.sources|20 +- src/mesa/drivers/dri/i965/brw_context.h | 3 + src/mesa/drivers/dri/i965/brw_defines.h | 6 + src/mesa/drivers/dri/i965/brw_oa.py | 4 +- src/mesa/drivers/dri/i965/brw_oa_bdw.xml | 15051 src/mesa/drivers/dri/i965/brw_oa_bxt.xml | 9211 src/mesa/drivers/dri/i965/brw_oa_chv.xml | 9569 + src/mesa/drivers/dri/i965/brw_oa_glk.xml | 9124 src/mesa/drivers/dri/i965/brw_oa_hsw.xml |26 +- src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml | 10455 ++ src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml | 10500 ++ src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml | 10925 ++ src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml | 10499 ++ src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml | 10522 ++ src/mesa/drivers/dri/i965/brw_performance_query.c | 436 +- 18 files changed, 96326 insertions(+), 68 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_oa_bdw.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_bxt.xml create mode 
100644 src/mesa/drivers/dri/i965/brw_oa_chv.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_glk.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/10] intel: common: add flag to identify platforms by name
The perf infrastructure needs to identify specific platforms, not just generations. Signed-off-by: Lionel Landwerlin--- src/intel/common/gen_device_info.c | 23 +-- src/intel/common/gen_device_info.h | 3 +++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c index 75284a66419..fee112ce25b 100644 --- a/src/intel/common/gen_device_info.c +++ b/src/intel/common/gen_device_info.c @@ -412,7 +412,6 @@ static const struct gen_device_info gen_device_info_chv = { #define GEN9_LP_FEATURES \ GEN9_FEATURES, \ - .is_broxton = 1,\ .gt = 1,\ .has_llc = false, \ .num_slices = 1,\ @@ -463,6 +462,7 @@ static const struct gen_device_info gen_device_info_chv = { static const struct gen_device_info gen_device_info_skl_gt1 = { GEN9_FEATURES, .gt = 1, + .is_skylake = true, .num_slices = 1, .l3_banks = 2, .urb.size = 192, @@ -470,18 +470,21 @@ static const struct gen_device_info gen_device_info_skl_gt1 = { static const struct gen_device_info gen_device_info_skl_gt2 = { GEN9_FEATURES, .gt = 2, + .is_skylake = true, .num_slices = 1, .l3_banks = 4, }; static const struct gen_device_info gen_device_info_skl_gt3 = { GEN9_FEATURES, .gt = 3, + .is_skylake = true, .num_slices = 2, .l3_banks = 8, }; static const struct gen_device_info gen_device_info_skl_gt4 = { GEN9_FEATURES, .gt = 4, + .is_skylake = true, .num_slices = 3, .l3_banks = 12, /* From the "L3 Allocation and Programming" documentation: @@ -497,11 +500,13 @@ static const struct gen_device_info gen_device_info_skl_gt4 = { static const struct gen_device_info gen_device_info_bxt = { GEN9_LP_FEATURES, + .is_broxton = true, .l3_banks = 2, }; static const struct gen_device_info gen_device_info_bxt_2x6 = { GEN9_LP_FEATURES_2X6, + .is_broxton = true, .l3_banks = 1, }; /* @@ -570,12 +575,14 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = { static const struct gen_device_info gen_device_info_glk = { GEN9_LP_FEATURES, + .is_geminilake = true, 
.l3_banks = 2, }; /*TODO: Initialize l3_banks when we know the number. */ static const struct gen_device_info gen_device_info_glk_2x6 = { - GEN9_LP_FEATURES_2X6 + GEN9_LP_FEATURES_2X6, + .is_geminilake = true, }; #define GEN10_HW_INFO \ @@ -606,22 +613,26 @@ static const struct gen_device_info gen_device_info_glk_2x6 = { static const struct gen_device_info gen_device_info_cnl_2x8 = { /* GT0.5 */ - GEN10_FEATURES(1, 1, 2) + GEN10_FEATURES(1, 1, 2), + .is_cannonlake = true, }; static const struct gen_device_info gen_device_info_cnl_3x8 = { /* GT1 */ - GEN10_FEATURES(1, 1, 3) + GEN10_FEATURES(1, 1, 3), + .is_cannonlake = true, }; static const struct gen_device_info gen_device_info_cnl_4x8 = { /* GT 1.5 */ - GEN10_FEATURES(1, 2, 6) + GEN10_FEATURES(1, 2, 6), + .is_cannonlake = true, }; static const struct gen_device_info gen_device_info_cnl_5x8 = { /* GT2 */ - GEN10_FEATURES(2, 2, 6) + GEN10_FEATURES(2, 2, 6), + .is_cannonlake = true, }; bool diff --git a/src/intel/common/gen_device_info.h b/src/intel/common/gen_device_info.h index 62076305194..2dd3d1b7688 100644 --- a/src/intel/common/gen_device_info.h +++ b/src/intel/common/gen_device_info.h @@ -40,8 +40,11 @@ struct gen_device_info bool is_baytrail; bool is_haswell; bool is_cherryview; + bool is_skylake; bool is_broxton; bool is_kabylake; + bool is_geminilake; + bool is_cannonlake; bool has_hiz_and_separate_stencil; bool must_use_separate_stencil; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] aubinator: import intel_aub.h from libdrm
This enables us to compile aubinator without the libdrm dependency. Signed-off-by: Lionel Landwerlin--- src/intel/tools/intel_aub.h | 153 1 file changed, 153 insertions(+) create mode 100644 src/intel/tools/intel_aub.h diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h new file mode 100644 index 000..5f0aba8e68e --- /dev/null +++ b/src/intel/tools/intel_aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Eric Anholt + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. 
+ * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP(0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a02) + +/* DW0: instruction type. */ + +#define CMD_AUB(7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT24 +# define AUB_HEADER_MINOR_SHIFT16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x00ff +#define AUB_TRACE_OP_COMMENT 0x +#define AUB_TRACE_OP_DATA_WRITE0x0001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x0002 +#define AUB_TRACE_OP_MMIO_WRITE0x0003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK0xff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP(7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB(12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER(13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB(1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL(1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY 
(4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE =
[Mesa-dev] [PATCH 0/2] Anv/i965: Drop dependency on libdrm's kernel headers
Hi, While working with changes that span from kernel to user space, I've been wondering whether we need to depend on libdrm's header files at all for the anv & i965 drivers. Indeed with Ken's recent changes, we depend on libdrm for 1 or 2 functions wrapping an ioctl (with drmGetDevices2 being the only function actually containing some logic) and for its kernel header files. The latter we could just embed ourselves given how closely the userspace & kernelspace drivers interact. I've only included the minimal set of header files we need from the kernel for anv & i965. Maybe other drivers would be interested and maybe we should put all the kernel drm uapi headers into include? Cheers, Lionel Landwerlin (2): aubinator: import intel_aub.h from libdrm anv/i965: drop libdrm_intel dependency completely configure.ac|6 +- src/gallium/drivers/i915/Automake.inc |2 +- src/gallium/targets/pipe-loader/Makefile.am |2 +- src/gallium/winsys/i915/drm/Makefile.am |2 +- src/intel/Makefile.drm.am | 22 + src/intel/Makefile.sources |6 + src/intel/Makefile.tools.am |3 +- src/intel/Makefile.vulkan.am|4 +- src/intel/drm/drm.h | 925 + src/intel/drm/drm_fourcc.h | 358 +++ src/intel/drm/drm_mode.h| 739 ++ src/intel/drm/i915_drm.h| 1459 +++ src/intel/tools/intel_aub.h | 153 +++ src/mesa/drivers/dri/i915/Makefile.am |4 +- src/mesa/drivers/dri/i965/Makefile.am |2 +- 15 files changed, 3673 insertions(+), 14 deletions(-) create mode 100644 src/intel/Makefile.drm.am create mode 100644 src/intel/drm/drm.h create mode 100644 src/intel/drm/drm_fourcc.h create mode 100644 src/intel/drm/drm_mode.h create mode 100644 src/intel/drm/i915_drm.h create mode 100644 src/intel/tools/intel_aub.h -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/9] i965: Use snoop bo for accessing query results on !llc
On 06/09/2017 06:01 AM, Chris Wilson wrote: > Ony non-llc architectures where we are primarily reading back the On > results of the GPU queries, then we can improve performance by using a > cacheable mapping of the results. Unfortunately, enabling snooping makes > the writes from the GPU slower, which may adversely affect pipelined > query operations (where the results are used directly by the GPU and not > CPU). > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c| 21 + > src/mesa/drivers/dri/i965/brw_bufmgr.h| 2 ++ > src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 ++ > 3 files changed, 25 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 9028b538c6..824bc55fb2 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -626,6 +626,27 @@ brw_bo_unreference(struct brw_bo *bo) > } > } > > +static bool __brw_bo_set_caching(struct brw_bo *bo, int caching) > +{ > + struct drm_i915_gem_caching arg = { > + .handle = bo->gem_handle, > + .caching = caching > + }; > + return drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, ) == > 0; > +} > + > +void brw_bo_set_cache_coherent(struct brw_bo *bo) > +{ > + if (bo->cache_coherent) > + return; > + > + if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED)) > + return; > + > + bo->reusable = false; > + bo->cache_coherent = true; > +} > + > static void > set_domain(struct brw_context *brw, const char *action, > struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain) > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h > b/src/mesa/drivers/dri/i965/brw_bufmgr.h > index 214b75bf1a..188d6c5ee0 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h > @@ -188,6 +188,8 @@ void brw_bo_unreference(struct brw_bo *bo); > #define MAP_INTERNAL_MASK (0xff << 24) > #define MAP_RAW (0x01 << 24) > > +void 
brw_bo_set_cache_coherent(struct brw_bo *bo); > + > /** > * Maps the buffer into userspace. > * > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index 18af608166..5c95a4bae9 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -316,6 +316,8 @@ static int gen6_alloc_query(struct brw_context *brw, >brw_bo_unreference(query->bo); > > query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096); > + brw_bo_set_cache_coherent(query->bo); > + > query->results = brw_bo_map(brw, query->bo, > MAP_READ | MAP_COHERENT | MAP_ASYNC); > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/9] i965: Replace open-coded gen6 queryobj offsets with simple helpers
On 06/09/2017 06:01 AM, Chris Wilson wrote: > Lots of places open-coded the assumed layout of the predicate/results > within the query object, replace those with simple helpers. > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_conditional_render.c | 4 ++-- > src/mesa/drivers/dri/i965/brw_context.h| 14 ++ > src/mesa/drivers/dri/i965/gen6_queryobj.c | 6 +++--- > src/mesa/drivers/dri/i965/hsw_queryobj.c | 18 +- > 4 files changed, 28 insertions(+), 14 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c > b/src/mesa/drivers/dri/i965/brw_conditional_render.c > index 046a42b5f5..197c35efe2 100644 > --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c > +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c > @@ -66,13 +66,13 @@ set_predicate_for_occlusion_query(struct brw_context *brw, > query->bo, > I915_GEM_DOMAIN_INSTRUCTION, > 0, /* write domain */ > - 0 /* offset */); > + gen6_query_results_offset(query, 0)); > brw_load_register_mem64(brw, > MI_PREDICATE_SRC1, > query->bo, > I915_GEM_DOMAIN_INSTRUCTION, > 0, /* write domain */ > - 8 /* offset */); > + gen6_query_results_offset(query, 1)); > } > > static void > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index d1503312d4..c5acb83ad0 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -427,6 +427,20 @@ struct brw_query_object { > bool flushed; > }; > > +#define GEN6_QUERY_PREDICATE (2) > +#define GEN6_QUERY_RESULTS (0) > + > +static inline unsigned gen6_query_predicate_offset(const struct > brw_query_object *query) static inline unsigned gen6_query_predicate_offset(const struct brw_query_object *query) > +{ > + return GEN6_QUERY_PREDICATE * sizeof(uint64_t); > +} > + > +static inline unsigned gen6_query_results_offset(const struct > brw_query_object *query, > +unsigned idx) ditto. 
> +{ > + return (GEN6_QUERY_RESULTS + idx) * sizeof(uint64_t); > +} > + > enum brw_gpu_ring { > UNKNOWN_RING, > RENDER_RING, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] anv: FORMAT_FEATURE_TRANSFER_SRC/DST_BIT_KHR not used with VkFormatProperties.bufferFeatures
Out of curiosity, does this fix a test (maybe upcoming one)? Otherwise sounds fair : Reviewed-by: Lionel Landwerlin Thanks! On 14/06/17 17:55, Andres Gomez wrote: VK_FORMAT_FEATURE_TRANSFER_[SRC|DST]_BIT_KHR is a flag value of the VkFormatFeatureFlagBits enum that can only be held and checked against the linearTilingFeatures or optimalTilingFeatures members of the VkFormatProperties struct but not the bufferFeatures member. From the Vulkan® 1.0.51, with the VK_KHR_maintenance1 extension, section 32.3.2 docs for VkFormatProperties: "* linearTilingFeatures is a bitmask of VkFormatFeatureFlagBits specifying features supported by images created with a tiling parameter of VK_IMAGE_TILING_LINEAR. * optimalTilingFeatures is a bitmask of VkFormatFeatureFlagBits specifying features supported by images created with a tiling parameter of VK_IMAGE_TILING_OPTIMAL. * bufferFeatures is a bitmask of VkFormatFeatureFlagBits specifying features supported by buffers." ... Bits which can be set in the VkFormatProperties features linearTilingFeatures, optimalTilingFeatures, and bufferFeatures are: typedef enum VkFormatFeatureFlagBits { ... VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x4000, VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x8000, ... } VkFormatFeatureFlagBits; ... The following bits may be set in linearTilingFeatures and optimalTilingFeatures, specifying that the features are supported by images or image views created with the queried vkGetPhysicalDeviceFormatProperties::format: ... * VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR specifies that an image can be used as a source image for copy commands. * VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR specifies that an image can be used as a destination image for copy commands and clear commands." 
Cc: Jason Ekstrand Cc: Iago Toral Quiroga Cc: Lionel Landwerlin Signed-off-by: Andres Gomez --- src/intel/vulkan/anv_formats.c | 5 - 1 file changed, 5 deletions(-) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 104d4f7a5fe..0bc81d12dab 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -378,11 +378,6 @@ get_buffer_format_properties(const struct gen_device_info *devinfo, if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - if (flags) { - flags |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; - } - return flags; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 9/9] i965: Pack simple pipelined query objects into the same buffer
On Friday, June 9, 2017 6:01:40 AM PDT Chris Wilson wrote: > Reuse the same query object buffer for multiple queries within the same > batch. > > A task for the future is propagating the GL_NO_MEMORY errors. > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_context.c | 3 +++ > src/mesa/drivers/dri/i965/brw_context.h | 10 --- > src/mesa/drivers/dri/i965/brw_queryobj.c | 16 +-- > src/mesa/drivers/dri/i965/gen6_queryobj.c | 44 > ++- > 4 files changed, 55 insertions(+), 18 deletions(-) The benefit is saving memory, right? The downside seems to be increased WaitQuery() latencies: - Start Query A - End Query A - Start Query B - Batch Flush - End Query B - WaitQuery for A The query BO also contains B, and both batches refer to it, so it seems like WaitQuery() would wait for two batches to complete instead of one. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/9] i965: Use snoop bo for accessing query results on !llc
On Friday, June 9, 2017 6:01:38 AM PDT Chris Wilson wrote: > Ony non-llc architectures where we are primarily reading back the > results of the GPU queries, then we can improve performance by using a > cacheable mapping of the results. Unfortunately, enabling snooping makes > the writes from the GPU slower, which may adversely affect pipelined > query operations (where the results are used directly by the GPU and not > CPU). We're essentially writing two DWords, and reading two DWords - so we aren't primarily reading. However, with your next patch, where we want to be able to asynchronously poll the status via CheckQuery()...we'll be reading a bunch more. It might make sense to mention this polling in the commit message. > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c| 21 + > src/mesa/drivers/dri/i965/brw_bufmgr.h| 2 ++ > src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 ++ > 3 files changed, 25 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 9028b538c6..824bc55fb2 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -626,6 +626,27 @@ brw_bo_unreference(struct brw_bo *bo) > } > } > > +static bool __brw_bo_set_caching(struct brw_bo *bo, int caching) static bool __brw_bo_set_caching(struct brw_bo *bo, int caching) > +{ > + struct drm_i915_gem_caching arg = { > + .handle = bo->gem_handle, > + .caching = caching > + }; > + return drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, ) == > 0; > +} > + > +void brw_bo_set_cache_coherent(struct brw_bo *bo) void brw_bo_set_cache_coherent(struct brw_bo *bo) With those three changes, Reviewed-by: Kenneth Graunke > +{ > + if (bo->cache_coherent) > + return; > + > + if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED)) > + return; > + > + bo->reusable = false; > + bo->cache_coherent = true; > +} > + > static void > set_domain(struct 
brw_context *brw, const char *action, > struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain) > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h > b/src/mesa/drivers/dri/i965/brw_bufmgr.h > index 214b75bf1a..188d6c5ee0 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h > @@ -188,6 +188,8 @@ void brw_bo_unreference(struct brw_bo *bo); > #define MAP_INTERNAL_MASK (0xff << 24) > #define MAP_RAW (0x01 << 24) > > +void brw_bo_set_cache_coherent(struct brw_bo *bo); > + > /** > * Maps the buffer into userspace. > * > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index 18af608166..5c95a4bae9 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -316,6 +316,8 @@ static int gen6_alloc_query(struct brw_context *brw, >brw_bo_unreference(query->bo); > > query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096); > + brw_bo_set_cache_coherent(query->bo); > + > query->results = brw_bo_map(brw, query->bo, > MAP_READ | MAP_COHERENT | MAP_ASYNC); > > signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/9] i965: Use 'available' fence for polling query results
On Friday, June 9, 2017 6:01:39 AM PDT Chris Wilson wrote: > If we always write the 'available' flag after writing the final result > of the query, we can probe that predicate to quickly query whether the > result is ready from userspace. The primary advantage of checking the > predicate is that it allows for more fine-grained queries, we do not > have to wait for the batch to finish before the query is marked as > ready. > > We still do check the status of the batch after probing the query so > that if the worst happens and the batch did hang without completing the > query, we do not spin forever (although it is not as nice as completely > eliminating the ioctl, the busy-ioctl is lightweight!). > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_context.h | 4 +-- > src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 > +-- > 2 files changed, 25 insertions(+), 33 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 117b1ecdca..44e0d31c6d 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -428,8 +428,8 @@ struct brw_query_object { > bool flushed; > }; > > -#define GEN6_QUERY_PREDICATE (2) > -#define GEN6_QUERY_RESULTS (0) > +#define GEN6_QUERY_PREDICATE (0) > +#define GEN6_QUERY_RESULTS (1) > > static inline unsigned gen6_query_predicate_offset(const struct > brw_query_object *query) > { > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index 5c95a4bae9..ae7fd06c1c 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -40,8 +40,7 @@ > #include "intel_buffer_objects.h" > > static inline void > -set_query_availability(struct brw_context *brw, struct brw_query_object > *query, > - bool available) > +set_query_available(struct brw_context *brw, struct brw_query_object *query) > { > /* 
For platforms that support ARB_query_buffer_object, we write the > * query availability for "pipelined" queries. > @@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct > brw_query_object *query, > * PIPE_CONTROL with an immediate write will synchronize with > * those earlier writes, so we write 1 when the value has landed. > */ > - if (brw->ctx.Extensions.ARB_query_buffer_object && > - brw_is_query_pipelined(query)) { > - unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE; > > - if (available) { > - /* Order available *after* the query results. */ > - flags |= PIPE_CONTROL_FLUSH_ENABLE; > - } else { > - /* Make it unavailable *before* any pipelined reads. */ > - flags |= PIPE_CONTROL_CS_STALL; > - } > - > - brw_emit_pipe_control_write(brw, flags, > - query->bo, > gen6_query_predicate_offset(query), > - available, 0); > - } > + brw_emit_pipe_control_write(brw, > + PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_FLUSH_ENABLE, > + query->bo, gen6_query_predicate_offset(query), > + true, 0); > } > > static void > @@ -139,12 +128,12 @@ write_xfb_overflow_streams(struct gl_context *ctx, > } > > static bool > -check_xfb_overflow_streams(uint64_t *results, int count) > +check_xfb_overflow_streams(const uint64_t *results, int count) > { > bool overflow = false; > > for (int i = 0; i < count; i++) { > - uint64_t *result_i = [4 * i]; > + const uint64_t *result_i = [4 * i]; > >if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) { > overflow = true; > @@ -214,16 +203,14 @@ emit_pipeline_stat(struct brw_context *brw, struct > brw_bo *bo, > */ > static void > gen6_queryobj_get_results(struct gl_context *ctx, > - struct brw_query_object *query) > + struct brw_query_object *query, > + const uint64_t *results) > { > struct brw_context *brw = brw_context(ctx); > > if (query->bo == NULL) >return; > > - brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT); > - uint64_t *results = query->results; > - > switch (query->Base.Target) { > case 
GL_TIME_ELAPSED: >/* The query BO contains the starting and ending timestamps. > @@ -319,10 +306,10 @@ static int gen6_alloc_query(struct brw_context *brw, > brw_bo_set_cache_coherent(query->bo); > > query->results = brw_bo_map(brw, query->bo, > - MAP_READ | MAP_COHERENT | MAP_ASYNC); > + MAP_READ | MAP_WRITE | MAP_COHERENT | >
Re: [Mesa-dev] [PATCH 04/15] i965: Prepare up/downsampling for isl based miptrees
On Tue, Jun 13, 2017 at 05:50:02PM +0300, Topi Pohjolainen wrote: > Signed-off-by: Topi Pohjolainen> --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 46 > ++- > 1 file changed, 38 insertions(+), 8 deletions(-) > Patches 2-4 are Reviewed-by: Nanley Chery > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 78a223a7f3..061860cdf6 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -2800,27 +2800,57 @@ intel_miptree_updownsample(struct brw_context *brw, > struct intel_mipmap_tree *src, > struct intel_mipmap_tree *dst) > { > + unsigned src_w, src_h, dst_w, dst_h; > + > + if (src->surf.size > 0) { > + src_w = src->surf.logical_level0_px.width; > + src_h = src->surf.logical_level0_px.height; > + } else { > + src_w = src->logical_width0; > + src_h = src->logical_height0; > + } > + > + if (dst->surf.size > 0) { > + dst_w = dst->surf.logical_level0_px.width; > + dst_h = dst->surf.logical_level0_px.height; > + } else { > + dst_w = dst->logical_width0; > + dst_h = dst->logical_height0; > + } > + > brw_blorp_blit_miptrees(brw, > src, 0 /* level */, 0 /* layer */, > src->format, SWIZZLE_XYZW, > dst, 0 /* level */, 0 /* layer */, dst->format, > - 0, 0, > - src->logical_width0, src->logical_height0, > - 0, 0, > - dst->logical_width0, dst->logical_height0, > + 0, 0, src_w, src_h, > + 0, 0, dst_w, dst_h, > GL_NEAREST, false, false /*mirror x, y*/, > false, false); > > if (src->stencil_mt) { > + if (src->stencil_mt->surf.size > 0) { > + src_w = src->stencil_mt->surf.logical_level0_px.width; > + src_h = src->stencil_mt->surf.logical_level0_px.height; > + } else { > + src_w = src->stencil_mt->logical_width0; > + src_h = src->stencil_mt->logical_height0; > + } > + > + if (dst->stencil_mt->surf.size > 0) { > + dst_w = dst->stencil_mt->surf.logical_level0_px.width; > + dst_h = dst->stencil_mt->surf.logical_level0_px.height; > + } else { > + dst_w = 
dst->stencil_mt->logical_width0; > + dst_h = dst->stencil_mt->logical_height0; > + } > + >brw_blorp_blit_miptrees(brw, >src->stencil_mt, 0 /* level */, 0 /* layer */, >src->stencil_mt->format, SWIZZLE_XYZW, >dst->stencil_mt, 0 /* level */, 0 /* layer */, >dst->stencil_mt->format, > - 0, 0, > - src->logical_width0, src->logical_height0, > - 0, 0, > - dst->logical_width0, dst->logical_height0, > + 0, 0, src_w, src_h, > + 0, 0, dst_w, dst_h, >GL_NEAREST, false, false /*mirror x, y*/, >false, false /* decode/encode srgb */); > } > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] i965: Map the query results for the life of the bo
On Wednesday, June 14, 2017 3:50:12 PM PDT Kenneth Graunke wrote: > On Friday, June 9, 2017 6:01:37 AM PDT Chris Wilson wrote: > > If we map the bo upon creation, we can avoid the latency of mmapping it > > when querying, and later use the asynchronous, persistent map of the > > predicate to do a quick query. > > > > Signed-off-by: Chris Wilson> > Cc: Kenneth Graunke > > Cc: Matt Turner > > --- > > src/mesa/drivers/dri/i965/brw_bufmgr.c| 15 + > > src/mesa/drivers/dri/i965/brw_bufmgr.h| 2 ++ > > src/mesa/drivers/dri/i965/brw_context.h | 1 + > > src/mesa/drivers/dri/i965/gen6_queryobj.c | 37 > > ++- > > 4 files changed, 44 insertions(+), 11 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > index 01590a0b0a..9028b538c6 100644 > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > @@ -775,6 +775,21 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, > > unsigned flags) > >return brw_bo_map_gtt(brw, bo, flags); > > } > > > > +void > > +brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned flags) > > +{ > > + unsigned domain; > > + > > + if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW)) > > + domain = I915_GEM_DOMAIN_GTT; > > + else if (can_map_cpu(bo, flags)) > > + domain = I915_GEM_DOMAIN_CPU; > > + else > > + domain = I915_GEM_DOMAIN_GTT; > > + > > + set_domain(brw, __func__, bo, domain, flags & MAP_WRITE ? 
domain : 0); > > +} > > + > > int > > brw_bo_unmap(struct brw_bo *bo) > > { > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h > > b/src/mesa/drivers/dri/i965/brw_bufmgr.h > > index 3a397be695..214b75bf1a 100644 > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h > > @@ -196,6 +196,8 @@ void brw_bo_unreference(struct brw_bo *bo); > > */ > > MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, > > unsigned flags); > > > > +void brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned > > flags); > > + > > /** > > * Reduces the refcount on the userspace mapping of the buffer > > * object. > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > > b/src/mesa/drivers/dri/i965/brw_context.h > > index c5acb83ad0..117b1ecdca 100644 > > --- a/src/mesa/drivers/dri/i965/brw_context.h > > +++ b/src/mesa/drivers/dri/i965/brw_context.h > > @@ -419,6 +419,7 @@ struct brw_query_object { > > > > /** Last query BO associated with this query. */ > > struct brw_bo *bo; > > + uint64_t *results; > > > > /** Last index in bo with query data for this object. */ > > int last_index; > > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > > index f913f986ae..18af608166 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > > @@ -221,7 +221,9 @@ gen6_queryobj_get_results(struct gl_context *ctx, > > if (query->bo == NULL) > >return; > > > > - uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); > > + brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT); > > + uint64_t *results = query->results; > > + > > switch (query->Base.Target) { > > case GL_TIME_ELAPSED: > >/* The query BO contains the starting and ending timestamps. 
> > @@ -296,7 +298,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, > > default: > >unreachable("Unrecognized query target in > > brw_queryobj_get_results()"); > > } > > - brw_bo_unmap(query->bo); > > > > /* Now that we've processed the data stored in the query's buffer > > object, > > * we can release it. > > @@ -307,6 +308,23 @@ gen6_queryobj_get_results(struct gl_context *ctx, > > query->Base.Ready = true; > > } > > > > +static int gen6_alloc_query(struct brw_context *brw, > > +struct brw_query_object *query) > > +{ > > + /* Since we're starting a new query, we need to throw away old results. > > */ > > + if (query->bo) > > + brw_bo_unreference(query->bo); > > + > > + query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096); > > + query->results = brw_bo_map(brw, query->bo, > > + MAP_READ | MAP_COHERENT | MAP_ASYNC); > > I don't understand why you're using MAP_ASYNC here. We're allocating a new > BO here, and not using the BO_ALLOC_FOR_RENDER flag, so it will be idle. > (brw_bufmgr.c:297 should ensure we never get a busy BO - if the cached BOs > are busy, it will just allocate us a new one.) > > So, MAP_ASYNC shouldn't avoid a stall. It does, however, skip the > SET_DOMAIN call, which means that it may not have the right domain > for our new coherent mapping. Hence, you need to whack it
Re: [Mesa-dev] [PATCH 03/11] intel/genxml: Combine DataDWord{0, 1} fields in to ImmediateData field
Reviewed-by: Rafael AntognolliOn Tue, Jun 13, 2017 at 11:28:22AM -0700, Anuj Phogat wrote: > Signed-off-by: Anuj Phogat > --- > src/intel/genxml/gen10.xml | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml > index 04d89cb..64041c1 100644 > --- a/src/intel/genxml/gen10.xml > +++ b/src/intel/genxml/gen10.xml > @@ -3386,8 +3386,7 @@ > > > > - > - > + > > > > -- > 2.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 05/11] intel/genxml: Rename StartInstanceLocation to StartingInstanceLocation
Hi Anuj, On Tue, Jun 13, 2017 at 11:28:24AM -0700, Anuj Phogat wrote: > This is required because we already have a macro defined with > the name StartInstanceLocation. > > Signed-off-by: Anuj Phogat > --- > src/intel/genxml/gen10.xml | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml > index 06260cf..d2bb130 100644 > --- a/src/intel/genxml/gen10.xml > +++ b/src/intel/genxml/gen10.xml > @@ -2570,7 +2570,7 @@ > > > > - > + This looks weird since it is the only value in this instruction whose name doesn't look like a macro (with all caps). But it's not the first case in all the xml's, so it's probably fine: Reviewed-by: Rafael Antognolli > > > > -- > 2.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place
On 06/14/2017 09:30 AM, Aaron Watry wrote: > Looks like Mark beat me to reporting this one (was still bisecting > when this patch was sent). > > Tested-by: Aaron WatryIf dispatch_sanity breaks, it's almost always the most recent commit in src/mapi/glapi/gen. That may save you some time bisecting when this happens again. :) > On Wed, Jun 14, 2017 at 11:08 AM, Samuel Pitoiset > wrote: >> Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points") >> Reported-by: Mark Janes >> Signed-off-by: Samuel Pitoiset >> --- >> src/mesa/main/tests/dispatch_sanity.cpp | 36 >> - >> 1 file changed, 18 insertions(+), 18 deletions(-) >> >> diff --git a/src/mesa/main/tests/dispatch_sanity.cpp >> b/src/mesa/main/tests/dispatch_sanity.cpp >> index 47d0aa63bf4..724c22ee9b3 100644 >> --- a/src/mesa/main/tests/dispatch_sanity.cpp >> +++ b/src/mesa/main/tests/dispatch_sanity.cpp >> @@ -965,6 +965,24 @@ const struct function >> common_desktop_functions_possible[] = { >> { "glBufferPageCommitmentARB", 43, -1 }, >> { "glNamedBufferPageCommitmentARB", 43, -1 }, >> >> + /* GL_ARB_bindless_texture */ >> + { "glGetTextureHandleARB", 40, -1 }, >> + { "glGetTextureSamplerHandleARB", 40, -1 }, >> + { "glMakeTextureHandleResidentARB", 40, -1 }, >> + { "glMakeTextureHandleNonResidentARB", 40, -1 }, >> + { "glIsTextureHandleResidentARB", 40, -1 }, >> + { "glGetImageHandleARB", 40, -1 }, >> + { "glMakeImageHandleResidentARB", 40, -1 }, >> + { "glMakeImageHandleNonResidentARB", 40, -1 }, >> + { "glIsImageHandleResidentARB", 40, -1 }, >> + { "glUniformHandleui64ARB", 40, -1 }, >> + { "glUniformHandleui64vARB", 40, -1 }, >> + { "glProgramUniformHandleui64ARB", 40, -1 }, >> + { "glProgramUniformHandleui64vARB", 40, -1 }, >> + { "glVertexAttribL1ui64ARB", 40, -1 }, >> + { "glVertexAttribL1ui64vARB", 40, -1 }, >> + { "glGetVertexAttribLui64vARB", 40, -1 }, >> + >> { NULL, 0, -1 } >> }; >> >> @@ -2374,24 +2392,6 @@ const struct function gles2_functions_possible[] = { >> /* 
GL_KHR_blend_equation_advanced */ >> { "glBlendBarrierKHR", 20, -1 }, >> >> - /* GL_ARB_bindless_texture */ >> - { "glGetTextureHandleARB", 40, -1 }, >> - { "glGetTextureSamplerHandleARB", 40, -1 }, >> - { "glMakeTextureHandleResidentARB", 40, -1 }, >> - { "glMakeTextureHandleNonResidentARB", 40, -1 }, >> - { "glIsTextureHandleResidentARB", 40, -1 }, >> - { "glGetImageHandleARB", 40, -1 }, >> - { "glMakeImageHandleResidentARB", 40, -1 }, >> - { "glMakeImageHandleNonResidentARB", 40, -1 }, >> - { "glIsImageHandleResidentARB", 40, -1 }, >> - { "glUniformHandleui64ARB", 40, -1 }, >> - { "glUniformHandleui64vARB", 40, -1 }, >> - { "glProgramUniformHandleui64ARB", 40, -1 }, >> - { "glProgramUniformHandleui64vARB", 40, -1 }, >> - { "glVertexAttribL1ui64ARB", 40, -1 }, >> - { "glVertexAttribL1ui64vARB", 40, -1 }, >> - { "glGetVertexAttribLui64vARB", 40, -1 }, >> - >> { NULL, 0, -1 } >> }; >> >> -- >> 2.13.1 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place
Reviewed-by: Ian RomanickOn 06/14/2017 09:08 AM, Samuel Pitoiset wrote: > Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points") > Reported-by: Mark Janes > Signed-off-by: Samuel Pitoiset > --- > src/mesa/main/tests/dispatch_sanity.cpp | 36 > - > 1 file changed, 18 insertions(+), 18 deletions(-) > > diff --git a/src/mesa/main/tests/dispatch_sanity.cpp > b/src/mesa/main/tests/dispatch_sanity.cpp > index 47d0aa63bf4..724c22ee9b3 100644 > --- a/src/mesa/main/tests/dispatch_sanity.cpp > +++ b/src/mesa/main/tests/dispatch_sanity.cpp > @@ -965,6 +965,24 @@ const struct function > common_desktop_functions_possible[] = { > { "glBufferPageCommitmentARB", 43, -1 }, > { "glNamedBufferPageCommitmentARB", 43, -1 }, > > + /* GL_ARB_bindless_texture */ > + { "glGetTextureHandleARB", 40, -1 }, > + { "glGetTextureSamplerHandleARB", 40, -1 }, > + { "glMakeTextureHandleResidentARB", 40, -1 }, > + { "glMakeTextureHandleNonResidentARB", 40, -1 }, > + { "glIsTextureHandleResidentARB", 40, -1 }, > + { "glGetImageHandleARB", 40, -1 }, > + { "glMakeImageHandleResidentARB", 40, -1 }, > + { "glMakeImageHandleNonResidentARB", 40, -1 }, > + { "glIsImageHandleResidentARB", 40, -1 }, > + { "glUniformHandleui64ARB", 40, -1 }, > + { "glUniformHandleui64vARB", 40, -1 }, > + { "glProgramUniformHandleui64ARB", 40, -1 }, > + { "glProgramUniformHandleui64vARB", 40, -1 }, > + { "glVertexAttribL1ui64ARB", 40, -1 }, > + { "glVertexAttribL1ui64vARB", 40, -1 }, > + { "glGetVertexAttribLui64vARB", 40, -1 }, > + > { NULL, 0, -1 } > }; > > @@ -2374,24 +2392,6 @@ const struct function gles2_functions_possible[] = { > /* GL_KHR_blend_equation_advanced */ > { "glBlendBarrierKHR", 20, -1 }, > > - /* GL_ARB_bindless_texture */ > - { "glGetTextureHandleARB", 40, -1 }, > - { "glGetTextureSamplerHandleARB", 40, -1 }, > - { "glMakeTextureHandleResidentARB", 40, -1 }, > - { "glMakeTextureHandleNonResidentARB", 40, -1 }, > - { "glIsTextureHandleResidentARB", 40, -1 }, > - { "glGetImageHandleARB", 40, -1 }, 
> - { "glMakeImageHandleResidentARB", 40, -1 }, > - { "glMakeImageHandleNonResidentARB", 40, -1 }, > - { "glIsImageHandleResidentARB", 40, -1 }, > - { "glUniformHandleui64ARB", 40, -1 }, > - { "glUniformHandleui64vARB", 40, -1 }, > - { "glProgramUniformHandleui64ARB", 40, -1 }, > - { "glProgramUniformHandleui64vARB", 40, -1 }, > - { "glVertexAttribL1ui64ARB", 40, -1 }, > - { "glVertexAttribL1ui64vARB", 40, -1 }, > - { "glGetVertexAttribLui64vARB", 40, -1 }, > - > { NULL, 0, -1 } > }; > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] i965: Map the query results for the life of the bo
On Friday, June 9, 2017 6:01:37 AM PDT Chris Wilson wrote: > If we map the bo upon creation, we can avoid the latency of mmapping it > when querying, and later use the asynchronous, persistent map of the > predicate to do a quick query. > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c| 15 + > src/mesa/drivers/dri/i965/brw_bufmgr.h| 2 ++ > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/gen6_queryobj.c | 37 > ++- > 4 files changed, 44 insertions(+), 11 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 01590a0b0a..9028b538c6 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -775,6 +775,21 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, > unsigned flags) >return brw_bo_map_gtt(brw, bo, flags); > } > > +void > +brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned flags) > +{ > + unsigned domain; > + > + if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW)) > + domain = I915_GEM_DOMAIN_GTT; > + else if (can_map_cpu(bo, flags)) > + domain = I915_GEM_DOMAIN_CPU; > + else > + domain = I915_GEM_DOMAIN_GTT; > + > + set_domain(brw, __func__, bo, domain, flags & MAP_WRITE ? domain : 0); > +} > + > int > brw_bo_unmap(struct brw_bo *bo) > { > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h > b/src/mesa/drivers/dri/i965/brw_bufmgr.h > index 3a397be695..214b75bf1a 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h > @@ -196,6 +196,8 @@ void brw_bo_unreference(struct brw_bo *bo); > */ > MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, > unsigned flags); > > +void brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned > flags); > + > /** > * Reduces the refcount on the userspace mapping of the buffer > * object. 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index c5acb83ad0..117b1ecdca 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -419,6 +419,7 @@ struct brw_query_object { > > /** Last query BO associated with this query. */ > struct brw_bo *bo; > + uint64_t *results; > > /** Last index in bo with query data for this object. */ > int last_index; > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index f913f986ae..18af608166 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -221,7 +221,9 @@ gen6_queryobj_get_results(struct gl_context *ctx, > if (query->bo == NULL) >return; > > - uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); > + brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT); > + uint64_t *results = query->results; > + > switch (query->Base.Target) { > case GL_TIME_ELAPSED: >/* The query BO contains the starting and ending timestamps. > @@ -296,7 +298,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, > default: >unreachable("Unrecognized query target in brw_queryobj_get_results()"); > } > - brw_bo_unmap(query->bo); > > /* Now that we've processed the data stored in the query's buffer object, > * we can release it. > @@ -307,6 +308,23 @@ gen6_queryobj_get_results(struct gl_context *ctx, > query->Base.Ready = true; > } > > +static int gen6_alloc_query(struct brw_context *brw, > +struct brw_query_object *query) > +{ > + /* Since we're starting a new query, we need to throw away old results. */ > + if (query->bo) > + brw_bo_unreference(query->bo); > + > + query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096); > + query->results = brw_bo_map(brw, query->bo, > + MAP_READ | MAP_COHERENT | MAP_ASYNC); I don't understand why you're using MAP_ASYNC here. 
We're allocating a new BO here, and not using the BO_ALLOC_FOR_RENDER flag, so it will be idle. (brw_bufmgr.c:297 should ensure we never get a busy BO - if the cached BOs are busy, it will just allocate us a new one.) So, MAP_ASYNC shouldn't avoid a stall. It does, however, skip the SET_DOMAIN call, which means that it may not have the right domain for our new coherent mapping. Hence, you need to whack it later with your new brw_bo_map_sync() helper. I think you can drop MAP_ASYNC, and drop brw_bo_map_sync() entirely, with no ill-effects. Or am I wrong? > + > + /* For ARB_query_buffer_object: The result is not available */ > + set_query_availability(brw, query, false); > + > + return 0; > +} > + > /** > * Driver
Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees
On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote: > Signed-off-by: Topi Pohjolainen > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > index 8479b285cb..0b85bc12ef 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct > intel_mipmap_tree *mt, > > assert(level >= mt->first_level); > assert(level <= mt->last_level); > - assert(layer < mt->level[level].depth); > + > + if (mt->surf.size > 0) > + assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ? > + mt->surf.phys_level0_sa.depth : Shouldn't we be minifying the depth here? > + mt->surf.phys_level0_sa.array_len)); > + else > + assert(layer < mt->level[level].depth); > } > > void intel_miptree_reference(struct intel_mipmap_tree **dst, > -- > 2.11.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/15] i965/miptree: Refactor mapping table alloc
On Wed, Jun 14, 2017 at 09:45:46PM +0300, Pohjolainen, Topi wrote: > On Tue, Jun 13, 2017 at 04:31:26PM -0700, Nanley Chery wrote: > > On Tue, Jun 13, 2017 at 05:49:59PM +0300, Topi Pohjolainen wrote: > > > Signed-off-by: Topi Pohjolainen> > > --- > > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 > > > +-- > > > 1 file changed, 27 insertions(+), 2 deletions(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > index 253d833b13..78a223a7f3 100644 > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > > @@ -285,6 +285,26 @@ > > > intel_depth_format_for_depthstencil_format(mesa_format format) { > > > } > > > } > > > > > > +static bool > > > +create_mapping_table(GLenum target, unsigned first_level, unsigned > > > last_level, > > > + unsigned depth0, struct intel_mipmap_level *table) > > > +{ > > > + for (unsigned level = first_level; level <= last_level; level++) { > > > + const unsigned d = target == GL_TEXTURE_3D ? depth0 >> level : > > > depth0; > > > > There's a bug here. If the target is GL_TEXTURE_3D we should > > minify(depth0, level) to avoid setting a depth of 0. > > Oops, definitely. > > > > > This seems to be more than a refactor. Prior to this patch, > > brw_miptree_layout_gen6_hiz_stencil wouldn't shrink the number of slices > > per mipmap level as the level increases, but does so now. > > Right. I actually missed that. How do want to handle that? I could write a > patch against brw_miptree_layout_gen6_hiz_stencil() doing the same thing there > (modifying the argument given to intel_miptree_set_level_info() but keeping > actual allocation size as it was in order to have space for level 0 qpitch). > That should be sufficient. 
> > > > -Nanley > > > > > + > > > + table[level].slice = calloc(d, sizeof(*table[0].slice)); > > > + if (!table[level].slice) > > > + goto unwind; > > > + } > > > + > > > + return true; > > > + > > > +unwind: > > > + for (unsigned level = first_level; level <= last_level; level++) > > > + free(table[level].slice); > > > + > > > + return false; > > > +} > > > > > > /** > > > * @param for_bo Indicates that the caller is > > > @@ -424,6 +444,12 @@ intel_miptree_create_layout(struct brw_context *brw, > > >} > > > } > > > > > > + if (!create_mapping_table(target, first_level, last_level, depth0, > > > + mt->level)) { > > > + free(mt); > > > + return NULL; > > > + } > > > + > > > /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 > > > can > > > * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces > > > on > > > * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is > > > still > > > @@ -1103,9 +1129,8 @@ intel_miptree_set_level_info(struct > > > intel_mipmap_tree *mt, > > > DBG("%s level %d, depth %d, offset %d,%d\n", __func__, > > > level, d, x, y); > > > > > > - assert(mt->level[level].slice == NULL); > > > + assert(mt->level[level].slice); > > > > > > - mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice)); > > > mt->level[level].slice[0].x_offset = mt->level[level].level_x; > > > mt->level[level].slice[0].y_offset = mt->level[level].level_y; > > > } > > > -- > > > 2.11.0 > > > > > > ___ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac: Use mov_dpp for derivatives.
I was looking into WQM stuff today, and I realized that LLVM will no longer mark this instruction as needing WQM, which seems like a problem. Seems like we need a patch to LLVM. Other uses of DPP (e.g. for the subgroup reduction stuff) won't want WQM, so I'm not sure what's the best approach there. If we add an attribute, will LLVM guarantee that we won't remove it? On Sat, Jun 10, 2017 at 1:05 PM, Bas Nieuwenhuizenwrote: > Slightly faster than bpermute, and seems supported since at least > LLVM 3.9. > > v2: Since this supersedes bpermute, remove the bpermute code. > Signed-off-by: Bas Nieuwenhuizen > --- > src/amd/common/ac_llvm_build.c | 47 > > src/amd/common/ac_llvm_build.h | 2 +- > src/amd/common/ac_nir_to_llvm.c | 8 +++--- > src/gallium/drivers/radeonsi/si_pipe.c | 2 +- > src/gallium/drivers/radeonsi/si_pipe.h | 2 +- > src/gallium/drivers/radeonsi/si_shader.c | 4 +-- > 6 files changed, 38 insertions(+), 27 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 237e9291d41..99d41bf52d6 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -783,41 +783,52 @@ ac_get_thread_id(struct ac_llvm_context *ctx) > */ > LLVMValueRef > ac_build_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > + bool has_mov_dpp, > uint32_t mask, > int idx, > LLVMValueRef lds, > LLVMValueRef val) > { > - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; > + LLVMValueRef thread_id, tl, trbl, args[5]; > LLVMValueRef result; > > - thread_id = ac_get_thread_id(ctx); > + if (has_mov_dpp) { > + uint32_t tl_ctrl = 0, trbl_ctrl = 0; > > - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > - LLVMConstInt(ctx->i32, mask, false), ""); > - > - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, idx, false), ""); > + for (unsigned i = 0; i < 4; ++i) { > + tl_ctrl |= (i & mask) << (2 * i); > + trbl_ctrl |= ((i & mask) + idx) << (2 * i); > + } > > - if (has_ds_bpermute) { > - 
args[0] = LLVMBuildMul(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, 4, false), ""); > - args[1] = val; > + args[0] = val; > + args[1] = LLVMConstInt(ctx->i32, tl_ctrl, false); > + args[2] = LLVMConstInt(ctx->i32, 0xf, false); > + args[3] = LLVMConstInt(ctx->i32, 0xf, false); > + args[4] = LLVMConstInt(ctx->i1, 1, false); > tl = ac_build_intrinsic(ctx, > - "llvm.amdgcn.ds.bpermute", ctx->i32, > - args, 2, > + "llvm.amdgcn.mov.dpp.i32", ctx->i32, > + args, 5, > AC_FUNC_ATTR_READNONE | > AC_FUNC_ATTR_CONVERGENT); > > - args[0] = LLVMBuildMul(ctx->builder, trbl_tid, > - LLVMConstInt(ctx->i32, 4, false), ""); > + args[1] = LLVMConstInt(ctx->i32, trbl_ctrl, false); > trbl = ac_build_intrinsic(ctx, > - "llvm.amdgcn.ds.bpermute", ctx->i32, > - args, 2, > + "llvm.amdgcn.mov.dpp.i32", ctx->i32, > + args, 5, > AC_FUNC_ATTR_READNONE | > AC_FUNC_ATTR_CONVERGENT); > } else { > + LLVMValueRef tl_tid, trbl_tid; > + > + thread_id = ac_get_thread_id(ctx); > + > + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > + LLVMConstInt(ctx->i32, mask, false), ""); > + > + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > + LLVMConstInt(ctx->i32, idx, false), > ""); > + > + > LLVMValueRef store_ptr, load_ptr0, load_ptr1; > > store_ptr = ac_build_gep0(ctx, lds, thread_id); > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index ebb78fbd79b..14260b05018 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -161,7 +161,7 @@ ac_get_thread_id(struct ac_llvm_context *ctx); > > LLVMValueRef > ac_build_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > + bool has_mov_dpp, > uint32_t mask, > int idx, >
Re: [Mesa-dev] [PATCH 2/5] st/mesa: remove redundant sample_mask checking
On 06/14/2017 11:41 PM, Marek Olšák wrote: From: Marek Olšák cso does that too --- src/mesa/state_tracker/st_atom_msaa.c | 7 +-- src/mesa/state_tracker/st_context.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_msaa.c b/src/mesa/state_tracker/st_atom_msaa.c index 0bdb9b2..814077f 100644 --- a/src/mesa/state_tracker/st_atom_msaa.c +++ b/src/mesa/state_tracker/st_atom_msaa.c @@ -55,26 +55,21 @@ void st_update_sample_mask( struct st_context *st ) Also, there's an interface restriction here in theory it is encouraged this mask not be the same at each pixel. */ sample_mask = (1 << nr_bits) - 1; if (st->ctx->Multisample.SampleCoverageInvert) sample_mask = ~sample_mask; } if (st->ctx->Multisample.SampleMask) sample_mask &= st->ctx->Multisample.SampleMaskValue; } - /* mask off unused bits or don't care? */ - - if (sample_mask != st->state.sample_mask) { - st->state.sample_mask = sample_mask; - cso_set_sample_mask(st->cso_context, sample_mask); - } + cso_set_sample_mask(st->cso_context, sample_mask); Nice one! Sooo, we have three similar checks, st/mesa, cso and radeonsi, fun times. 
:) } void st_update_sample_shading( struct st_context *st ) { if (!st->fp) return; if (!st->ctx->Extensions.ARB_sample_shading) return; diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 6497587..2fe9d92 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -149,21 +149,20 @@ struct st_context unsigned fb_height; unsigned fb_num_samples; unsigned fb_num_layers; struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS]; struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS]; struct { unsigned num; boolean include; struct pipe_scissor_state rects[PIPE_MAX_WINDOW_RECTANGLES]; } window_rects; - unsigned sample_mask; GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ GLuint fb_orientation; } state; uint64_t dirty; /**< dirty states */ /** This masks out unused shader resources. Only valid in draw calls. */ uint64_t active_states; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/11] i965: Use blorp for depth/stencil clears on gen6+
On Wed, Jun 14, 2017 at 12:00 PM, Pohjolainen, Topi < topi.pohjolai...@gmail.com> wrote: > On Tue, Jun 06, 2017 at 10:00:06PM -0700, Jason Ekstrand wrote: > > --- > > src/mesa/drivers/dri/i965/brw_blorp.c | 106 > ++ > > src/mesa/drivers/dri/i965/brw_blorp.h | 4 ++ > > src/mesa/drivers/dri/i965/brw_clear.c | 6 ++ > > 3 files changed, 116 insertions(+) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > > index 38925d9..a46b624 100644 > > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > > @@ -930,6 +930,112 @@ brw_blorp_clear_color(struct brw_context *brw, > struct gl_framebuffer *fb, > > } > > > > void > > +brw_blorp_clear_depth_stencil(struct brw_context *brw, > > + struct gl_framebuffer *fb, > > + GLbitfield mask, bool partial_clear) > > +{ > > + const struct gl_context *ctx = >ctx; > > + struct gl_renderbuffer *depth_rb = > > + fb->Attachment[BUFFER_DEPTH].Renderbuffer; > > + struct gl_renderbuffer *stencil_rb = > > + fb->Attachment[BUFFER_STENCIL].Renderbuffer; > > + > > + if (!depth_rb || ctx->Depth.Mask == GL_FALSE) > > + mask &= ~BUFFER_BIT_DEPTH; > > + > > + if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0) > > + mask &= ~BUFFER_BIT_STENCIL; > > + > > + if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) > > + return; > > + > > + uint32_t x0, x1, y0, y1, rb_name, rb_height; > > + if (depth_rb) { > > + rb_name = depth_rb->Name; > > + rb_height = depth_rb->Height; > > + if (stencil_rb) { > > + assert(depth_rb->Width == stencil_rb->Width); > > + assert(depth_rb->Height == stencil_rb->Height); > > + } > > + } else { > > + assert(stencil_rb); > > + rb_name = stencil_rb->Name; > > + rb_height = stencil_rb->Height; > > + } > > + > > + x0 = fb->_Xmin; > > + x1 = fb->_Xmax; > > + if (rb_name != 0) { > > + y0 = fb->_Ymin; > > + y1 = fb->_Ymax; > > + } else { > > + y0 = rb_height - fb->_Ymax; > > + y1 = rb_height - fb->_Ymin; > > + } > > + > > + /* If the clear region is 
empty, just return. */ > > + if (x0 == x1 || y0 == y1) > > + return; > > + > > + unsigned level, layer, num_layers; > > + struct isl_surf isl_tmp[4]; > > + struct blorp_surf depth_surf, stencil_surf; > > + > > + if (mask & BUFFER_BIT_DEPTH) { > > + struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb); > > + struct intel_mipmap_tree *depth_mt = > > + find_miptree(GL_DEPTH_BUFFER_BIT, irb); > > + > > + level = irb->mt_level; > > + layer = irb_logical_mt_layer(irb); > > + num_layers = fb->MaxNumLayers ? irb->layer_count : 1; > > + > > + intel_miptree_set_all_slices_need_depth_resolve(depth_mt, level); > > + > > + unsigned depth_level = level; > > + blorp_surf_for_miptree(brw, _surf, depth_mt, true, > > + (1 << ISL_AUX_USAGE_HIZ), > > + _level, layer, num_layers, > _tmp[0]); > > + assert(depth_level == level); > > + } > > + > > + uint8_t stencil_mask = 0; > > + if (mask & BUFFER_BIT_STENCIL) { > > + struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb); > > + struct intel_mipmap_tree *stencil_mt = > > + find_miptree(GL_STENCIL_BUFFER_BIT, irb); > > + > > + if (mask & BUFFER_BIT_DEPTH) { > > + assert(level == irb->mt_level); > > + assert(layer == irb_logical_mt_layer(irb)); > > + assert(num_layers == fb->MaxNumLayers ? irb->layer_count : 1); > > + } else { > > + level = irb->mt_level; > > + layer = irb_logical_mt_layer(irb); > > + num_layers = fb->MaxNumLayers ? irb->layer_count : 1; > > + } > > + > > + stencil_mask = ctx->Stencil.WriteMask[0] & 0xff; > > + > > + unsigned stencil_level = level; > > + blorp_surf_for_miptree(brw, _surf, stencil_mt, true, > > + (1 << ISL_AUX_USAGE_HIZ), > > Why do we set hiz for stencil? > > I noticed that anv_blorp.c::anv_CmdClearDepthStencilImage() sets it to > NONE > for depth and stencil while get_blorp_surf_for_anv_image() has code to take > the HIZ usage away for stencil (if given). > No reason. I'm happy to make it 0 for no aux support on stencil. 
> Otherwise looks good to me: > > Reviewed-by: Topi Pohjolainen> Thanks! > > + _level, layer, num_layers, > _tmp[2]); > > + } > > + > > + assert((mask & BUFFER_BIT_DEPTH) || stencil_mask); > > + > > + struct blorp_batch batch; > > + blorp_batch_init(>blorp, , brw, 0); > > + blorp_clear_depth_stencil(, _surf, _surf, > > + level, layer, num_layers, > > + x0, y0, x1, y1, > > + (mask & BUFFER_BIT_DEPTH), > ctx->Depth.Clear, > >
[Mesa-dev] [PATCH] radeonsi: remove useless check in si_set_min_samples()
CSO already takes care of this. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/si_state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 1cd1f9190e0..facbc87e310 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2978,9 +2978,6 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) { struct si_context *sctx = (struct si_context *)ctx; - if (sctx->ps_iter_samples == min_samples) - return; - sctx->ps_iter_samples = min_samples; sctx->do_update_shaders = true; -- 2.13.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/9] i965: Replace open-coded gen6 queryobj offsets with simple helpers
On Friday, June 9, 2017 6:01:36 AM PDT Chris Wilson wrote: > Lots of places open-coded the assumed layout of the predicate/results > within the query object, replace those with simple helpers. > > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_conditional_render.c | 4 ++-- > src/mesa/drivers/dri/i965/brw_context.h| 14 ++ > src/mesa/drivers/dri/i965/gen6_queryobj.c | 6 +++--- > src/mesa/drivers/dri/i965/hsw_queryobj.c | 18 +- > 4 files changed, 28 insertions(+), 14 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c > b/src/mesa/drivers/dri/i965/brw_conditional_render.c > index 046a42b5f5..197c35efe2 100644 > --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c > +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c > @@ -66,13 +66,13 @@ set_predicate_for_occlusion_query(struct brw_context *brw, > query->bo, > I915_GEM_DOMAIN_INSTRUCTION, > 0, /* write domain */ > - 0 /* offset */); > + gen6_query_results_offset(query, 0)); > brw_load_register_mem64(brw, > MI_PREDICATE_SRC1, > query->bo, > I915_GEM_DOMAIN_INSTRUCTION, > 0, /* write domain */ > - 8 /* offset */); > + gen6_query_results_offset(query, 1)); > } > > static void > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index d1503312d4..c5acb83ad0 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -427,6 +427,20 @@ struct brw_query_object { > bool flushed; > }; > > +#define GEN6_QUERY_PREDICATE (2) > +#define GEN6_QUERY_RESULTS (0) > + > +static inline unsigned gen6_query_predicate_offset(const struct > brw_query_object *query) > +{ > + return GEN6_QUERY_PREDICATE * sizeof(uint64_t); > +} > + > +static inline unsigned gen6_query_results_offset(const struct > brw_query_object *query, > +unsigned idx) > +{ > + return (GEN6_QUERY_RESULTS + idx) * sizeof(uint64_t); > +} > + > enum brw_gpu_ring { > UNKNOWN_RING, > 
RENDER_RING, > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index cc0f6f0b77..f913f986ae 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -71,7 +71,7 @@ set_query_availability(struct brw_context *brw, struct > brw_query_object *query, >} > >brw_emit_pipe_control_write(brw, flags, > - query->bo, 2 * sizeof(uint64_t), > + query->bo, > gen6_query_predicate_offset(query), >available, 0); > } > } > @@ -318,7 +318,7 @@ gen6_begin_query(struct gl_context *ctx, struct > gl_query_object *q) > { > struct brw_context *brw = brw_context(ctx); > struct brw_query_object *query = (struct brw_query_object *)q; > - const int idx = 0; > + const int idx = GEN6_QUERY_RESULTS; > > /* Since we're starting a new query, we need to throw away old results. */ > brw_bo_unreference(query->bo); > @@ -407,7 +407,7 @@ gen6_end_query(struct gl_context *ctx, struct > gl_query_object *q) > { > struct brw_context *brw = brw_context(ctx); > struct brw_query_object *query = (struct brw_query_object *)q; > - const int idx = 1; > + const int idx = GEN6_QUERY_RESULTS + 1; > > switch (query->Base.Target) { > case GL_TIME_ELAPSED: > diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c > b/src/mesa/drivers/dri/i965/hsw_queryobj.c > index b81ab3b6f8..cb1a2df52d 100644 > --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c > +++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c > @@ -191,7 +191,7 @@ load_overflow_data_to_cs_gprs(struct brw_context *brw, >struct brw_query_object *query, >int idx) > { > - int offset = idx * sizeof(uint64_t) * 4; > + int offset = gen6_query_results_offset(query, 0) + idx * sizeof(uint64_t) > * 4; FWIW, I'm pretty sure 4 here is BRW_MAX_XFB_STREAMS. I personally don't think that the code is more readable after patches 4-5, but I suppose that's a matter of taste. 
I'd be inclined to leave the code with hardcoded offsets, but add a comment to the top of the file describing the layout (I thought we had one already, but it looks like we don't). That said, the patches look correct to me, so if someone else wants to chime in and say that they prefer this style, I'm okay with them. > >
Re: [Mesa-dev] [PATCH 07/13] anv/blorp: Remove 3D subresource transition workaround
On Wed, Jun 14, 2017 at 09:32:22AM +0200, Iago Toral wrote: > On Tue, 2017-06-13 at 11:41 -0700, Nanley Chery wrote: > > For 3D image subresources undergoing a layout transition via > > PipelineBarrier, we increase the number of fast-cleared layers to > > match > > the intended behaviour of KHR_maintenance1. When such subresources > > undergo layout transitions between subpasses, we don't do this to > > avoid > > failing incorrect CTS tests. Instead, unify the behaviour in both > > scenarios, and wait for the CTS tests to catch up. See CL for > > the > > test fix. > > > > On SKL+, this causes 3 test failures under: > > dEQP-VK.pipeline.render_to_image.3d.* > > > > Signed-off-by: Nanley Chery> > --- > > src/intel/vulkan/anv_blorp.c | 8 > > 1 file changed, 4 insertions(+), 4 deletions(-) > > > > diff --git a/src/intel/vulkan/anv_blorp.c > > b/src/intel/vulkan/anv_blorp.c > > index 421f860428..ff3d7b126f 100644 > > --- a/src/intel/vulkan/anv_blorp.c > > +++ b/src/intel/vulkan/anv_blorp.c > > @@ -1478,12 +1478,12 @@ anv_image_ccs_clear(struct anv_cmd_buffer > > *cmd_buffer, > > > > /* Blorp likes to treat 2D_ARRAY and 3D the same. */ > > uint32_t blorp_base_layer, blorp_layer_count; > > - if (view) { > > - blorp_base_layer = view->base_array_layer; > > - blorp_layer_count = view->array_len; > > - } else if (image->type == VK_IMAGE_TYPE_3D) { > > Maybe add a comment referencing the requirement from > VK_KHR_maintenance1 so it is clear why we ignore the view for 3D images > here? > Thank you for suggesting I add a comment. I actually meant to double-check this before sending it out, but forgot. In the process of writing the comment, I discovered that the desired behaviour for this part of the extension is still being determined (Vulkan issue #849). 
> > + if (image->type == VK_IMAGE_TYPE_3D) { > > blorp_base_layer = 0; > > blorp_layer_count = extent.depth; > > + } else if (view) { > > + blorp_base_layer = view->base_array_layer; > > + blorp_layer_count = view->array_len; > > } else { > > blorp_base_layer = subresourceRange->baseArrayLayer; > > blorp_layer_count = anv_get_layerCount(image, > > subresourceRange); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 14/06/17 22:16, Brian Paul wrote: On 06/14/2017 02:38 PM, Jose Fonseca wrote: On 14/06/17 21:21, Marek Olšák wrote: On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonsecawrote: On 14/06/17 21:07, Marek Olšák wrote: On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca wrote: On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) 
It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. And if that's the case and we can fix it, then it would be a better solution all around. Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances Perhaps toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). 
The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). Marek I just ran it. Pretty neat! I didn't know we were adding benchmarks to piglit. That's because piglit has a very convenient window system integration framework that I refuse to re-invent elsewhere. Ah, makes sense. Which reminds me: do people think we should transition mesademos off glut to glfw or waffle? Or do you think we should just strive to migrate the stuff there to piglit? I'm not sure I see a need. Does anyone use the Mesa demos for benchmarking anymore? I wasn't thinking of benchmarking per se, but just being able to run any of the Mesa demos directly on Wayland (i.e., EGL as opposed to GLUT+X11). And in general, many/most of the Mesa demos have some interactive aspect to them (key presses or mouse
Re: [Mesa-dev] [PATCH 2/9] i965: Check last known busy status on bo before asking the kernel
On Friday, June 9, 2017 6:01:33 AM PDT Chris Wilson wrote: > If we know the bo is idle (that is we have no submitted a command buffer > referencing this bo since the last query) we can skip asking the kernel. > Note this may report a false negative if the target is being shared > between processes (exported via dmabuf or flink). To allow the caller > control over using the last known flag, the query is split into two. I'm not crazy about exposing __brw_bo_busy and brw_bo_busy, with slightly different semantics. Why not just make brw_bo_busy do: if (bo->idle && bo->reusable) return false; /* otherwise query the kernel */ These days, it appears that bo->reusable is false for any buffers that have been imported/exported via dmabuf or flink, and true otherwise. (We might want to rename it to bo->foreign or such.) With that change, brw_bo_busy should bypass the ioctl for most BOs, but would still work for foreign BOs, without the caller having to worry about it. > Signed-off-by: Chris Wilson> Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 + > src/mesa/drivers/dri/i965/brw_bufmgr.h | 33 ++--- > 2 files changed, 35 insertions(+), 15 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 67c15878d0..01590a0b0a 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -194,21 +194,14 @@ brw_bo_reference(struct brw_bo *bo) > } > > int > -brw_bo_busy(struct brw_bo *bo) > +__brw_bo_busy(struct brw_bo *bo) > { > - struct brw_bufmgr *bufmgr = bo->bufmgr; > - struct drm_i915_gem_busy busy; > - int ret; > + struct drm_i915_gem_busy busy = { bo->gem_handle }; > > - memclear(busy); > - busy.handle = bo->gem_handle; > + drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, ); > > - ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, ); > - if (ret == 0) { > - bo->idle = !busy.busy; > - return busy.busy; > - } > - return false; > + 
bo->idle = !busy.busy; > + return busy.busy; > } > > int > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h > b/src/mesa/drivers/dri/i965/brw_bufmgr.h > index 70cc2bbc6c..3a397be695 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h > @@ -240,11 +240,38 @@ int brw_bo_get_tiling(struct brw_bo *bo, uint32_t > *tiling_mode, > */ > int brw_bo_flink(struct brw_bo *bo, uint32_t *name); > > +int __brw_bo_busy(struct brw_bo *bo); > + > /** > - * Returns 1 if mapping the buffer for write could cause the process > - * to block, due to the object being active in the GPU. > + * Returns 0 if mapping the buffer is not in active use by the gpu. > + * If non-zero, any mapping for for write could cause the process > + * to block, due to the object being active in the GPU. If the lower > + * 16 bits are zero, then we can map for read without stalling. > + * > + * The last-known busy status of the brw_bo is checked first. This may be > + * stale if the brw_bo has been exported to a foriegn process. If used on an > + * exported bo, call __brw_bo_busy() directly to bypass the local check. > */ > -int brw_bo_busy(struct brw_bo *bo); > +static inline int brw_bo_busy(struct brw_bo *bo) > +{ > + if (bo->idle) /* Note this may be stale if the bo is exported */ > + return 0; > + > + return __brw_bo_busy(bo); > +} I'd rather keep this as a boolean result, rather than an integer with certain bits having particular meanings. Bonus points for changing the return type to "bool". > + > +/** > + * Returns true if mapping the buffer for read will cause the process to > + * block (i.e. the buffer is still being writen). Note that when it > + * returns false, the buffer may still be concurrently read by the GPU. 
> + */ > +static inline int brw_bo_write_busy(struct brw_bo *bo) > +{ > + if (bo->idle) /* Note this may be stale if the bo is exported */ > + return 0; > + > + return __brw_bo_busy(bo) & 0x; > +} > > /** > * Specify the volatility of the buffer. This seems like a nice helper. --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/5] st/mesa: simplify st_update_viewport
Also adding this: diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c index b434f0d..d01836f 100644 --- a/src/mesa/state_tracker/st_atom_viewport.c +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -54,9 +54,10 @@ st_update_viewport( struct st_context *st ) _mesa_get_viewport_xform(ctx, i, scale, translate); /* _NEW_BUFFERS */ + /* Drawing to a window where the coordinate system is upside down. */ if (st->state.fb_orientation == Y_0_TOP) { scale[1] *= -1; - translate[1] = translate[1] * -1 + st->state.fb_height; + translate[1] = st->state.fb_height - translate[1]; } } Marek On Wed, Jun 14, 2017 at 11:41 PM, Marek Olšákwrote: > From: Marek Olšák > > --- > src/mesa/state_tracker/st_atom_viewport.c | 34 > --- > 1 file changed, 9 insertions(+), 25 deletions(-) > > diff --git a/src/mesa/state_tracker/st_atom_viewport.c > b/src/mesa/state_tracker/st_atom_viewport.c > index 1fc8908..9a9d570 100644 > --- a/src/mesa/state_tracker/st_atom_viewport.c > +++ b/src/mesa/state_tracker/st_atom_viewport.c > @@ -36,47 +36,31 @@ > /** > * Update the viewport transformation matrix. Depends on: > * - viewport pos/size > * - depthrange > * - window pos/size or FBO size > */ > void > st_update_viewport( struct st_context *st ) > { > struct gl_context *ctx = st->ctx; > - GLfloat yScale, yBias; > unsigned i; > - /* _NEW_BUFFERS > -*/ > - if (st->state.fb_orientation == Y_0_TOP) { > - /* Drawing to a window. The corresponding gallium surface uses > - * Y=0=TOP but OpenGL is Y=0=BOTTOM. So we need to invert the > viewport. 
> - */ > - yScale = -1; > - yBias = (GLfloat)ctx->DrawBuffer->Height; > - } > - else { > - /* Drawing to an FBO where Y=0=BOTTOM, like OpenGL - don't invert */ > - yScale = 1.0; > - yBias = 0.0; > - } > > /* _NEW_VIEWPORT > */ > - for (i = 0; i < ctx->Const.MaxViewports; i++) > - { > - float scale[3], translate[3]; > - _mesa_get_viewport_xform(ctx, i, scale, translate); > + for (i = 0; i < ctx->Const.MaxViewports; i++) { > + float *scale = st->state.viewport[i].scale; > + float *translate = st->state.viewport[i].translate; > > - st->state.viewport[i].scale[0] = scale[0]; > - st->state.viewport[i].scale[1] = scale[1] * yScale; > - st->state.viewport[i].scale[2] = scale[2]; > + _mesa_get_viewport_xform(ctx, i, scale, translate); > > - st->state.viewport[i].translate[0] = translate[0]; > - st->state.viewport[i].translate[1] = translate[1] * yScale + yBias; > - st->state.viewport[i].translate[2] = translate[2]; > + /* _NEW_BUFFERS */ > + if (st->state.fb_orientation == Y_0_TOP) { > + scale[1] *= -1; > + translate[1] = translate[1] * -1 + st->state.fb_height; > + } > } > > cso_set_viewport(st->cso_context, >state.viewport[0]); > if (ctx->Const.MaxViewports > 1) >st->pipe->set_viewport_states(st->pipe, 1, ctx->Const.MaxViewports - > 1, >state.viewport[1]); > } > -- > 2.7.4 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't call _mesa_update_clip_plane in the GL core profile
Reviewed-by: Ilia Mirkin On Wed, Jun 14, 2017 at 5:37 PM, Marek Olšák wrote: > From: Marek Olšák > > It uses the projection matrix to transform the clip plane. > --- > src/mesa/main/enable.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c > index 0324170..0f7cdcd 100644 > --- a/src/mesa/main/enable.c > +++ b/src/mesa/main/enable.c > @@ -352,21 +352,22 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, > GLboolean state) > if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES || > !ctx->DriverFlags.NewClipPlaneEnable) { > FLUSH_VERTICES(ctx, _NEW_TRANSFORM); > } else { > FLUSH_VERTICES(ctx, 0); > } > ctx->NewDriverState |= ctx->DriverFlags.NewClipPlaneEnable; > > if (state) { > ctx->Transform.ClipPlanesEnabled |= (1 << p); > - _mesa_update_clip_plane(ctx, p); > + if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) > + _mesa_update_clip_plane(ctx, p); > } > else { > ctx->Transform.ClipPlanesEnabled &= ~(1 << p); > } > } > break; >case GL_COLOR_MATERIAL: > if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) > goto invalid_enum_error; > if (ctx->Light.ColorMaterialEnabled == state) > -- > 2.7.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] st/mesa: use precomputed st_fb_orientation
From: Marek Olšák--- src/mesa/state_tracker/st_atom_list.h | 6 +++--- src/mesa/state_tracker/st_atom_rasterizer.c | 6 +++--- src/mesa/state_tracker/st_atom_scissor.c| 2 +- src/mesa/state_tracker/st_atom_viewport.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_list.h b/src/mesa/state_tracker/st_atom_list.h index 81a9308..b76854e 100644 --- a/src/mesa/state_tracker/st_atom_list.h +++ b/src/mesa/state_tracker/st_atom_list.h @@ -1,24 +1,21 @@ /* Render (non-compute) states must be first. */ ST_STATE(ST_NEW_DSA, st_update_depth_stencil_alpha) ST_STATE(ST_NEW_CLIP_STATE, st_update_clip) ST_STATE(ST_NEW_FS_STATE, st_update_fp) ST_STATE(ST_NEW_GS_STATE, st_update_gp) ST_STATE(ST_NEW_TES_STATE, st_update_tep) ST_STATE(ST_NEW_TCS_STATE, st_update_tcp) ST_STATE(ST_NEW_VS_STATE, st_update_vp) -ST_STATE(ST_NEW_RASTERIZER, st_update_rasterizer) ST_STATE(ST_NEW_POLY_STIPPLE, st_update_polygon_stipple) -ST_STATE(ST_NEW_VIEWPORT, st_update_viewport) -ST_STATE(ST_NEW_SCISSOR, st_update_scissor) ST_STATE(ST_NEW_WINDOW_RECTANGLES, st_update_window_rectangles) ST_STATE(ST_NEW_BLEND, st_update_blend) ST_STATE(ST_NEW_BLEND_COLOR, st_update_blend_color) ST_STATE(ST_NEW_VS_SAMPLER_VIEWS, st_update_vertex_textures) ST_STATE(ST_NEW_FS_SAMPLER_VIEWS, st_update_fragment_textures) ST_STATE(ST_NEW_GS_SAMPLER_VIEWS, st_update_geometry_textures) ST_STATE(ST_NEW_TCS_SAMPLER_VIEWS, st_update_tessctrl_textures) ST_STATE(ST_NEW_TES_SAMPLER_VIEWS, st_update_tesseval_textures) @@ -29,22 +26,25 @@ ST_STATE(ST_NEW_TES_SAMPLERS, st_update_tesseval_samplers) /* depends on update_ ST_STATE(ST_NEW_GS_SAMPLERS, st_update_geometry_samplers) /* depends on update_*_texture for swizzle */ ST_STATE(ST_NEW_FS_SAMPLERS, st_update_fragment_samplers) /* depends on update_*_texture for swizzle */ ST_STATE(ST_NEW_VS_IMAGES, st_bind_vs_images) ST_STATE(ST_NEW_TCS_IMAGES, st_bind_tcs_images) ST_STATE(ST_NEW_TES_IMAGES, st_bind_tes_images) ST_STATE(ST_NEW_GS_IMAGES, 
st_bind_gs_images) ST_STATE(ST_NEW_FS_IMAGES, st_bind_fs_images) ST_STATE(ST_NEW_FB_STATE, st_update_framebuffer_state) /* depends on update_*_texture and bind_*_images */ +ST_STATE(ST_NEW_RASTERIZER, st_update_rasterizer) /* depends on update_framebuffer_state */ ST_STATE(ST_NEW_SAMPLE_MASK, st_update_sample_mask) /* depends on update_framebuffer_state */ ST_STATE(ST_NEW_SAMPLE_SHADING, st_update_sample_shading) +ST_STATE(ST_NEW_SCISSOR, st_update_scissor) /* depends on update_framebuffer_state */ +ST_STATE(ST_NEW_VIEWPORT, st_update_viewport) /* depends on update_framebuffer_state */ ST_STATE(ST_NEW_VS_CONSTANTS, st_update_vs_constants) ST_STATE(ST_NEW_TCS_CONSTANTS, st_update_tcs_constants) ST_STATE(ST_NEW_TES_CONSTANTS, st_update_tes_constants) ST_STATE(ST_NEW_GS_CONSTANTS, st_update_gs_constants) ST_STATE(ST_NEW_FS_CONSTANTS, st_update_fs_constants) ST_STATE(ST_NEW_VS_UBOS, st_bind_vs_ubos) ST_STATE(ST_NEW_TCS_UBOS, st_bind_tcs_ubos) ST_STATE(ST_NEW_TES_UBOS, st_bind_tes_ubos) diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 6e17562..e388960 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,21 +79,21 @@ void st_update_rasterizer( struct st_context *st ) if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT) { raster->front_ccw ^= 1; } /* * Gallium's surfaces are Y=0=TOP orientation. OpenGL is the * opposite. Window system surfaces are Y=0=TOP. Mesa's FBOs * must match OpenGL conventions so FBOs use Y=0=BOTTOM. In that * case, we must invert Y and flip the notion of front vs. back. */ - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { + if (st->state.fb_orientation == Y_0_BOTTOM) { /* Drawing to an FBO. The viewport will be inverted. 
*/ raster->front_ccw ^= 1; } } /* _NEW_LIGHT */ raster->flatshade = ctx->Light.ShadeModel == GL_FLAT; raster->flatshade_first = ctx->Light.ProvokingVertex == @@ -167,21 +167,21 @@ void st_update_rasterizer( struct st_context *st ) /* _NEW_POINT */ raster->point_size = ctx->Point.Size; raster->point_smooth = !ctx->Point.PointSprite && ctx->Point.SmoothFlag; /* _NEW_POINT | _NEW_PROGRAM */ if (ctx->Point.PointSprite) { /* origin */ if ((ctx->Point.SpriteOrigin == GL_UPPER_LEFT) ^ - (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM)) + (st->state.fb_orientation == Y_0_BOTTOM)) raster->sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; else raster->sprite_coord_mode = PIPE_SPRITE_COORD_LOWER_LEFT; /* Coord replacement flags. If bit 'k' is set that means * that we need to replace GENERIC[k] attrib with an automatically * computed texture
[Mesa-dev] [PATCH 2/5] st/mesa: remove redundant sample_mask checking
From: Marek Olšákcso does that too --- src/mesa/state_tracker/st_atom_msaa.c | 7 +-- src/mesa/state_tracker/st_context.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_msaa.c b/src/mesa/state_tracker/st_atom_msaa.c index 0bdb9b2..814077f 100644 --- a/src/mesa/state_tracker/st_atom_msaa.c +++ b/src/mesa/state_tracker/st_atom_msaa.c @@ -55,26 +55,21 @@ void st_update_sample_mask( struct st_context *st ) Also, there's an interface restriction here in theory it is encouraged this mask not be the same at each pixel. */ sample_mask = (1 << nr_bits) - 1; if (st->ctx->Multisample.SampleCoverageInvert) sample_mask = ~sample_mask; } if (st->ctx->Multisample.SampleMask) sample_mask &= st->ctx->Multisample.SampleMaskValue; } - /* mask off unused bits or don't care? */ - - if (sample_mask != st->state.sample_mask) { - st->state.sample_mask = sample_mask; - cso_set_sample_mask(st->cso_context, sample_mask); - } + cso_set_sample_mask(st->cso_context, sample_mask); } void st_update_sample_shading( struct st_context *st ) { if (!st->fp) return; if (!st->ctx->Extensions.ARB_sample_shading) return; diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 6497587..2fe9d92 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -149,21 +149,20 @@ struct st_context unsigned fb_height; unsigned fb_num_samples; unsigned fb_num_layers; struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS]; struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS]; struct { unsigned num; boolean include; struct pipe_scissor_state rects[PIPE_MAX_WINDOW_RECTANGLES]; } window_rects; - unsigned sample_mask; GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ GLuint fb_orientation; } state; uint64_t dirty; /**< dirty states */ /** This masks out unused shader resources. Only valid in draw calls. 
*/ uint64_t active_states; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] st/mesa: fix pipe_rasterizer_state::scissor with multiple viewports
From: Marek OlšákCc: 17.1 --- src/mesa/state_tracker/st_atom_rasterizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index e388960..39be6b1 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -255,21 +255,21 @@ void st_update_rasterizer( struct st_context *st ) /* _NEW_MULTISAMPLE | _NEW_BUFFERS */ raster->force_persample_interp = !st->force_persample_in_shader && raster->multisample && ctx->Multisample.SampleShading && ctx->Multisample.MinSampleShadingValue * _mesa_geometric_samples(ctx->DrawBuffer) > 1; /* _NEW_SCISSOR */ - raster->scissor = ctx->Scissor.EnableFlags; + raster->scissor = !!ctx->Scissor.EnableFlags; /* _NEW_FRAG_CLAMP */ raster->clamp_fragment_color = !st->clamp_frag_color_in_shader && ctx->Color._ClampFragmentColor; raster->half_pixel_center = 1; if (st->state.fb_orientation == Y_0_TOP) raster->bottom_edge_rule = 1; /* _NEW_TRANSFORM */ if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] st/mesa: don't set 16 scissors and 16 viewports if they're unused
From: Marek OlšákOnly do so if there is a shader writing gl_ViewportIndex. This removes a lot of CPU overhead for the most common case. --- src/mesa/state_tracker/st_atom.c | 18 ++ src/mesa/state_tracker/st_atom_scissor.c | 10 +++--- src/mesa/state_tracker/st_atom_viewport.c | 11 --- src/mesa/state_tracker/st_context.h | 1 + 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index bcfbcf8..253b508 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -69,20 +69,21 @@ static void check_program_state( struct st_context *st ) struct st_common_program *old_tep = st->tep; struct st_common_program *old_gp = st->gp; struct st_fragment_program *old_fp = st->fp; struct gl_program *new_vp = ctx->VertexProgram._Current; struct gl_program *new_tcp = ctx->TessCtrlProgram._Current; struct gl_program *new_tep = ctx->TessEvalProgram._Current; struct gl_program *new_gp = ctx->GeometryProgram._Current; struct gl_program *new_fp = ctx->FragmentProgram._Current; uint64_t dirty = 0; + unsigned num_viewports = 1; /* Flag states used by both new and old shaders to unbind shader resources * properly when transitioning to shaders that don't use them. */ if (unlikely(new_vp != _vp->Base)) { if (old_vp) dirty |= old_vp->affected_states; if (new_vp) dirty |= ST_NEW_VERTEX_PROGRAM(st, st_vertex_program(new_vp)); } @@ -108,20 +109,37 @@ static void check_program_state( struct st_context *st ) dirty |= st_common_program(new_gp)->affected_states; } if (unlikely(new_fp != _fp->Base)) { if (old_fp) dirty |= old_fp->affected_states; if (new_fp) dirty |= st_fragment_program(new_fp)->affected_states; } + /* Find out the number of viewports. This determines how many scissors +* and viewport states we need to update. +*/ + struct gl_program *last_prim_shader = new_gp ? new_gp : + new_tep ? 
new_tep : new_vp; + if (last_prim_shader && + last_prim_shader->info.outputs_written & VARYING_BIT_VIEWPORT) + num_viewports = ctx->Const.MaxViewports; + + if (st->state.num_viewports != num_viewports) { + st->state.num_viewports = num_viewports; + dirty |= ST_NEW_VIEWPORT; + + if (ctx->Scissor.EnableFlags & u_bit_consecutive(0, num_viewports)) + dirty |= ST_NEW_SCISSOR; + } + st->dirty |= dirty; } static void check_attrib_edgeflag(struct st_context *st) { const struct gl_vertex_array **arrays = st->ctx->Array._DrawArrays; GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled; struct gl_program *vp = st->ctx->VertexProgram._Current; if (!arrays) diff --git a/src/mesa/state_tracker/st_atom_scissor.c b/src/mesa/state_tracker/st_atom_scissor.c index ccd6e8e..a87d029 100644 --- a/src/mesa/state_tracker/st_atom_scissor.c +++ b/src/mesa/state_tracker/st_atom_scissor.c @@ -46,21 +46,21 @@ st_update_scissor( struct st_context *st ) { struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS]; const struct gl_context *ctx = st->ctx; const struct gl_framebuffer *fb = ctx->DrawBuffer; const unsigned int fb_width = _mesa_geometric_width(fb); const unsigned int fb_height = _mesa_geometric_height(fb); GLint miny, maxy; unsigned i; bool changed = false; - for (i = 0 ; i < ctx->Const.MaxViewports; i++) { + for (i = 0 ; i < st->state.num_viewports; i++) { scissor[i].minx = 0; scissor[i].miny = 0; scissor[i].maxx = fb_width; scissor[i].maxy = fb_height; if (ctx->Scissor.EnableFlags & (1 << i)) { /* need to be careful here with xmax or ymax < 0 */ GLint xmax = MAX2(0, ctx->Scissor.ScissorArray[i].X + ctx->Scissor.ScissorArray[i].Width); GLint ymax = MAX2(0, ctx->Scissor.ScissorArray[i].Y + ctx->Scissor.ScissorArray[i].Height); @@ -88,22 +88,26 @@ st_update_scissor( struct st_context *st ) scissor[i].miny = miny; scissor[i].maxy = maxy; } if (memcmp([i], >state.scissor[i], sizeof(scissor[0])) != 0) { /* state has changed */ st->state.scissor[i] = scissor[i]; /* struct copy 
*/ changed = true; } } - if (changed) - st->pipe->set_scissor_states(st->pipe, 0, ctx->Const.MaxViewports, scissor); /* activate */ + + if (changed) { + struct pipe_context *pipe = st->pipe; + + pipe->set_scissor_states(pipe, 0, st->state.num_viewports, scissor); + } } void st_update_window_rectangles(struct st_context *st) { struct pipe_scissor_state new_rects[PIPE_MAX_WINDOW_RECTANGLES]; const struct gl_context *ctx = st->ctx; const struct gl_scissor_attrib *scissor = >Scissor; unsigned i; bool changed =
[Mesa-dev] [PATCH 3/5] st/mesa: simplify st_update_viewport
From: Marek Olšák--- src/mesa/state_tracker/st_atom_viewport.c | 34 --- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c index 1fc8908..9a9d570 100644 --- a/src/mesa/state_tracker/st_atom_viewport.c +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -36,47 +36,31 @@ /** * Update the viewport transformation matrix. Depends on: * - viewport pos/size * - depthrange * - window pos/size or FBO size */ void st_update_viewport( struct st_context *st ) { struct gl_context *ctx = st->ctx; - GLfloat yScale, yBias; unsigned i; - /* _NEW_BUFFERS -*/ - if (st->state.fb_orientation == Y_0_TOP) { - /* Drawing to a window. The corresponding gallium surface uses - * Y=0=TOP but OpenGL is Y=0=BOTTOM. So we need to invert the viewport. - */ - yScale = -1; - yBias = (GLfloat)ctx->DrawBuffer->Height; - } - else { - /* Drawing to an FBO where Y=0=BOTTOM, like OpenGL - don't invert */ - yScale = 1.0; - yBias = 0.0; - } /* _NEW_VIEWPORT */ - for (i = 0; i < ctx->Const.MaxViewports; i++) - { - float scale[3], translate[3]; - _mesa_get_viewport_xform(ctx, i, scale, translate); + for (i = 0; i < ctx->Const.MaxViewports; i++) { + float *scale = st->state.viewport[i].scale; + float *translate = st->state.viewport[i].translate; - st->state.viewport[i].scale[0] = scale[0]; - st->state.viewport[i].scale[1] = scale[1] * yScale; - st->state.viewport[i].scale[2] = scale[2]; + _mesa_get_viewport_xform(ctx, i, scale, translate); - st->state.viewport[i].translate[0] = translate[0]; - st->state.viewport[i].translate[1] = translate[1] * yScale + yBias; - st->state.viewport[i].translate[2] = translate[2]; + /* _NEW_BUFFERS */ + if (st->state.fb_orientation == Y_0_TOP) { + scale[1] *= -1; + translate[1] = translate[1] * -1 + st->state.fb_height; + } } cso_set_viewport(st->cso_context, >state.viewport[0]); if (ctx->Const.MaxViewports > 1) st->pipe->set_viewport_states(st->pipe, 1, ctx->Const.MaxViewports - 1, 
&st->state.viewport[1]); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/9] i965: Mark freshly allocate bo as idle
On Friday, June 9, 2017 6:01:32 AM PDT Chris Wilson wrote: > When created, buffers are idle, so mark them as such to save an early > ioctl or mistakenly assuming the fresh buffer is busy. > > Signed-off-by: Chris Wilson > Cc: Kenneth Graunke > Cc: Matt Turner > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 2ee57e2a0d..67c15878d0 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -324,6 +324,7 @@ retry: > goto err; > >bo->size = bo_size; > + bo->idle = true; > >memclear(create); >create.size = bo_size; > I noticed the other day that we weren't using bo->idle anymore. Too much hacking and slashing on my part, I guess...thanks for fixing it. This patch is: Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: don't call _mesa_update_clip_plane in the GL core profile
From: Marek Olšák It uses the projection matrix to transform the clip plane. --- src/mesa/main/enable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 0324170..0f7cdcd 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -352,21 +352,22 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES || !ctx->DriverFlags.NewClipPlaneEnable) { FLUSH_VERTICES(ctx, _NEW_TRANSFORM); } else { FLUSH_VERTICES(ctx, 0); } ctx->NewDriverState |= ctx->DriverFlags.NewClipPlaneEnable; if (state) { ctx->Transform.ClipPlanesEnabled |= (1 << p); - _mesa_update_clip_plane(ctx, p); + if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) + _mesa_update_clip_plane(ctx, p); } else { ctx->Transform.ClipPlanesEnabled &= ~(1 << p); } } break; case GL_COLOR_MATERIAL: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; if (ctx->Light.ColorMaterialEnabled == state) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color
On Wed, Jun 14, 2017 at 11:23 PM, Timothy Arceri wrote: > > > On 15/06/17 04:10, Marek Olšák wrote: >> >> On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák wrote: >>> >>> On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri >>> wrote: On 13/06/17 04:18, Marek Olšák wrote: > > > From: Marek Olšák > > AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI > since > border color swizzling is broken there anyway. The only other user of > this > code is nouveau. Maybe move this comment into the code as a TODO? I was a little confused at first as I thought this commit was meant to make the change. With that: >>> >>> >>> I don't understand. What are you confused about? >> >> >> The commit message talks about radeonsi, but this patch is for >> st/mesa. st/mesa doesn't care which drivers use the codepath. > > > Well how do you intend to stop using this? Why is the commit message for a > st change talking about radeonsi? I was assuming you wanted to eventually > remove this code path from all drivers (or at least skip it for some) in > which case making this a code comment would make sense, otherwise why do you > even talk about this in the commit message? You're right. I'll just remove that commit message. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/24] st/mesa: don't unbind sampler states if none are used
On 15/06/17 03:17, Marek Olšák wrote: On Tue, Jun 13, 2017 at 7:46 AM, Timothy Arceri wrote: On 13/06/17 15:32, Timothy Arceri wrote: On 13/06/17 04:23, Ilia Mirkin wrote: On Mon, Jun 12, 2017 at 2:18 PM, Marek Olšák wrote: From: Marek Olšák --- src/mesa/state_tracker/st_atom_sampler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index f33e334..11db6e1 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -263,21 +263,21 @@ update_shader_samplers(struct st_context *st, struct pipe_sampler_state *samplers, unsigned *num_samplers) { GLbitfield samplers_used = prog->SamplersUsed; GLbitfield free_slots = ~prog->SamplersUsed; GLbitfield external_samplers_used = prog->ExternalSamplersUsed; GLuint unit; const GLuint old_max = *num_samplers; const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS]; - if (*num_samplers == 0 && samplers_used == 0x0) + if (samplers_used == 0x0) return; *num_samplers = 0; Does this still need to get executed even if samplers_used == 0? It seems correct to skip this, otherwise old_max won't be set correctly in the above code the next time we get here. Although it seems we ignore old_max in the following patches anyway because cso_set_samplers() will set things to NULL for us. So maybe it would make sense to set it to 0? Yes, it should be set to 0, but it's not that important (only DrawPixels would be affected), though I think we can just drop tracking num_samplers in st_context and simply rely on num_sampler_views. I'll fix that in a follow-up patch, which I'm gonna send shortly ([25/24]). In the meantime, I'd like an Rb on this one if there are no other comments. You still have my r-b here.
25 is also: Reviewed-by: Timothy Arceri Thanks, Marek Reviewed-by: Timothy Arceri /* loop over sampler units (aka tex image units) */ for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) { struct pipe_sampler_state *sampler = samplers + unit; if (samplers_used & 1) { const GLuint texUnit = prog->SamplerUnits[unit]; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color
On 15/06/17 04:10, Marek Olšák wrote: On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri wrote: On 13/06/17 04:18, Marek Olšák wrote: From: Marek Olšák AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since border color swizzling is broken there anyway. The only other user of this code is nouveau. Maybe move this comment into the code as a TODO? I was a little confused at first as I thought this commit was meant to make the change. With that: I don't understand. What are you confused about? The commit message talks about radeonsi, but this patch is for st/mesa. st/mesa doesn't care which drivers use the codepath. Well how do you intend to stop using this? Why is the commit message for a st change talking about radeonsi? I was assuming you wanted to eventually remove this code path from all drivers (or at least skip it for some) in which case making this a code comment would make sense, otherwise why do you even talk about this in the commit message? Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] mesa/main: Missing NULL pointer check.
This and the following patch are in no error paths. The result cannot be NULL unless the function is being used incorrectly. I would rather this be left to segfault than to fail silently. On 15/06/17 02:33, Plamena Manolova wrote: In prepare_target it's plausible that the parameters of _mesa_lookup_texture might be invalid and NULL is returned, so we need a NULL pointer check. CID: 1412566 Signed-off-by: Plamena Manolova --- src/mesa/main/copyimage.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c index 2cb617c..dd97f1a 100644 --- a/src/mesa/main/copyimage.c +++ b/src/mesa/main/copyimage.c @@ -228,14 +228,17 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target, } else { struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name); + *renderbuffer = NULL; + + if (texObj == NULL) +return; + if (target == GL_TEXTURE_CUBE_MAP) { *texImage = texObj->Image[z][level]; } else { *texImage = _mesa_select_tex_image(texObj, target, level); } - - *renderbuffer = NULL; } } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 06/14/2017 02:38 PM, Jose Fonseca wrote: On 14/06/17 21:21, Marek Olšák wrote: On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonsecawrote: On 14/06/17 21:07, Marek Olšák wrote: On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca wrote: On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) 
It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. And if that's the case and we can fix it, then it would be a better solution all around. Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances Perhaps toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). 
The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). Marek I just ran it. Pretty neat! I didn't know we were adding benchmarks to piglit. That's because piglit has a very convenient window system integration framework that I refuse to re-invent elsewhere. Ah, makes sense. Which reminds me: do people think we should transition mesademos off glut to glfw or waffle? Or do you think we should just strive to migrate the stuff there to piglit? I'm not sure I see a need. Does anyone use the Mesa demos for benchmarking anymore? And in general, many/most of the Mesa demos have some interactive aspect to them (key presses or mouse input) that isn't available in waffle or piglit (I'm not familiar with glfw). And few of the Mesa demos do pixel probing for correctness. -Brian
Re: [Mesa-dev] [PATCH 04/11] intel/genxml: Rename IndirectStatePointer to BorderColorPointer
Reviewed-by: Rafael Antognolli On Tue, Jun 13, 2017 at 11:28:23AM -0700, Anuj Phogat wrote: > Signed-off-by: Anuj Phogat > --- > src/intel/genxml/gen10.xml | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml > index 64041c1..06260cf 100644 > --- a/src/intel/genxml/gen10.xml > +++ b/src/intel/genxml/gen10.xml > @@ -900,7 +900,7 @@ > > > > - > + > > type="uint"> > > -- > 2.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] i965: Stop hanging on Haswell
On Tuesday, June 13, 2017 2:53:20 PM PDT Jason Ekstrand wrote: > As I've been working on converting more things in the GL driver over to > blorp, I've been highly annoyed by all of the hangs on Haswell. About one > in 3-5 Jenkins runs would hang somewhere. After looking at about a > half-dozen error states, I noticed that all of the hangs seemed to be on > fast-clear operations (clear or resolve) that happen at the start of a > batch, right after STATE_BASE_ADDRESS. > > Haswell seems to be a bit more picky than other hardware about having > fast-clear operations in flight at the same time as regular rendering and > hangs if the two ever overlap. (Other hardware can get rendering > corruption but not usually hangs.) Also, Haswell doesn't fully stall if > you just do a RT flush and a CS stall. The hardware docs refer to > something they call an "end of pipe sync" which is a CS stall with a write > to the workaround BO. On Haswell, you also need to read from that same > address to create a memory dependency and make sure the system is fully > stalled. > > When you call brw_blorp_resolve_color it calls brw_emit_pipe_control_flush > and does the correct flushes and then calls into core blorp to do the > actual resolve operation. If the batch doesn't have enough space left in > it for the fast-clear operation, the batch will get split and the > fast-clear will happen in the next batch. I believe what is happening is > that while we're building the second batch that actually contains the > fast-clear, some other process completes a batch and inserts it between our > PIPE_CONTROL to do the stall and the actual fast-clear. We then end up > with more stuff in flight than we can handle and the GPU explodes. > > I'm not 100% convinced of this explanation because it seems a bit fishy > that a context switch wouldn't be enough to fully flush out the GPU. 
> However, what I do know is that, without these patches I get a hang in one > out of three to five Jenkins runs on my wip/i965-blorp-ds branch. With the > patches (or an older variant that did the same thing), I have done almost 20 > Jenkins runs and have yet to see a hang. I'd call that success. > > Jason Ekstrand (6): > i965: Flush around state base address > i965: Take a uint64_t immediate in emit_pipe_control_write > i965: Unify the two emit_pipe_control functions > i965: Do an end-of-pipe sync prior to STATE_BASE_ADDRESS > i965/blorp: Do an end-of-pipe sync around CCS ops > i965: Do an end-of-pipe sync after flushes > > Topi Pohjolainen (1): > i965: Add an end-of-pipe sync helper > > src/mesa/drivers/dri/i965/brw_blorp.c| 16 +- > src/mesa/drivers/dri/i965/brw_context.h | 3 +- > src/mesa/drivers/dri/i965/brw_misc_state.c | 38 + > src/mesa/drivers/dri/i965/brw_pipe_control.c | 243 > ++- > src/mesa/drivers/dri/i965/brw_queryobj.c | 5 +- > src/mesa/drivers/dri/i965/gen6_queryobj.c| 2 +- > src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 +- > 7 files changed, 211 insertions(+), 98 deletions(-) > > The series is: Reviewed-by: Kenneth Graunke If Chris is right, and what we're really seeing is that MI_SET_CONTEXT needs additional flushing, it probably makes sense to fix the kernel. If it's really fast clear related, then we should do it in Mesa. I'm not sure we'll ever be able to properly determine that. Even if we go the kernel route, we should land patches 1-3. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7] i965: Add an end-of-pipe sync helper
On Tuesday, June 13, 2017 2:53:24 PM PDT Jason Ekstrand wrote: > From: Topi Pohjolainen> > v2 (Jason Ekstrand): > - Take a flags parameter to control the flushes > - Refactoring > > Signed-off-by: Topi Pohjolainen > --- > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/brw_pipe_control.c | 96 > +++- > 2 files changed, 96 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 7b9be8a..b137409 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -1641,6 +1641,7 @@ void brw_emit_pipe_control_flush(struct brw_context > *brw, uint32_t flags); > void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, > struct brw_bo *bo, uint32_t offset, > uint64_t imm); > +void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags); > void brw_emit_mi_flush(struct brw_context *brw); > void brw_emit_post_sync_nonzero_flush(struct brw_context *brw); > void brw_emit_depth_stall_flushes(struct brw_context *brw); > diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c > b/src/mesa/drivers/dri/i965/brw_pipe_control.c > index 39bb9c7..338e4fc 100644 > --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c > +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c > @@ -271,7 +271,6 @@ gen7_emit_cs_stall_flush(struct brw_context *brw) > brw->workaround_bo, 0, 0); > } > > - > /** > * Emits a PIPE_CONTROL with a non-zero post-sync operation, for > * implementing two workarounds on gen6. 
From section 1.4.7.1 > @@ -320,6 +319,101 @@ brw_emit_post_sync_nonzero_flush(struct brw_context > *brw) > brw->workaround_bo, 0, 0); > } > > +/* > + * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization": > + * > + * Write synchronization is a special case of end-of-pipe > + * synchronization that requires that the render cache and/or depth > + * related caches are flushed to memory, where the data will become > + * globally visible. This type of synchronization is required prior to > + * SW (CPU) actually reading the result data from memory, or initiating > + * an operation that will use as a read surface (such as a texture > + * surface) a previous render target and/or depth/stencil buffer > + * > + * > + * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": > + * > + * Exercising the write cache flush bits (Render Target Cache Flush > + * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only > + * ensures the write caches are flushed and doesn't guarantee the data > + * is globally visible. > + * > + * SW can track the completion of the end-of-pipe-synchronization by > + * using "Notify Enable" and "PostSync Operation - Write Immediate > + * Data" in the PIPE_CONTROL command. > + */ > +void > +brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags) > +{ > + if (brw->gen >= 6) { > + /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": > + * > + *"The most common action to perform upon reaching a > synchronization > + *point is to write a value out to memory. An immediate value > + *(included with the synchronization command) may be written." > + * > + * > + * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization": > + * > + *"In case the data flushed out by the render engine is to be read > + *back in to the render engine in coherent manner, then the render > + *engine has to wait for the fence completion before accessing the > + *flushed data. 
This can be achieved by following means on various > + *products: PIPE_CONTROL command with CS Stall and the required > + *write caches flushed with Post-Sync-Operation as Write Immediate > + *Data. > + * > + *Example: > + * - Workload-1 (3D/GPGPU/MEDIA) > + * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate > + * Data, Required Write Cache Flush bits set) > + * - Workload-2 (Can use the data produce or output by > Workload-1) > + */ > + brw_emit_pipe_control_write(brw, > + flags | PIPE_CONTROL_CS_STALL | > + PIPE_CONTROL_WRITE_IMMEDIATE, > + brw->workaround_bo, 0, 0); > + > + if (brw->is_haswell) { > + /* Haswell needs addition work-arounds: > + * > + * From Haswell PRM, volume 2, part 1, "End-of-Pipe > Synchronization": > + * > + *Option 1: > + *PIPE_CONTROL command with the CS Stall and the required
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 14/06/17 21:21, Marek Olšák wrote: On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonsecawrote: On 14/06/17 21:07, Marek Olšák wrote: On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca wrote: On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) 
It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. And if that's the case and we can fix it, then it would be a better solution all around. Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances Perhaps toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). 
The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). Marek I just ran it. Pretty neat! I didn't know we were adding benchmarks to piglit. That's because piglit has a very convenient window system integration framework that I refuse to re-invent elsewhere. Ah, makes sense. Which reminds me: do people think we should transition mesademos off glut to glfw or waffle? Or do you think we should just strive to migrate the stuff there to piglit? Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonsecawrote: > On 14/06/17 21:07, Marek Olšák wrote: >> >> On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca wrote: >>> >>> On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: > > > On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca > wrote: >> >> >> On 12/06/17 22:56, Marek Olšák wrote: >>> >>> >>> >>> On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca >>> wrote: On 12/06/17 21:25, Marek Olšák wrote: > > > > > On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca > wrote: >> >> >> >> >> How does this help exactly? >> >> Are applications actually rendering to the same FBO w/ and w/o >> SRGB >> decoding? >> >> Or is the problem here GL_SRGB_WRITE state getting spuriously >> dirtied >> by >> the >> application? >> >> And even if they do, why is toggling surface views in framebuffer >> state >> so >> expensive? >> >> I don't object per se, but it looks like an unusual thing to >> optimize >> for. >> > > set_framebuffer_state is basically a memory barrier. We have > different > caches between FB and textures and we have to flush them when a > texture is unbound from the framebuffer and set as a sampler view. > To > keep thing simple, set_framebuffer_state is the barrier. When we > change the blend state, the barrier is avoided. Note that the > barrier > makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. 
(Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. And if that's the case and we can fix it, then it would be a better solution all around. >>> >>> >>> >>> >>> Yeah the funny part and the reason is that we have a microbenchmark >>> in >>> piglit (drawoverhead) changing this state between draw calls. :) >>> >>> Marek >>> >> >> I couldn't find that piglit microbenchmark. mesademos has >> src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo >> is >> changing internally, then it's a perf bug in Mesa state tracker. >> >> Unless it's mimicking something that real apps do, then it's probably >> better >> to fix the microbenchmark to use a more realistic tests. > > > > If you build piglit, it's in bin/drawoverhead. > > You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is > rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek >>> >>> >>> >>> Great. >>> >>> BTW, I'm not sure what's a good state to change in such microbenchmark. >>> >>> There is of course, a myriad of states to pick, but they are not all the >>> same: performance can vary wildly depending on the choice. I'm not sure >>> what's a good representative state change in such circumstances Perhaps >>> toggling between two texture objects? Or some sampler state? >> >> >> If you've ever run the microbenchmark, you know there are plenty of >> state changes tested. I think there are like 15 state changes tested >> in about 60 subtests at the moment. I'm adding more tests into it. >> Currently I have 100 subtests in there locally. 
At the moment the >> missing subtests are mostly just shader resources: immutable textures >> (mutable textures i.e. not TexStorage-based are already tested), TBOs, >>
Re: [Mesa-dev] [PATCH] gallium/radeon: fix initialization of new resource bindless fields
Reviewed-by: Marek Olšák Marek On Wed, Jun 14, 2017 at 9:11 PM, Samuel Pitoiset wrote: > r600_resource objects are not calloc'd. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index fb74b45d2fa..5336f55cb57 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -110,6 +110,8 @@ void r600_init_resource_fields(struct r600_common_screen > *rscreen, > res->bo_size = size; > res->bo_alignment = alignment; > res->flags = 0; > + res->texture_handle_allocated = false; > + res->image_handle_allocated = false; > > switch (res->b.b.usage) { > case PIPE_USAGE_STREAM: > -- > 2.13.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 14/06/17 21:07, Marek Olšák wrote: On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonsecawrote: On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. 
And if that's the case and we can fix it, then it would be a better solution all around. Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances Perhaps toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). Marek I just ran it. Pretty neat! 
I didn't know we were adding benchmarks to piglit. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] i965/bufmgr: Rename bo_alloc_tiled to bo_alloc_tiled_2d
On Tuesday, June 13, 2017 4:19:00 PM PDT Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c| 6 ++-- > src/mesa/drivers/dri/i965/brw_bufmgr.h| 12 +++ > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 46 > +-- > src/mesa/drivers/dri/i965/intel_screen.c | 24 +++--- > 4 files changed, 44 insertions(+), 44 deletions(-) Thanks, this is so much nicer. I was looking into orphaning busy storage and the new interface will make it a lot easier to allocate a new tiled BO of the same size and pitch. Series is: Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 06/14/2017 10:07 PM, Marek Olšák wrote: On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonsecawrote: On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. 
And if that's the case and we can fix it, then it would be a better solution all around. Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances Perhaps toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). And I plan to add bindless support to it. 
:) Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonsecawrote: > On 14/06/17 17:12, Marek Olšák wrote: >> >> On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák wrote: >>> >>> On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca >>> wrote: On 12/06/17 22:56, Marek Olšák wrote: > > > On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca > wrote: >> >> >> On 12/06/17 21:25, Marek Olšák wrote: >>> >>> >>> >>> On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca >>> wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. >>> >>> set_framebuffer_state is basically a memory barrier. We have >>> different >>> caches between FB and textures and we have to flush them when a >>> texture is unbound from the framebuffer and set as a sampler view. To >>> keep thing simple, set_framebuffer_state is the barrier. When we >>> change the blend state, the barrier is avoided. Note that the barrier >>> makes set_framebuffer_state a function that is always GPU-bound. >> >> >> >> >> I see. >> >> And you're sure that the incoming set_framebuffer_state are not >> spurious? >> >> I know cso_context always eliminates redundant >> pipe_context::set_framebuffer_state calls, but it is perhaps possible >> that >> Mesa state tracker is reseting the framebuffer state with different >> surface >> views, but that in practice are exactly the same as the previous one? >> >> Like I said, it seems odd apps are doing this: it doesn't make much >> sense >> to >> me to change colorspace of the fragments between draws. (Unless some >> of >> the >> assets are already in SRGB and the app is trying to be too smart for >> its >> own >> good to avoid the sRGB->RGB->sRGB.) 
It seems much more likely that >> these >> framebuffer state changes are self-inflicted some where in our stack, >> than >> something truly demanded by the app. >> >> And if that's the case and we can fix it, then it would be a better >> solution >> all around. > > > > Yeah the funny part and the reason is that we have a microbenchmark in > piglit (drawoverhead) changing this state between draw calls. :) > > Marek > I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic tests. >>> >>> >>> If you build piglit, it's in bin/drawoverhead. >>> >>> You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is >>> rather artificial and fairly unlikely to occur with real apps. >> >> >> FYI, I'm dropping this series and I don't have it in my repo anymore. >> piglit/drawoverhead will be updated not to test this state change. >> >> Marek > > > Great. > > BTW, I'm not sure what's a good state to change in such microbenchmark. > > There is of course, a myriad of states to pick, but they are not all the > same: performance can vary wildly depending on the choice. I'm not sure > what's a good representative state change in such circumstances Perhaps > toggling between two texture objects? Or some sampler state? If you've ever run the microbenchmark, you know there are plenty of state changes tested. I think there are like 15 state changes tested in about 60 subtests at the moment. I'm adding more tests into it. Currently I have 100 subtests in there locally. At the moment the missing subtests are mostly just shader resources: immutable textures (mutable textures i.e. not TexStorage-based are already tested), TBOs, images, image buffers, SSBOs (maybe), atomic counters (maybe). 
The methodology is 1 state change followed by 1 draw call in a loop, measuring the number of draw calls per second for that case, and comparing with the baseline draw rate (which is without the state change). Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [v2 1/7] i965/gen4: Set depth offset when there is stencil attachment only
On Wed, Jun 14, 2017 at 11:18:40AM +0100, Emil Velikov wrote: > Hi Topi, > > On 22 May 2017 at 20:12, Topi Pohjolainen wrote: > > Current version fails to set depthstencil.depth_offset when there > > is only stencil attachment (it does set the intra tile offsets > > though). Fixes piglits: > > > > g45,g965,ilk: depthstencil-render-miplevels 1024 s=z24_s8 > > g45,ilk: depthstencil-render-miplevels 273 s=z24_s8 > > > > CC: mesa-sta...@lists.freedesktop.org > > Signed-off-by: Topi Pohjolainen > > --- > > src/mesa/drivers/dri/i965/brw_misc_state.c | 6 ++ > > 1 file changed, 6 insertions(+) > > > Doesn't seem like this patch has landed. Did it fall through the > cracks, or it's been superseded/other? It is still pending review, I'll let you know when I get to push it. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] i965/gen4: Add support for single layer in alignment workaround
On Wed, Jun 14, 2017 at 10:48:09AM -0700, Ian Romanick wrote: > On 06/09/2017 07:04 AM, Topi Pohjolainen wrote: > > On gen < 6 one doesn't have level or layer specifiers available > > for render and depth targets. In order to support rendering to > > specific level/layer, driver needs to manually offset the surface > > to the desired slice. > > There are, however, alignment restrictions to respect as well and > > in come cases the only option is to use temporary single slice > > surface which driver copies after rendering to the full miptree. > > > > Current alignment workaround introduces new texture images which > > are added to the parent texture object. Texture validation later > > on copies the additional levels back to the surface that contains > > the full mipmap. > > This only works for non-arrayed surfaces and driver currently > > creates new arrayed images in vain - individual layers within the > > newly created are still unaligned the same as before. > > > > This patch drops this mechanism and instead attaches single > > temporary slice into the render buffer. This gets immediately > > copied back to the mipmapped and/or arrayed surface just after > > the render is done. > > > > Sitting on top of earlier series cleaning up the depth buffer > > state, this patch additionally fixes the following piglit tests: > > > > ext_texture_array.copyteximage 2d_array.g45m64 > > ext_texture_array.copyteximage 1d_array.g45m64 > > arb_framebuffer_object.fbo-blit-stretch.g33m64 > > I wish I had noticed this before I had Mark open a bug for it: > > https://bugs.freedesktop.org/show_bug.cgi?id=101414 Sorry about this Ian, it is a wrong call. I blindly took it from the change list after I saw it there every time (I have been churning my isl work a lot in jenkins). I don't even touch i915 driver here. 
> > > ext_framebuffer_object.fbo-cubemap.g965m64 > > arb_framebuffer_object.fbo-generatemipmap-cubemap.g965m64 > > arb_texture_cube_map.copyteximage cube.g965m64 > > ext_texture_array.copyteximage 1d_array.g965m64 > > ext_texture_array.copyteximage 2d_array.g965m64 > > ext_texture_array.fbo-array.g965m64 > > ext_texture_array.gen-mipmap.g965m64 > > ext_texture_array.fbo-generatemipmap-array.g965m64 > > arb_pixel_buffer_object.texsubimage array pbo.g965m64 > > ext_texture_array.copyteximage 2d_array.ilkm64 > > ext_texture_array.copyteximage 1d_array.ilkm64 > > arb_texture_cube_map.copyteximage cube.ilkm64 > > > > CC: Kenneth Graunke> > CC: Jason Ekstrand > > CC: Ian Romanick > > Signed-off-by: Topi Pohjolainen > > --- > > src/mesa/drivers/dri/i965/brw_draw.c | 51 > > > > src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- > > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +- > > src/mesa/drivers/dri/i965/intel_fbo.c| 19 + > > src/mesa/drivers/dri/i965/intel_fbo.h| 24 +++ > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 2 +- > > src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 7 > > 7 files changed, 99 insertions(+), 11 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > > b/src/mesa/drivers/dri/i965/brw_draw.c > > index 611cb86..cb441c3 100644 > > --- a/src/mesa/drivers/dri/i965/brw_draw.c > > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > > @@ -396,6 +396,56 @@ brw_postdraw_set_buffers_need_resolve(struct > > brw_context *brw) > > } > > > > static void > > +intel_renderbuffer_move_temp_back(struct brw_context *brw, > > + struct intel_renderbuffer *irb) > > +{ > > + if (irb->align_wa_mt == NULL) > > + return; > > + > > + brw_render_cache_set_check_flush(brw, irb->align_wa_mt->bo); > > + > > + intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0, > > +irb->mt, > > +irb->Base.Base.TexImage->Level, irb->mt_layer); > > + > > + intel_miptree_reference(>align_wa_mt, NULL); > > + > > + /* Finally restore the x,y to correspond to full miptree. 
*/ > > + intel_renderbuffer_set_draw_offset(irb); > > + > > + /* Make sure render surface state gets re-emitted with updated miptree. > > */ > > + brw->NewGLState |= _NEW_BUFFERS; > > +} > > + > > +static void > > +brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) > > +{ > > + struct gl_context *ctx = >ctx; > > + struct gl_framebuffer *fb = ctx->DrawBuffer; > > + > > + struct intel_renderbuffer *depth_irb = > > + intel_get_renderbuffer(fb, BUFFER_DEPTH); > > + struct intel_renderbuffer *stencil_irb = > > + intel_get_renderbuffer(fb, BUFFER_STENCIL); > > + > > + if (depth_irb && depth_irb->align_wa_mt) > > + intel_renderbuffer_move_temp_back(brw, depth_irb); > > + > > + if (stencil_irb && stencil_irb->align_wa_mt) > > +
Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP
On 14/06/17 17:12, Marek Olšák wrote: On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšákwrote: On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca wrote: On 12/06/17 22:56, Marek Olšák wrote: On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca wrote: On 12/06/17 21:25, Marek Olšák wrote: On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca wrote: How does this help exactly? Are applications actually rendering to the same FBO w/ and w/o SRGB decoding? Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied by the application? And even if they do, why is toggling surface views in framebuffer state so expensive? I don't object per se, but it looks like an unusual thing to optimize for. set_framebuffer_state is basically a memory barrier. We have different caches between FB and textures and we have to flush them when a texture is unbound from the framebuffer and set as a sampler view. To keep thing simple, set_framebuffer_state is the barrier. When we change the blend state, the barrier is avoided. Note that the barrier makes set_framebuffer_state a function that is always GPU-bound. I see. And you're sure that the incoming set_framebuffer_state are not spurious? I know cso_context always eliminates redundant pipe_context::set_framebuffer_state calls, but it is perhaps possible that Mesa state tracker is reseting the framebuffer state with different surface views, but that in practice are exactly the same as the previous one? Like I said, it seems odd apps are doing this: it doesn't make much sense to me to change colorspace of the fragments between draws. (Unless some of the assets are already in SRGB and the app is trying to be too smart for its own good to avoid the sRGB->RGB->sRGB.) It seems much more likely that these framebuffer state changes are self-inflicted some where in our stack, than something truly demanded by the app. And if that's the case and we can fix it, then it would be a better solution all around. 
Yeah the funny part and the reason is that we have a microbenchmark in piglit (drawoverhead) changing this state between draw calls. :) Marek I couldn't find that piglit microbenchmark. mesademos has src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE. So if fbo is changing internally, then it's a perf bug in Mesa state tracker. Unless it's mimicking something that real apps do, then it's probably better to fix the microbenchmark to use a more realistic test. If you build piglit, it's in bin/drawoverhead. You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is rather artificial and fairly unlikely to occur with real apps. FYI, I'm dropping this series and I don't have it in my repo anymore. piglit/drawoverhead will be updated not to test this state change. Marek Great. BTW, I'm not sure what's a good state to change in such microbenchmark. There is of course, a myriad of states to pick, but they are not all the same: performance can vary wildly depending on the choice. I'm not sure what's a good representative state change in such circumstances. Perhaps toggling between two texture objects? Or some sampler state? Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] radeonsi: reduce overhead for resident textures which need color decompression
On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoisetwrote: > This is done by introducing a separate list. > > si_decompress_textures() is now 5x faster. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/radeonsi/si_blit.c| 21 +++-- > src/gallium/drivers/radeonsi/si_descriptors.c | 64 > --- > src/gallium/drivers/radeonsi/si_pipe.c| 4 ++ > src/gallium/drivers/radeonsi/si_pipe.h| 4 +- > 4 files changed, 59 insertions(+), 34 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c > b/src/gallium/drivers/radeonsi/si_blit.c > index 06a99fbc8a2..b240c4d355e 100644 > --- a/src/gallium/drivers/radeonsi/si_blit.c > +++ b/src/gallium/drivers/radeonsi/si_blit.c > @@ -693,18 +693,13 @@ static void si_check_render_feedback(struct si_context > *sctx) > > static void si_decompress_resident_textures(struct si_context *sctx) > { > - util_dynarray_foreach(>resident_tex_handles, > + util_dynarray_foreach(>resident_tex_needs_color_decompress, > struct si_texture_handle *, tex_handle) { > struct pipe_sampler_view *view = (*tex_handle)->view; > - struct si_sampler_view *sview = (struct si_sampler_view > *)view; > struct r600_texture *tex = (struct r600_texture > *)view->texture; > > - if (view->texture->target == PIPE_BUFFER) > - continue; > - > - if ((*tex_handle)->needs_color_decompress) > - si_decompress_color_texture(sctx, tex, > view->u.tex.first_level, > - view->u.tex.last_level); > + si_decompress_color_texture(sctx, tex, > view->u.tex.first_level, > + view->u.tex.last_level); > } > > util_dynarray_foreach(>resident_tex_needs_depth_decompress, > @@ -722,17 +717,13 @@ static void si_decompress_resident_textures(struct > si_context *sctx) > > static void si_decompress_resident_images(struct si_context *sctx) > { > - util_dynarray_foreach(>resident_img_handles, > + util_dynarray_foreach(>resident_img_needs_color_decompress, > struct si_image_handle *, img_handle) { > struct pipe_image_view *view = &(*img_handle)->view; > struct r600_texture *tex = (struct 
r600_texture > *)view->resource; > > - if (view->resource->target == PIPE_BUFFER) > - continue; > - > - if ((*img_handle)->needs_color_decompress) > - si_decompress_color_texture(sctx, tex, > view->u.tex.level, > - view->u.tex.level); > + si_decompress_color_texture(sctx, tex, view->u.tex.level, > + view->u.tex.level); > } > } > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index a8f54e0714a..f9e87530330 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -1617,29 +1617,41 @@ static void si_set_polygon_stipple(struct > pipe_context *ctx, > static void > si_resident_handles_update_needs_color_decompress(struct si_context *sctx) > { > + needless new line? For the series: Reviewed-by: Marek Olšák Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] i965/gen6: Use isl for hiz
On Wed, Jun 14, 2017 at 10:18:18AM +0300, Pohjolainen, Topi wrote: > On Tue, Jun 13, 2017 at 04:20:02PM -0700, Jason Ekstrand wrote: > > On Tue, Jun 13, 2017 at 4:14 PM, Jason Ekstrand> > wrote: > > > > > On Tue, Jun 13, 2017 at 7:53 AM, Topi Pohjolainen < > > > topi.pohjolai...@gmail.com> wrote: > > > > > >> Signed-off-by: Topi Pohjolainen > > >> --- > > >> src/mesa/drivers/dri/i965/brw_blorp.c | 9 +++-- > > >> src/mesa/drivers/dri/i965/gen6_depth_state.c | 12 +++ > > >> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 50 > > >> ++- > > >> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 7 ++-- > > >> 4 files changed, 39 insertions(+), 39 deletions(-) > > >> > > >> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > > >> b/src/mesa/drivers/dri/i965/brw_blorp.c > > >> index 4bc53b76b5..b722454703 100644 > > >> --- a/src/mesa/drivers/dri/i965/brw_blorp.c > > >> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > > >> @@ -165,8 +165,13 @@ blorp_surf_for_miptree(struct brw_context *brw, > > >> > > >> surf->aux_usage = intel_miptree_get_aux_isl_usage(brw, mt); > > >> > > >> - struct isl_surf *aux_surf = _surfs[1]; > > >> - intel_miptree_get_aux_isl_surf(brw, mt, surf->aux_usage, aux_surf); > > >> + struct isl_surf *aux_surf; > > >> + if (brw->gen == 6 && mt->hiz_buf) { > > >> + aux_surf = >hiz_buf->aux_base.surf; > > >> + } else { > > >> + aux_surf = _surfs[1]; > > >> + intel_miptree_get_aux_isl_surf(brw, mt, surf->aux_usage, > > >> aux_surf); > > >> > > > > > > This is a bit awkward. Maybe just make intel_miptree_get_aux_isl_surf > > > return the surf from hiz_buf on gen6? Not that it matters much since I > > > have a feeling this is all going away in the future. > > I'd like to keep intel_miptree_get_aux_isl_surf() unchanged, I'm throwing it > out later and it is clearer when I don't need to move anything back from it. 
> > > > > > > > > >> + } > > >> > > >> if (wants_resolve) { > > >>bool supports_aux = surf->aux_usage != ISL_AUX_USAGE_NONE && > > >> diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c > > >> b/src/mesa/drivers/dri/i965/gen6_depth_state.c > > >> index 0d8785db65..0f5e4d3201 100644 > > >> --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c > > >> +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c > > >> @@ -165,18 +165,14 @@ gen6_emit_depth_stencil_hiz(struct brw_context > > >> *brw, > > >>/* Emit hiz buffer. */ > > >>if (hiz) { > > >> assert(depth_mt); > > >> - struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt; > > >> > > >> - assert(hiz_mt->array_layout == GEN6_HIZ_STENCIL); > > >> - > > >> - const uint32_t offset = intel_miptree_get_aligned_offset( > > >> -hiz_mt, > > >> -hiz_mt->level[lod].level_x, > > >> -hiz_mt->level[lod].level_y); > > >> + uint32_t offset; > > >> + isl_surf_get_image_offset_B_tile_sa(_mt->hiz_buf->aux > > >> _base.surf, > > >> + lod, 0, 0, , NULL, > > >> NULL); > > >> > > >> BEGIN_BATCH(3); > > >> OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); > > >> -OUT_BATCH(depth_mt->hiz_buf->aux_base.pitch - 1); > > >> +OUT_BATCH(depth_mt->hiz_buf->aux_base.surf.row_pitch - 1); > > >> OUT_RELOC(depth_mt->hiz_buf->aux_base.bo, > > >>I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, > > >>offset); > > >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> index d87dbfaacd..4dbf853eee 100644 > > >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> @@ -1058,10 +1058,7 @@ intel_miptree_hiz_buffer_free(struct > > >> intel_miptree_hiz_buffer *hiz_buf) > > >> if (hiz_buf == NULL) > > >>return; > > >> > > >> - if (hiz_buf->mt) > > >> - intel_miptree_release(_buf->mt); > > >> - else > > >> - brw_bo_unreference(hiz_buf->aux_base.bo); > > >> + brw_bo_unreference(hiz_buf->aux_base.bo); > > >> > > >> 
free(hiz_buf); > > >> } > > >> @@ -2007,34 +2004,39 @@ intel_hiz_miptree_buf_create(struct brw_context > > >> *brw, > > >> struct intel_mipmap_tree *mt) > > >> { > > >> struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1); > > >> - uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; > > >> + if (!buf) > > >> + return NULL; > > >> > > >> - if (brw->gen == 6) > > >> - layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL; > > >> + struct isl_surf_init_info init_info = { > > >> + .dim = get_isl_surf_dim(mt->target), > > >> + .format = ISL_FORMAT_HIZ, > > >> + .width = mt->logical_width0, > > >> +
Re: [Mesa-dev] [Mesa-stable] [PATCH 08/11] i965/blorp: Do a depth flush/stall prior to HiZ operations
On 14 June 2017 at 18:15, Jason Ekstrand wrote: > On Wed, Jun 14, 2017 at 10:12 AM, Jason Ekstrand > wrote: >> >> On Wed, Jun 14, 2017 at 3:51 AM, Emil Velikov >> wrote: >>> >>> Hi Jason, >>> >>> On 7 June 2017 at 06:00, Jason Ekstrand wrote: >>> > Without this stall, the test group ES3-CTS.functional.fbo.msaa.\* hangs >>> > about 1 out of every 2 or 3 times on my Sky Lake GT3 laptop. With the >>> > flush and stall, I can run it 6 times in a row without a hang. >>> > >>> > Cc: "17.1" >>> > --- >>> > src/mesa/drivers/dri/i965/brw_blorp.c | 17 + >>> > 1 file changed, 17 insertions(+) >>> > >>> Doesn't seem like this patch has landed, has it? Or perhaps it's >>> superseded by another commit? >> >> >> No, I just have too much in flight and need to actually push my patches. >> :-) I'll try and get this landed today. > > > Scratch that... It was superseded by > acbd02450bfd53f61bbe468a6f0e8bf5e4507095 and friends. > Great, I already have it (& Co) in the queue. Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa/tests: add GL_ARB_bindless_texture entry points
See https://lists.freedesktop.org/archives/mesa-dev/2017-June/159273.html. Thanks. On 06/14/2017 09:33 PM, Ian Romanick wrote: From: Ian RomanickShould have been part of commit 5f249b9 "mapi: add GL_ARB_bindless_texture entry points" Signed-off-by: Ian Romanick Cc: Samuel Pitoiset Cc: Nicolai Hähnle Cc: Mark Janes --- src/mesa/main/tests/dispatch_sanity.cpp | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 47d0aa6..408c813 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -965,6 +965,24 @@ const struct function common_desktop_functions_possible[] = { { "glBufferPageCommitmentARB", 43, -1 }, { "glNamedBufferPageCommitmentARB", 43, -1 }, + /* GL_ARB_bindless_texture */ + { "glGetTextureHandleARB", 45, -1 }, + { "glGetTextureSamplerHandleARB", 45, -1 }, + { "glMakeTextureHandleResidentARB", 45, -1 }, + { "glMakeTextureHandleNonResidentARB", 45, -1 }, + { "glGetImageHandleARB", 45, -1 }, + { "glMakeImageHandleResidentARB", 45, -1 }, + { "glMakeImageHandleNonResidentARB", 45, -1 }, + { "glUniformHandleui64ARB", 45, -1 }, + { "glUniformHandleui64vARB", 45, -1 }, + { "glProgramUniformHandleui64ARB", 45, -1 }, + { "glProgramUniformHandleui64vARB", 45, -1 }, + { "glIsTextureHandleResidentARB", 45, -1 }, + { "glIsImageHandleResidentARB", 45, -1 }, + { "glVertexAttribL1ui64ARB", 45, -1 }, + { "glVertexAttribL1ui64vARB", 45, -1 }, + { "glGetVertexAttribLui64vARB", 45, -1 }, + { NULL, 0, -1 } }; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa/tests: add GL_ARB_bindless_texture entry points
From: Ian Romanick Should have been part of commit 5f249b9 "mapi: add GL_ARB_bindless_texture entry points" Signed-off-by: Ian Romanick Cc: Samuel Pitoiset Cc: Nicolai Hähnle Cc: Mark Janes --- src/mesa/main/tests/dispatch_sanity.cpp | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 47d0aa6..408c813 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -965,6 +965,24 @@ const struct function common_desktop_functions_possible[] = { { "glBufferPageCommitmentARB", 43, -1 }, { "glNamedBufferPageCommitmentARB", 43, -1 }, + /* GL_ARB_bindless_texture */ + { "glGetTextureHandleARB", 45, -1 }, + { "glGetTextureSamplerHandleARB", 45, -1 }, + { "glMakeTextureHandleResidentARB", 45, -1 }, + { "glMakeTextureHandleNonResidentARB", 45, -1 }, + { "glGetImageHandleARB", 45, -1 }, + { "glMakeImageHandleResidentARB", 45, -1 }, + { "glMakeImageHandleNonResidentARB", 45, -1 }, + { "glUniformHandleui64ARB", 45, -1 }, + { "glUniformHandleui64vARB", 45, -1 }, + { "glProgramUniformHandleui64ARB", 45, -1 }, + { "glProgramUniformHandleui64vARB", 45, -1 }, + { "glIsTextureHandleResidentARB", 45, -1 }, + { "glIsImageHandleResidentARB", 45, -1 }, + { "glVertexAttribL1ui64ARB", 45, -1 }, + { "glVertexAttribL1ui64vARB", 45, -1 }, + { "glGetVertexAttribLui64vARB", 45, -1 }, + { NULL, 0, -1 } }; -- 2.9.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper
On Wed, Jun 14, 2017 at 9:08 PM, Samuel Pitoiset wrote: > > > On 06/14/2017 08:24 PM, Marek Olšák wrote: >> >> On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset >> wrote: >>> >>> Signed-off-by: Samuel Pitoiset >>> --- >>> src/util/u_dynarray.h | 6 ++ >>> 1 file changed, 6 insertions(+) >>> >>> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h >>> index 57f96ff79c0..cc316323f28 100644 >>> --- a/src/util/u_dynarray.h >>> +++ b/src/util/u_dynarray.h >>> @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf) >>> } >>> } >>> >>> +static inline void >>> +util_dynarray_clear(struct util_dynarray *buf) >> >> >> util_dynarray_init? "clear" sounds like "free". >> > > There is already util_dynarray_init(), but it resets all fields to 0 > including 'capacity', same for util_dynarray_fini(). > > The idea behind util_dynarray_clear() is to avoid extra reallocations. > > Though, the number of color/depth textures which need decompression is most > likely small, so using util_dynarray_fini() shouldn't impact too much. What > do you think? "clear" sounds good then. Marek > > >> Marek >> >>> +{ >>> + buf->size = 0; >>> +} >>> + >>> #define DYN_ARRAY_INITIAL_SIZE 64 >>> >>> /* use util_dynarray_trim to reduce the allocated storage */ >>> -- >>> 2.13.1 >>> >>> ___ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/radeon: fix initialization of new resource bindless fields
r600_resource objects are not calloc'd. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/radeon/r600_buffer_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index fb74b45d2fa..5336f55cb57 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -110,6 +110,8 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen, res->bo_size = size; res->bo_alignment = alignment; res->flags = 0; + res->texture_handle_allocated = false; + res->image_handle_allocated = false; switch (res->b.b.usage) { case PIPE_USAGE_STREAM: -- 2.13.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper
On 06/14/2017 08:24 PM, Marek Olšák wrote: On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoisetwrote: Signed-off-by: Samuel Pitoiset --- src/util/u_dynarray.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h index 57f96ff79c0..cc316323f28 100644 --- a/src/util/u_dynarray.h +++ b/src/util/u_dynarray.h @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf) } } +static inline void +util_dynarray_clear(struct util_dynarray *buf) util_dynarray_init? "clear" sounds like "free". There is already util_dynarray_init(), but it resets all fields to 0 including 'capacity', same for util_dynarray_fini(). The idea behind util_dynarray_clear() is to avoid extra reallocations. Though, the number of color/depth textures which need decompression is most likely small, so using util_dynarray_fini() shouldn't impact too much. What do you think? Marek +{ + buf->size = 0; +} + #define DYN_ARRAY_INITIAL_SIZE 64 /* use util_dynarray_trim to reduce the allocated storage */ -- 2.13.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/11] i965: Use blorp for depth/stencil clears on gen6+
On Tue, Jun 06, 2017 at 10:00:06PM -0700, Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/brw_blorp.c | 106 > ++ > src/mesa/drivers/dri/i965/brw_blorp.h | 4 ++ > src/mesa/drivers/dri/i965/brw_clear.c | 6 ++ > 3 files changed, 116 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > b/src/mesa/drivers/dri/i965/brw_blorp.c > index 38925d9..a46b624 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > @@ -930,6 +930,112 @@ brw_blorp_clear_color(struct brw_context *brw, struct > gl_framebuffer *fb, > } > > void > +brw_blorp_clear_depth_stencil(struct brw_context *brw, > + struct gl_framebuffer *fb, > + GLbitfield mask, bool partial_clear) > +{ > + const struct gl_context *ctx = >ctx; > + struct gl_renderbuffer *depth_rb = > + fb->Attachment[BUFFER_DEPTH].Renderbuffer; > + struct gl_renderbuffer *stencil_rb = > + fb->Attachment[BUFFER_STENCIL].Renderbuffer; > + > + if (!depth_rb || ctx->Depth.Mask == GL_FALSE) > + mask &= ~BUFFER_BIT_DEPTH; > + > + if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0) > + mask &= ~BUFFER_BIT_STENCIL; > + > + if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) > + return; > + > + uint32_t x0, x1, y0, y1, rb_name, rb_height; > + if (depth_rb) { > + rb_name = depth_rb->Name; > + rb_height = depth_rb->Height; > + if (stencil_rb) { > + assert(depth_rb->Width == stencil_rb->Width); > + assert(depth_rb->Height == stencil_rb->Height); > + } > + } else { > + assert(stencil_rb); > + rb_name = stencil_rb->Name; > + rb_height = stencil_rb->Height; > + } > + > + x0 = fb->_Xmin; > + x1 = fb->_Xmax; > + if (rb_name != 0) { > + y0 = fb->_Ymin; > + y1 = fb->_Ymax; > + } else { > + y0 = rb_height - fb->_Ymax; > + y1 = rb_height - fb->_Ymin; > + } > + > + /* If the clear region is empty, just return. 
*/ > + if (x0 == x1 || y0 == y1) > + return; > + > + unsigned level, layer, num_layers; > + struct isl_surf isl_tmp[4]; > + struct blorp_surf depth_surf, stencil_surf; > + > + if (mask & BUFFER_BIT_DEPTH) { > + struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb); > + struct intel_mipmap_tree *depth_mt = > + find_miptree(GL_DEPTH_BUFFER_BIT, irb); > + > + level = irb->mt_level; > + layer = irb_logical_mt_layer(irb); > + num_layers = fb->MaxNumLayers ? irb->layer_count : 1; > + > + intel_miptree_set_all_slices_need_depth_resolve(depth_mt, level); > + > + unsigned depth_level = level; > + blorp_surf_for_miptree(brw, _surf, depth_mt, true, > + (1 << ISL_AUX_USAGE_HIZ), > + _level, layer, num_layers, _tmp[0]); > + assert(depth_level == level); > + } > + > + uint8_t stencil_mask = 0; > + if (mask & BUFFER_BIT_STENCIL) { > + struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb); > + struct intel_mipmap_tree *stencil_mt = > + find_miptree(GL_STENCIL_BUFFER_BIT, irb); > + > + if (mask & BUFFER_BIT_DEPTH) { > + assert(level == irb->mt_level); > + assert(layer == irb_logical_mt_layer(irb)); > + assert(num_layers == fb->MaxNumLayers ? irb->layer_count : 1); > + } else { > + level = irb->mt_level; > + layer = irb_logical_mt_layer(irb); > + num_layers = fb->MaxNumLayers ? irb->layer_count : 1; > + } > + > + stencil_mask = ctx->Stencil.WriteMask[0] & 0xff; > + > + unsigned stencil_level = level; > + blorp_surf_for_miptree(brw, _surf, stencil_mt, true, > + (1 << ISL_AUX_USAGE_HIZ), Why do we set hiz for stencil? I noticed that anv_blorp.c::anv_CmdClearDepthStencilImage() sets it to NONE for depth and stencil while get_blorp_surf_for_anv_image() has code to take the HIZ usage away for stencil (if given). 
Otherwise looks good to me: Reviewed-by: Topi Pohjolainen> + _level, layer, num_layers, _tmp[2]); > + } > + > + assert((mask & BUFFER_BIT_DEPTH) || stencil_mask); > + > + struct blorp_batch batch; > + blorp_batch_init(>blorp, , brw, 0); > + blorp_clear_depth_stencil(, _surf, _surf, > + level, layer, num_layers, > + x0, y0, x1, y1, > + (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear, > + stencil_mask, ctx->Stencil.Clear); > + blorp_batch_finish(); > +} > + > +void > brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree > *mt, > unsigned level, unsigned layer) > { > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h > b/src/mesa/drivers/dri/i965/brw_blorp.h > index 8743d96..868301f 100644 > ---
Re: [Mesa-dev] [PATCH 01/15] i965/miptree: Refactor mapping table alloc
On Tue, Jun 13, 2017 at 04:31:26PM -0700, Nanley Chery wrote: > On Tue, Jun 13, 2017 at 05:49:59PM +0300, Topi Pohjolainen wrote: > > Signed-off-by: Topi Pohjolainen> > --- > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 > > +-- > > 1 file changed, 27 insertions(+), 2 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > index 253d833b13..78a223a7f3 100644 > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > @@ -285,6 +285,26 @@ intel_depth_format_for_depthstencil_format(mesa_format > > format) { > > } > > } > > > > +static bool > > +create_mapping_table(GLenum target, unsigned first_level, unsigned > > last_level, > > + unsigned depth0, struct intel_mipmap_level *table) > > +{ > > + for (unsigned level = first_level; level <= last_level; level++) { > > + const unsigned d = target == GL_TEXTURE_3D ? depth0 >> level : > > depth0; > > There's a bug here. If the target is GL_TEXTURE_3D we should > minify(depth0, level) to avoid setting a depth of 0. Oops, definitely. > > This seems to be more than a refactor. Prior to this patch, > brw_miptree_layout_gen6_hiz_stencil wouldn't shrink the number of slices > per mipmap level as the level increases, but does so now. Right. I actually missed that. How do want to handle that? I could write a patch against brw_miptree_layout_gen6_hiz_stencil() doing the same thing there (modifying the argument given to intel_miptree_set_level_info() but keeping actual allocation size as it was in order to have space for level 0 qpitch). 
> > -Nanley > > > + > > + table[level].slice = calloc(d, sizeof(*table[0].slice)); > > + if (!table[level].slice) > > + goto unwind; > > + } > > + > > + return true; > > + > > +unwind: > > + for (unsigned level = first_level; level <= last_level; level++) > > + free(table[level].slice); > > + > > + return false; > > +} > > > > /** > > * @param for_bo Indicates that the caller is > > @@ -424,6 +444,12 @@ intel_miptree_create_layout(struct brw_context *brw, > >} > > } > > > > + if (!create_mapping_table(target, first_level, last_level, depth0, > > + mt->level)) { > > + free(mt); > > + return NULL; > > + } > > + > > /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 > > can > > * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on > > * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is > > still > > @@ -1103,9 +1129,8 @@ intel_miptree_set_level_info(struct intel_mipmap_tree > > *mt, > > DBG("%s level %d, depth %d, offset %d,%d\n", __func__, > > level, d, x, y); > > > > - assert(mt->level[level].slice == NULL); > > + assert(mt->level[level].slice); > > > > - mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice)); > > mt->level[level].slice[0].x_offset = mt->level[level].level_x; > > mt->level[level].slice[0].y_offset = mt->level[level].level_y; > > } > > -- > > 2.11.0 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper
2017-06-14 13:55 GMT+02:00 Samuel Pitoiset: > Signed-off-by: Samuel Pitoiset > --- > src/util/u_dynarray.h | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h > index 57f96ff79c0..cc316323f28 100644 > --- a/src/util/u_dynarray.h > +++ b/src/util/u_dynarray.h > @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf) > } > } > > +static inline void > +util_dynarray_clear(struct util_dynarray *buf) > +{ > + buf->size = 0; > +} > + The indentation looks a bit off here? Also, I agree with Marek; clear indicates to me that the memory is either freed or that it is cleared to NULL. > #define DYN_ARRAY_INITIAL_SIZE 64 > > /* use util_dynarray_trim to reduce the allocated storage */ > -- > 2.13.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees
On Wed, Jun 14, 2017 at 11:28:39AM -0700, Nanley Chery wrote: > On Wed, Jun 14, 2017 at 10:36:16AM +0300, Pohjolainen, Topi wrote: > > On Tue, Jun 13, 2017 at 05:26:52PM -0700, Nanley Chery wrote: > > > On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote: > > > > Signed-off-by: Topi Pohjolainen> > > > --- > > > > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++- > > > > 1 file changed, 7 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > > index 8479b285cb..0b85bc12ef 100644 > > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > > @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct > > > > intel_mipmap_tree *mt, > > > > > > > > assert(level >= mt->first_level); > > > > assert(level <= mt->last_level); > > > > - assert(layer < mt->level[level].depth); > > > > + > > > > + if (mt->surf.size > 0) > > > > + assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ? > > > > + mt->surf.phys_level0_sa.depth : > > > > + mt->surf.phys_level0_sa.array_len)); > > > > > > Did you mean to access mt->surf.logical_level0_px here? > > > > I was just about to say that "Actually no, mt->level[level].depth represents > > the number of physical layers." > > You may be right. I just expected the other field would be accessed > because in the previous patch you access logical_level0_px instead of > phys_level0_sa. I'm not very experienced with this code so I may have > missed some detail. Funny, I was just double checking this myself :) Things actually are as they used to be, "mt->level[level].depth" is still based on physical depth. It will go away altogether once I'm done with color surfaces. > > > But now reading the current logic I remembered > > that Jason just recently changed all that. This is based on the way it was > > before. Thanks Nanley! 
> > > > > > > > -Nanley > > > > > > > + else > > > > + assert(layer < mt->level[level].depth); > > > > } > > > > > > > > void intel_miptree_reference(struct intel_mipmap_tree **dst, > > > > -- > > > > 2.11.0 > > > > > > > > ___ > > > > mesa-dev mailing list > > > > mesa-dev@lists.freedesktop.org > > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees
On Wed, Jun 14, 2017 at 10:36:16AM +0300, Pohjolainen, Topi wrote: > On Tue, Jun 13, 2017 at 05:26:52PM -0700, Nanley Chery wrote: > > On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote: > > > Signed-off-by: Topi Pohjolainen> > > --- > > > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++- > > > 1 file changed, 7 insertions(+), 1 deletion(-) > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > index 8479b285cb..0b85bc12ef 100644 > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > > @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct > > > intel_mipmap_tree *mt, > > > > > > assert(level >= mt->first_level); > > > assert(level <= mt->last_level); > > > - assert(layer < mt->level[level].depth); > > > + > > > + if (mt->surf.size > 0) > > > + assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ? > > > + mt->surf.phys_level0_sa.depth : > > > + mt->surf.phys_level0_sa.array_len)); > > > > Did you mean to access mt->surf.logical_level0_px here? > > I was just about to say that "Actually no, mt->level[level].depth represents > the number of physical layers." You may be right. I just expected the other field would be accessed because in the previous patch you access logical_level0_px instead of phys_level0_sa. I'm not very experienced with this code so I may have missed some detail. > But now reading the current logic I remembered > that Jason just recently changed all that. This is based on the way it was > before. Thanks Nanley! 
> > > > > -Nanley > > > > > + else > > > + assert(layer < mt->level[level].depth); > > > } > > > > > > void intel_miptree_reference(struct intel_mipmap_tree **dst, > > > -- > > > 2.11.0 > > > > > > ___ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper
On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset wrote: > Signed-off-by: Samuel Pitoiset > --- > src/util/u_dynarray.h | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h > index 57f96ff79c0..cc316323f28 100644 > --- a/src/util/u_dynarray.h > +++ b/src/util/u_dynarray.h > @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf) > } > } > > +static inline void > +util_dynarray_clear(struct util_dynarray *buf) util_dynarray_init? "clear" sounds like "free". Marek > +{ > + buf->size = 0; > +} > + > #define DYN_ARRAY_INITIAL_SIZE 64 > > /* use util_dynarray_trim to reduce the allocated storage */ > -- > 2.13.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/radeon: add a new HUD query for the number of resident handles
Reviewed-by: Marek OlšákMarek On Wed, Jun 14, 2017 at 11:40 AM, Samuel Pitoiset wrote: > Useful for debugging performance issues when ARB_bindless_texture > is enabled. This query doesn't make a distinction between texture > and image handles. > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/radeon/r600_pipe_common.h | 1 + > src/gallium/drivers/radeon/r600_query.c | 7 +++ > src/gallium/drivers/radeon/r600_query.h | 1 + > src/gallium/drivers/radeonsi/si_descriptors.c | 3 +++ > 4 files changed, 12 insertions(+) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index 45ed5bab74b..006b795fe29 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -598,6 +598,7 @@ struct r600_common_context { > unsignednum_fb_cache_flushes; > unsignednum_L2_invalidates; > unsignednum_L2_writebacks; > + unsignednum_resident_handles; > uint64_tnum_alloc_tex_transfer_bytes; > unsignedlast_tex_ps_draw_ratio; /* for query > */ > > diff --git a/src/gallium/drivers/radeon/r600_query.c > b/src/gallium/drivers/radeon/r600_query.c > index bce43171e30..8bd94e6a940 100644 > --- a/src/gallium/drivers/radeon/r600_query.c > +++ b/src/gallium/drivers/radeon/r600_query.c > @@ -134,6 +134,9 @@ static bool r600_query_sw_begin(struct > r600_common_context *rctx, > case R600_QUERY_NUM_L2_WRITEBACKS: > query->begin_result = rctx->num_L2_writebacks; > break; > + case R600_QUERY_NUM_RESIDENT_HANDLES: > + query->begin_result = rctx->num_resident_handles; > + break; > case R600_QUERY_TC_OFFLOADED_SLOTS: > query->begin_result = rctx->tc ? 
> rctx->tc->num_offloaded_slots : 0; > break; > @@ -276,6 +279,9 @@ static bool r600_query_sw_end(struct r600_common_context > *rctx, > case R600_QUERY_NUM_L2_WRITEBACKS: > query->end_result = rctx->num_L2_writebacks; > break; > + case R600_QUERY_NUM_RESIDENT_HANDLES: > + query->end_result = rctx->num_resident_handles; > + break; > case R600_QUERY_TC_OFFLOADED_SLOTS: > query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots > : 0; > break; > @@ -1834,6 +1840,7 @@ static struct pipe_driver_query_info > r600_driver_query_list[] = { > X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, > AVERAGE), > X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, > AVERAGE), > X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, > AVERAGE), > + X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, > AVERAGE), > X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, > AVERAGE), > X("tc-direct-slots",TC_DIRECT_SLOTS,UINT64, > AVERAGE), > X("tc-num-syncs", TC_NUM_SYNCS, UINT64, > AVERAGE), > diff --git a/src/gallium/drivers/radeon/r600_query.h > b/src/gallium/drivers/radeon/r600_query.h > index ed607ec199b..9e6617f342a 100644 > --- a/src/gallium/drivers/radeon/r600_query.h > +++ b/src/gallium/drivers/radeon/r600_query.h > @@ -54,6 +54,7 @@ enum { > R600_QUERY_NUM_FB_CACHE_FLUSHES, > R600_QUERY_NUM_L2_INVALIDATES, > R600_QUERY_NUM_L2_WRITEBACKS, > + R600_QUERY_NUM_RESIDENT_HANDLES, > R600_QUERY_TC_OFFLOADED_SLOTS, > R600_QUERY_TC_DIRECT_SLOTS, > R600_QUERY_TC_NUM_SYNCS, > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 375bcaea937..41f6e054615 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -2595,6 +2595,9 @@ void si_all_resident_buffers_begin_new_cs(struct > si_context *sctx) >RADEON_USAGE_READWRITE, >false, false); > } > + > + sctx->b.num_resident_handles += num_resident_tex_handles + > + num_resident_img_handles; > } > > /* 
INIT/DEINIT/UPLOAD */ > -- > 2.13.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color
On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák wrote: > On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri wrote: >> >> >> On 13/06/17 04:18, Marek Olšák wrote: >>> >>> From: Marek Olšák >>> >>> AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since >>> border color swizzling is broken there anyway. The only other user of this >>> code is nouveau. >> >> >> Maybe move this comment into the code as a TODO? I was a little confused at >> first as I thought this commit was meant to make the change. With that: > > I don't understand. What are you confused about? The commit message talks about radeonsi, but this patch is for st/mesa. st/mesa doesn't care which drivers use the codepath. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound
On Wed, Jun 14, 2017 at 8:03 PM, Marek Olšák wrote: > On Wed, Jun 14, 2017 at 9:23 AM, Nicolai Hähnle wrote: >> On 12.06.2017 20:18, Marek Olšák wrote: >>> >>> From: Marek Olšák >>> >>> This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath >>> (restore_sampler_states), and makes sanitize_hash() slightly worse. >>> >>> Sampler states, when bound, are not unbound for draw calls that don't need >>> them. That's OK, because bound sampler states don't add any overhead. >> >> >> Is this really always true? They might show up in texture decompression >> checks. > > Sampler states are not sampler views. They don't have any resources > attached to them. Also, radeonsi never unbinds sampler states, so this change has no effect on radeonsi behavior. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound
On Wed, Jun 14, 2017 at 9:23 AM, Nicolai Hähnlewrote: > On 12.06.2017 20:18, Marek Olšák wrote: >> >> From: Marek Olšák >> >> This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath >> (restore_sampler_states), and makes sanitize_hash() slightly worse. >> >> Sampler states, when bound, are not unbound for draw calls that don't need >> them. That's OK, because bound sampler states don't add any overhead. > > > Is this really always true? They might show up in texture decompression > checks. Sampler states are not sampler views. They don't have any resources attached to them. Marek > > Cheers, > Nicolai > > > >> >> This results in lower CPU overhead in most cases. >> --- >> src/gallium/auxiliary/cso_cache/cso_context.c | 59 >> +++ >> 1 file changed, 23 insertions(+), 36 deletions(-) >> >> diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c >> b/src/gallium/auxiliary/cso_cache/cso_context.c >> index 5558385..4947b8e 100644 >> --- a/src/gallium/auxiliary/cso_cache/cso_context.c >> +++ b/src/gallium/auxiliary/cso_cache/cso_context.c >> @@ -50,21 +50,20 @@ >> #include "cso_context.h" >> /** >>* Per-shader sampler information. >>*/ >> struct sampler_info >> { >> struct cso_sampler *cso_samplers[PIPE_MAX_SAMPLERS]; >> void *samplers[PIPE_MAX_SAMPLERS]; >> - unsigned nr_samplers; >> }; >> struct cso_context { >> struct pipe_context *pipe; >> struct cso_cache *cache; >> struct u_vbuf *vbuf; >>boolean has_geometry_shader; >> @@ -76,20 +75,25 @@ struct cso_context { >>struct pipe_sampler_view >> *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; >> unsigned nr_fragment_views; >>struct pipe_sampler_view >> *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS]; >> unsigned nr_fragment_views_saved; >>struct sampler_info fragment_samplers_saved; >> struct sampler_info samplers[PIPE_SHADER_TYPES]; >> + /* Temporary number until cso_single_sampler_done is called. >> +* It tracks the highest sampler seen in cso_single_sampler. 
>> +*/ >> + int max_sampler_seen; >> + >> struct pipe_vertex_buffer aux_vertex_buffer_current; >> struct pipe_vertex_buffer aux_vertex_buffer_saved; >> unsigned aux_vertex_buffer_index; >>struct pipe_constant_buffer >> aux_constbuf_current[PIPE_SHADER_TYPES]; >> struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES]; >>struct pipe_image_view fragment_image0_current; >> struct pipe_image_view fragment_image0_saved; >> @@ -233,21 +237,21 @@ sanitize_hash(struct cso_hash *hash, enum >> cso_cache_type type, >> if (type == CSO_SAMPLER) { >> int i, j; >> samplers_to_restore = MALLOC(PIPE_SHADER_TYPES * >> PIPE_MAX_SAMPLERS * >> sizeof(*samplers_to_restore)); >> /* Temporarily remove currently bound sampler states from the >> hash >> * table, to prevent them from being deleted >> */ >> for (i = 0; i < PIPE_SHADER_TYPES; i++) { >> - for (j = 0; j < ctx->samplers[i].nr_samplers; j++) { >> + for (j = 0; j < PIPE_MAX_SAMPLERS; j++) { >> struct cso_sampler *sampler = >> ctx->samplers[i].cso_samplers[j]; >> if (sampler && cso_hash_take(hash, sampler->hash_key)) >> samplers_to_restore[to_restore++] = sampler; >>} >> } >> } >>iter = cso_hash_first_node(hash); >> while (to_remove) { >> @@ -327,20 +331,21 @@ cso_create_context(struct pipe_context *pipe, >> unsigned u_vbuf_flags) >> PIPE_SHADER_CAP_SUPPORTED_IRS); >> if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { >>ctx->has_compute_shader = TRUE; >> } >> } >> if (pipe->screen->get_param(pipe->screen, >> PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) >> { >> ctx->has_streamout = TRUE; >> } >> + ctx->max_sampler_seen = -1; >> return ctx; >> out: >> cso_destroy_context( ctx ); >> return NULL; >> } >> /** >>* Free the CSO context. 
>>*/ >> @@ -1223,116 +1228,98 @@ cso_single_sampler(struct cso_context *ctx, enum >> pipe_shader_type shader_stage, >> FREE(cso); >> return PIPE_ERROR_OUT_OF_MEMORY; >>} >> } >> else { >>cso = cso_hash_iter_data(iter); >> } >> ctx->samplers[shader_stage].cso_samplers[idx] = cso; >> ctx->samplers[shader_stage].samplers[idx] = cso->data; >> - } else { >> - ctx->samplers[shader_stage].cso_samplers[idx] = NULL; >> - ctx->samplers[shader_stage].samplers[idx] = NULL; >> + ctx->max_sampler_seen = MAX2(ctx->max_sampler_seen, (int)idx); >> } >>return PIPE_OK; >> } >> /** >>* Send staged sampler state to the
Re: [Mesa-dev] [PATCH 3/3] i965/gen4: Add support for single layer in alignment workaround
On 06/09/2017 07:04 AM, Topi Pohjolainen wrote: > On gen < 6 one doesn't have level or layer specifiers available > for render and depth targets. In order to support rendering to > specific level/layer, driver needs to manually offset the surface > to the desired slice. > There are, however, alignment restrictions to respect as well and > in some cases the only option is to use temporary single slice > surface which driver copies after rendering to the full miptree. > > Current alignment workaround introduces new texture images which > are added to the parent texture object. Texture validation later > on copies the additional levels back to the surface that contains > the full mipmap. > This only works for non-arrayed surfaces and driver currently > creates new arrayed images in vain - individual layers within the > newly created are still unaligned the same as before. > > This patch drops this mechanism and instead attaches single > temporary slice into the render buffer. This gets immediately > copied back to the mipmapped and/or arrayed surface just after > the render is done. 
> > Sitting on top of earlier series cleaning up the depth buffer > state, this patch additionally fixes the following piglit tests: > > ext_texture_array.copyteximage 2d_array.g45m64 > ext_texture_array.copyteximage 1d_array.g45m64 > arb_framebuffer_object.fbo-blit-stretch.g33m64 I wish I had noticed this before I had Mark open a bug for it: https://bugs.freedesktop.org/show_bug.cgi?id=101414 > ext_framebuffer_object.fbo-cubemap.g965m64 > arb_framebuffer_object.fbo-generatemipmap-cubemap.g965m64 > arb_texture_cube_map.copyteximage cube.g965m64 > ext_texture_array.copyteximage 1d_array.g965m64 > ext_texture_array.copyteximage 2d_array.g965m64 > ext_texture_array.fbo-array.g965m64 > ext_texture_array.gen-mipmap.g965m64 > ext_texture_array.fbo-generatemipmap-array.g965m64 > arb_pixel_buffer_object.texsubimage array pbo.g965m64 > ext_texture_array.copyteximage 2d_array.ilkm64 > ext_texture_array.copyteximage 1d_array.ilkm64 > arb_texture_cube_map.copyteximage cube.ilkm64 > > CC: Kenneth Graunke> CC: Jason Ekstrand > CC: Ian Romanick > Signed-off-by: Topi Pohjolainen > --- > src/mesa/drivers/dri/i965/brw_draw.c | 51 > > src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +- > src/mesa/drivers/dri/i965/intel_fbo.c| 19 + > src/mesa/drivers/dri/i965/intel_fbo.h| 24 +++ > src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 2 +- > src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 7 > 7 files changed, 99 insertions(+), 11 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 611cb86..cb441c3 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -396,6 +396,56 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context > *brw) > } > > static void > +intel_renderbuffer_move_temp_back(struct brw_context *brw, > + struct intel_renderbuffer *irb) > +{ > + if (irb->align_wa_mt == NULL) > + return; > + > + 
brw_render_cache_set_check_flush(brw, irb->align_wa_mt->bo); > + > + intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0, > +irb->mt, > +irb->Base.Base.TexImage->Level, irb->mt_layer); > + > + intel_miptree_reference(>align_wa_mt, NULL); > + > + /* Finally restore the x,y to correspond to full miptree. */ > + intel_renderbuffer_set_draw_offset(irb); > + > + /* Make sure render surface state gets re-emitted with updated miptree. */ > + brw->NewGLState |= _NEW_BUFFERS; > +} > + > +static void > +brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) > +{ > + struct gl_context *ctx = >ctx; > + struct gl_framebuffer *fb = ctx->DrawBuffer; > + > + struct intel_renderbuffer *depth_irb = > + intel_get_renderbuffer(fb, BUFFER_DEPTH); > + struct intel_renderbuffer *stencil_irb = > + intel_get_renderbuffer(fb, BUFFER_STENCIL); > + > + if (depth_irb && depth_irb->align_wa_mt) > + intel_renderbuffer_move_temp_back(brw, depth_irb); > + > + if (stencil_irb && stencil_irb->align_wa_mt) > + intel_renderbuffer_move_temp_back(brw, stencil_irb); > + > + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { > + struct intel_renderbuffer *irb = > + intel_renderbuffer(fb->_ColorDrawBuffers[i]); > + > + if (!irb || irb->align_wa_mt == NULL) > + continue; > + > + intel_renderbuffer_move_temp_back(brw, irb); > + } > +} > + > +static void > brw_predraw_set_aux_buffers(struct brw_context *brw) > { > if (brw->gen < 9) > @@ -626,6 +676,7 @@
Re: [Mesa-dev] [PATCH 2/6] i965: When gl_PointSize is unwritten, default to 1.0 on Gen4-5.
Reviewed-by: Rafael Antognolli On Wed, May 10, 2017 at 11:47:26AM -0700, Kenneth Graunke wrote: > Modern GL specifications say that the point size should be 1.0 when > gl_PointSize is unwritten and the last enabled stage is a geometry > or tessellation shader. If it's a vertex shader, though, both the > GL specs and ES 3.0 spec say that it's undefined - so since Gen4-5 > only support vertex shaders, there's no actual requirement to do this. > > Since there is a cost associated (an extra dirty bit, which may cause > SF_STATE to be emitted more often), it may not be a good idea. > > The real benefit is that it makes all generations behave identically. > And that seems somewhat nice... > --- > src/mesa/drivers/dri/i965/brw_sf_state.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c > b/src/mesa/drivers/dri/i965/brw_sf_state.c > index ff6b5ebf79b..d5e586d1bf3 100644 > --- a/src/mesa/drivers/dri/i965/brw_sf_state.c > +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c > @@ -263,9 +263,8 @@ static void upload_sf_unit( struct brw_context *brw ) > point_sz = CLAMP(point_sz, 0.125f, 255.875f); > sf->sf7.point_size = U_FIXED(point_sz, 3); > > - /* _NEW_PROGRAM | _NEW_POINT */ > - sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled || > - ctx->Point._Attenuated); > + /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */ > + sf->sf7.use_point_size_state = use_state_point_size(brw); > sf->sf7.aa_line_distance_mode = brw->is_g4x || brw->gen == 5; > > /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: > @@ -317,6 +316,7 @@ const struct brw_tracked_state brw_sf_unit = { > BRW_NEW_PROGRAM_CACHE | > BRW_NEW_SF_PROG_DATA | > BRW_NEW_SF_VP | > + BRW_NEW_VUE_MAP_GEOM_OUT | > BRW_NEW_URB_FENCE, > }, > .emit = upload_sf_unit, > -- > 2.12.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/24] st/mesa: optimize sampler state translation code
On Wed, Jun 14, 2017 at 9:15 AM, Nicolai Hähnlewrote: > On 12.06.2017 20:18, Marek Olšák wrote: >> >> From: Marek Olšák >> >> --- >> src/mesa/state_tracker/st_atom_sampler.c | 79 >> +--- >> 1 file changed, 31 insertions(+), 48 deletions(-) >> >> diff --git a/src/mesa/state_tracker/st_atom_sampler.c >> b/src/mesa/state_tracker/st_atom_sampler.c >> index 9695069..ea231f3 100644 >> --- a/src/mesa/state_tracker/st_atom_sampler.c >> +++ b/src/mesa/state_tracker/st_atom_sampler.c >> @@ -51,85 +51,68 @@ >> #include "util/u_format.h" >> /** >>* Convert GLenum texcoord wrap tokens to pipe tokens. >>*/ >> static GLuint >> gl_wrap_xlate(GLenum wrap) >> { >> - switch (wrap) { >> - case GL_REPEAT: >> - return PIPE_TEX_WRAP_REPEAT; >> - case GL_CLAMP: >> - return PIPE_TEX_WRAP_CLAMP; >> - case GL_CLAMP_TO_EDGE: >> - return PIPE_TEX_WRAP_CLAMP_TO_EDGE; >> - case GL_CLAMP_TO_BORDER: >> - return PIPE_TEX_WRAP_CLAMP_TO_BORDER; >> - case GL_MIRRORED_REPEAT: >> - return PIPE_TEX_WRAP_MIRROR_REPEAT; >> - case GL_MIRROR_CLAMP_EXT: >> - return PIPE_TEX_WRAP_MIRROR_CLAMP; >> - case GL_MIRROR_CLAMP_TO_EDGE_EXT: >> - return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; >> - case GL_MIRROR_CLAMP_TO_BORDER_EXT: >> - return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER; >> - default: >> - assert(0); >> - return 0; >> - } >> + /* Take advantage of how the enums are defined. */ >> + static const unsigned table[32] = { >> + PIPE_TEX_WRAP_CLAMP, >> + PIPE_TEX_WRAP_REPEAT, >> + PIPE_TEX_WRAP_MIRROR_CLAMP, >> + PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE, >> + 0, >> + 0, >> + 0, >> + 0, >> + 0, >> + 0, >> + 0, >> + 0, >> + 0, >> + PIPE_TEX_WRAP_CLAMP_TO_BORDER, >> + 0, >> + PIPE_TEX_WRAP_CLAMP_TO_EDGE, >> + PIPE_TEX_WRAP_MIRROR_REPEAT, >> + 0, >> + PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER, > > > Please use designated initializers, like > > [GL_REPEAT & 0x1f] = PIPE_TEX_WRAP_CLAMP > > etc. Do designated initializers work with MSVC? 
Marek > > With this and Timothy's comment on patch 9 fixed, patches 1-10 are > > Reviewed-by: Nicolai Hähnle > > > >> + }; >> + >> + return table[wrap & 0x1f]; >> } >> static GLuint >> gl_filter_to_mip_filter(GLenum filter) >> { >> - switch (filter) { >> - case GL_NEAREST: >> - case GL_LINEAR: >> + /* Take advantage of how the enums are defined. */ >> + if (filter <= GL_LINEAR) >> return PIPE_TEX_MIPFILTER_NONE; >> - >> - case GL_NEAREST_MIPMAP_NEAREST: >> - case GL_LINEAR_MIPMAP_NEAREST: >> + if (filter <= GL_LINEAR_MIPMAP_NEAREST) >> return PIPE_TEX_MIPFILTER_NEAREST; >> - case GL_NEAREST_MIPMAP_LINEAR: >> - case GL_LINEAR_MIPMAP_LINEAR: >> - return PIPE_TEX_MIPFILTER_LINEAR; >> - >> - default: >> - assert(0); >> - return PIPE_TEX_MIPFILTER_NONE; >> - } >> + return PIPE_TEX_MIPFILTER_LINEAR; >> } >> static GLuint >> gl_filter_to_img_filter(GLenum filter) >> { >> - switch (filter) { >> - case GL_NEAREST: >> - case GL_NEAREST_MIPMAP_NEAREST: >> - case GL_NEAREST_MIPMAP_LINEAR: >> - return PIPE_TEX_FILTER_NEAREST; >> - >> - case GL_LINEAR: >> - case GL_LINEAR_MIPMAP_NEAREST: >> - case GL_LINEAR_MIPMAP_LINEAR: >> + /* Take advantage of how the enums are defined. */ >> + if (filter & 1) >> return PIPE_TEX_FILTER_LINEAR; >> - default: >> - assert(0); >> - return PIPE_TEX_FILTER_NEAREST; >> - } >> + return PIPE_TEX_FILTER_NEAREST; >> } >> /** >>* Convert a gl_sampler_object to a pipe_sampler_state object. >>*/ >> void >> st_convert_sampler(const struct st_context *st, >> const struct gl_texture_object *texobj, >> const struct gl_sampler_object *msamp, >> > > > -- > Lerne, wie die Welt wirklich ist, > Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color
On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri wrote: > > > On 13/06/17 04:18, Marek Olšák wrote: >> >> From: Marek Olšák >> >> AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since >> border color swizzling is broken there anyway. The only other user of this >> code is nouveau. > > > Maybe move this comment into the code as a TODO? I was a little confused at > first as I thought this commit was meant to make the change. With that: I don't understand. What are you confused about? Marek > > Reviewed-by: Timothy Arceri > > >> --- >> src/mesa/state_tracker/st_atom_sampler.c | 61 >> +--- >> 1 file changed, 33 insertions(+), 28 deletions(-) >> >> diff --git a/src/mesa/state_tracker/st_atom_sampler.c >> b/src/mesa/state_tracker/st_atom_sampler.c >> index 9e5d940..9695069 100644 >> --- a/src/mesa/state_tracker/st_atom_sampler.c >> +++ b/src/mesa/state_tracker/st_atom_sampler.c >> @@ -170,51 +170,56 @@ st_convert_sampler(const struct st_context *st, >> sampler->max_lod = sampler->min_lod; >> sampler->min_lod = tmp; >> assert(sampler->min_lod <= sampler->max_lod); >> } >>/* For non-black borders... */ >> if (msamp->BorderColor.ui[0] || >> msamp->BorderColor.ui[1] || >> msamp->BorderColor.ui[2] || >> msamp->BorderColor.ui[3]) { >> - const struct st_texture_object *stobj = >> st_texture_object_const(texobj); >> const GLboolean is_integer = texobj->_IsIntegerFormat; >> - const struct pipe_sampler_view *sv = NULL; >> - union pipe_color_union border_color; >> - GLuint i; >> - >> - /* Just search for the first used view. We can do this because the >> - swizzle is per-texture, not per context. 
*/ >> - /* XXX: clean that up to not use the sampler view at all */ >> - for (i = 0; i < stobj->num_sampler_views; ++i) { >> - if (stobj->sampler_views[i]) { >> -sv = stobj->sampler_views[i]; >> -break; >> - } >> - } >> - if (st->apply_texture_swizzle_to_border_color && sv) { >> - const unsigned char swz[4] = >> - { >> -sv->swizzle_r, >> -sv->swizzle_g, >> -sv->swizzle_b, >> -sv->swizzle_a, >> - }; >> - >> - st_translate_color(&msamp->BorderColor, >> -&border_color, >> -texBaseFormat, is_integer); >> + if (st->apply_texture_swizzle_to_border_color) { >> + const struct st_texture_object *stobj = >> st_texture_object_const(texobj); >> + const struct pipe_sampler_view *sv = NULL; >> + >> + /* Just search for the first used view. We can do this because >> the >> +swizzle is per-texture, not per context. */ >> + /* XXX: clean that up to not use the sampler view at all */ >> + for (unsigned i = 0; i < stobj->num_sampler_views; ++i) { >> +if (stobj->sampler_views[i]) { >> + sv = stobj->sampler_views[i]; >> + break; >> +} >> + } >> - util_format_apply_color_swizzle(&sampler->border_color, >> - &border_color, swz, is_integer); >> + if (sv) { >> +union pipe_color_union tmp; >> +const unsigned char swz[4] = >> +{ >> + sv->swizzle_r, >> + sv->swizzle_g, >> + sv->swizzle_b, >> + sv->swizzle_a, >> +}; >> + >> +st_translate_color(&msamp->BorderColor, &tmp, >> + texBaseFormat, is_integer); >> + >> +util_format_apply_color_swizzle(&sampler->border_color, >> +&tmp, swz, is_integer); >> + } else { >> +st_translate_color(&msamp->BorderColor, >> + &sampler->border_color, >> + texBaseFormat, is_integer); >> + } >> } else { >>st_translate_color(&msamp->BorderColor, >> &sampler->border_color, >> texBaseFormat, is_integer); >> } >> } >>sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ? >> 0 : (GLuint) msamp->MaxAnisotropy); >> ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: gen4_blorp_exec.h to the sources list
On Wed, 2017-06-14 at 17:02 +0100, Emil Velikov wrote: > From: Emil Velikov > > We tend to use the sources, as opposed to EXTRA_DIST to include the > headers. > Reviewed-by: Juan A. Suarez Romero > Cc: Juan A. Suarez Romero > Signed-off-by: Emil Velikov > --- > src/mesa/drivers/dri/i965/Makefile.am | 1 - > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > 2 files changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/Makefile.am > b/src/mesa/drivers/dri/i965/Makefile.am > index 9879bf79a57..c29cc594c8e 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.am > +++ b/src/mesa/drivers/dri/i965/Makefile.am > @@ -102,7 +102,6 @@ BUILT_SOURCES = $(i965_oa_GENERATED_FILES) > CLEANFILES = $(BUILT_SOURCES) > > EXTRA_DIST = \ > - gen4_blorp_exec.h \ > brw_oa_hsw.xml \ > brw_oa.py > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 1e656eb65a4..041cd079884 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -67,6 +67,7 @@ i965_FILES = \ > brw_wm.h \ > brw_wm_state.c \ > brw_wm_surface_state.c \ > + gen4_blorp_exec.h \ > gen6_clip_state.c \ > gen6_constant_state.c \ > gen6_depth_state.c \ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev