Mesa (master): radv: don't lower indirects until after opts have run
Module: Mesa Branch: master Commit: 9a243eccae618e85aa7af762a4c40ecd8a2e4882 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9a243eccae618e85aa7af762a4c40ecd8a2e4882 Author: Timothy Arceri Date: Thu Mar 8 16:20:48 2018 +1100 radv: don't lower indirects until after opts have run Noticed while passing by. Not sure if it impacts anything, but likely to impact GFX9 more than anything else since we lower inputs, outputs and locals there. Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_shader.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index ac577c36e9..c6935805c7 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -275,7 +275,6 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_lower_var_copies(nir); nir_lower_global_vars_to_local(nir); nir_remove_dead_variables(nir, nir_var_local); - ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { .subgroup_size = 64, .ballot_bit_size = 64, @@ -287,6 +286,14 @@ radv_shader_compile_to_nir(struct radv_device *device, radv_optimize_nir(nir); + /* Indirect lowering must be called after the radv_optimize_nir() loop +* has been called at least once. Otherwise indirect lowering can +* bloat the instruction count of the loop and cause it to be +* considered too large for unrolling. +*/ + ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); + radv_optimize_nir(nir); + return nir; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/nir: fix atomic lowering for gallium drivers
Module: Mesa Branch: master Commit: dfe2f198550b262186e2882d7e573f1f3759deb7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfe2f198550b262186e2882d7e573f1f3759deb7 Author: Timothy Arceri Date: Mon Mar 19 22:23:55 2018 +1100 st/nir: fix atomic lowering for gallium drivers i965 and gallium handle the atomic buffer index differently. It was just by luck that the single piglit test for this was passing. For gallium we use the atomic binding so that we match the handling in st_bind_atomics(). On radeonsi this fixes the CTS test: KHR-GL43.shader_storage_buffer_object.advanced-write-fragment It also fixes tressfx hair rendering in Tomb Raider. Reviewed-by: Marek Olšák --- src/compiler/nir/nir.h| 3 ++- src/compiler/nir/nir_lower_atomics.c | 15 ++- src/mesa/drivers/dri/i965/brw_link.cpp| 2 +- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d7baabd6f6..0d207d0ea5 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2710,7 +2710,8 @@ typedef struct nir_lower_bitmap_options { void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); bool nir_lower_atomics(nir_shader *shader, - const struct gl_shader_program *shader_program); + const struct gl_shader_program *shader_program, + bool use_binding_as_idx); bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); bool nir_lower_to_source_mods(nir_shader *shader); diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index bdab4b8737..6b046bc426 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -38,7 +38,7 @@ static bool lower_instr(nir_intrinsic_instr *instr, const struct gl_shader_program *shader_program, -nir_shader *shader) +nir_shader *shader, bool use_binding_as_idx) { nir_intrinsic_op op; switch (instr->intrinsic) { @@ -98,9 +98,12 @@ lower_instr(nir_intrinsic_instr 
*instr, void *mem_ctx = ralloc_parent(instr); unsigned uniform_loc = instr->variables[0]->var->data.location; + unsigned idx = use_binding_as_idx ? + instr->variables[0]->var->data.binding : + shader_program->data->UniformStorage[uniform_loc].opaque[shader->info.stage].index; + nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); - nir_intrinsic_set_base(new_instr, - shader_program->data->UniformStorage[uniform_loc].opaque[shader->info.stage].index); + nir_intrinsic_set_base(new_instr, idx); nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1, 32); @@ -174,7 +177,8 @@ lower_instr(nir_intrinsic_instr *instr, bool nir_lower_atomics(nir_shader *shader, - const struct gl_shader_program *shader_program) + const struct gl_shader_program *shader_program, + bool use_binding_as_idx) { bool progress = false; @@ -184,7 +188,8 @@ nir_lower_atomics(nir_shader *shader, nir_foreach_instr_safe(instr, block) { if (instr->type == nir_instr_type_intrinsic) progress |= lower_instr(nir_instr_as_intrinsic(instr), - shader_program, shader); + shader_program, shader, + use_binding_as_idx); } } diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index b08b56a935..274a738cbb 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -299,7 +299,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_shader_gather_info(prog->nir, prog); NIR_PASS_V(prog->nir, nir_lower_samplers, shProg); - NIR_PASS_V(prog->nir, nir_lower_atomics, shProg); + NIR_PASS_V(prog->nir, nir_lower_atomics, shProg, false); NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo, prog->nir->info.num_abos); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 2eb2ece5b1..9bb99f3061 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -414,7 +414,7 @@ 
st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, st_set_prog_affected_state_flags(prog); NIR_PASS_V(nir, st_nir_lower_builtin); - NIR_PASS_V(nir, nir_lower_atomics, shader_program); + NIR_PASS_V(nir, nir_lower_atomics, shader_program, true); if (st->ctx->_Shader->Flags & GLSL_DUMP) { _mesa_log("\n"); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: rework ParameterList to allow packing
Module: Mesa Branch: master Commit: edded1237607348683f492db313e823dc2e380c3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=edded1237607348683f492db313e823dc2e380c3 Author: Timothy Arceri Date: Fri Jun 16 10:17:56 2017 +1000 mesa: rework ParameterList to allow packing Currently everything is padded to 4 components. Making the list more flexible will allow us to do uniform packing. V2 (suggestions from Nicolai): - always pass existing calls to _mesa_add_parameter() true for pad_and_align - fix bindless param value offsets - remove left over wip logic from pad and align code - zero out param value padding - whitespace fix Reviewed-by: Marek Olšák --- src/compiler/glsl/serialize.cpp | 14 +-- src/mesa/drivers/dri/i915/i915_fragprog.c | 9 +++-- src/mesa/drivers/dri/i965/gen6_constant_state.c | 3 +- src/mesa/drivers/dri/r200/r200_vertprog.c | 10 +++-- src/mesa/main/uniform_query.cpp | 14 --- src/mesa/program/ir_to_mesa.cpp | 9 +++-- src/mesa/program/prog_execute.c | 6 ++- src/mesa/program/prog_opt_constant_fold.c | 3 +- src/mesa/program/prog_parameter.c | 52 ++--- src/mesa/program/prog_parameter.h | 9 +++-- src/mesa/program/prog_parameter_layout.c| 21 +++--- src/mesa/program/prog_print.c | 4 +- src/mesa/program/prog_statevars.c | 3 +- src/mesa/program/prog_to_nir.c | 3 +- src/mesa/program/program_parse.y| 2 +- src/mesa/state_tracker/st_atifs_to_tgsi.c | 6 ++- src/mesa/state_tracker/st_atom_constbuf.c | 5 ++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +- src/mesa/state_tracker/st_mesa_to_tgsi.c| 8 ++-- 19 files changed, 125 insertions(+), 60 deletions(-) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 1fdbaa990f..727822633d 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -950,6 +950,7 @@ write_shader_parameters(struct blob *metadata, struct gl_program_parameter_list *params) { blob_write_uint32(metadata, params->NumParameters); + blob_write_uint32(metadata, params->NumParameterValues); 
uint32_t i = 0; while (i < params->NumParameters) { @@ -966,7 +967,10 @@ write_shader_parameters(struct blob *metadata, } blob_write_bytes(metadata, params->ParameterValues, -sizeof(gl_constant_value) * 4 * params->NumParameters); +sizeof(gl_constant_value) * params->NumParameterValues); + + blob_write_bytes(metadata, params->ParameterValueOffset, +sizeof(uint32_t) * params->NumParameters); blob_write_uint32(metadata, params->StateFlags); } @@ -978,6 +982,7 @@ read_shader_parameters(struct blob_reader *metadata, gl_state_index16 state_indexes[STATE_LENGTH]; uint32_t i = 0; uint32_t num_parameters = blob_read_uint32(metadata); + uint32_t num_parameters_values = blob_read_uint32(metadata); _mesa_reserve_parameter_storage(params, num_parameters); while (i < num_parameters) { @@ -989,13 +994,16 @@ read_shader_parameters(struct blob_reader *metadata, sizeof(state_indexes)); _mesa_add_parameter(params, type, name, size, data_type, - NULL, state_indexes); + NULL, state_indexes, false); i++; } blob_copy_bytes(metadata, (uint8_t *) params->ParameterValues, -sizeof(gl_constant_value) * 4 * params->NumParameters); + sizeof(gl_constant_value) * num_parameters_values); + + blob_copy_bytes(metadata, (uint8_t *) params->ParameterValueOffset, + sizeof(uint32_t) * num_parameters); params->StateFlags = blob_read_uint32(metadata); } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 2e04319512..6493ab99b1 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -184,11 +184,12 @@ src_vector(struct i915_fragment_program *p, */ case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - src = i915_emit_param4fv(p, -&program->Parameters->ParameterValues[source->Index][0].f); + case PROGRAM_UNIFORM: { + struct gl_program_parameter_list *params = program->Parameters; + unsigned offset = params->ParameterValueOffset[source->Index]; + src = i915_emit_param4fv(p, 
&params->ParameterValues[offset].f); break; - + } default: i915_program_error(p, "Bad source->File: %d", source->File); return 0; diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index afcd2bebd7..919aee49ad 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/
Mesa (master): mesa: add packing support for setting uniform handles
Module: Mesa Branch: master Commit: a2198d4fdb7d93568ba0792a326971abb6d6b3a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2198d4fdb7d93568ba0792a326971abb6d6b3a9 Author: Timothy Arceri Date: Tue Jun 20 10:31:32 2017 +1000 mesa: add packing support for setting uniform handles Reviewed-by: Nicolai Hähnle --- src/mesa/main/uniform_query.cpp | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 14ecfdca2f..f901fcb3e5 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -1523,10 +1523,20 @@ _mesa_uniform_handle(GLint location, GLsizei count, const GLvoid *values, /* Store the data in the "actual type" backing storage for the uniform. */ - memcpy(&uni->storage[size_mul * components * offset], values, - sizeof(uni->storage[0]) * components * count * size_mul); + gl_constant_value *storage; + if (ctx->Const.PackedDriverUniformStorage) { + for (unsigned s = 0; s < uni->num_driver_storage; s++) { + storage = (gl_constant_value *) +uni->driver_storage[s].data + (size_mul * offset * components); + memcpy(storage, values, +sizeof(uni->storage[0]) * components * count * size_mul); + } + } else { + memcpy(&uni->storage[size_mul * components * offset], values, + sizeof(uni->storage[0]) * components * count * size_mul); - _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); + } if (uni->type->is_sampler()) { /* Mark this bindless sampler as not bound to a texture unit because ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: add _mesa_add_sized_state_reference() helper
Module: Mesa Branch: master Commit: 57ebab64c0dd1abd646f4f274d01f19c8e0e7293 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57ebab64c0dd1abd646f4f274d01f19c8e0e7293 Author: Timothy Arceri Date: Tue Mar 13 20:47:48 2018 +1100 mesa: add _mesa_add_sized_state_reference() helper This will be used for adding packed builtin uniforms. Reviewed-by: Marek Olšák --- src/mesa/program/prog_parameter.c | 36 ++-- src/mesa/program/prog_parameter.h | 5 + 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index af9bb37cd5..88821cfba1 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -362,21 +362,11 @@ _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, return pos; } - -/** - * Add a new state reference to the parameter list. - * This will be used when the program contains something like this: - *PARAM ambient = state.material.front.ambient; - * - * \param paramList the parameter list - * \param stateTokens an array of 5 (STATE_LENGTH) state tokens - * \return index of the new parameter. 
- */ GLint -_mesa_add_state_reference(struct gl_program_parameter_list *paramList, - const gl_state_index16 stateTokens[STATE_LENGTH]) +_mesa_add_sized_state_reference(struct gl_program_parameter_list *paramList, +const gl_state_index16 stateTokens[STATE_LENGTH], +const unsigned size, bool pad_and_align) { - const GLuint size = 4; /* XXX fix */ char *name; GLint index; @@ -391,7 +381,8 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, name = _mesa_program_state_string(stateTokens); index = _mesa_add_parameter(paramList, PROGRAM_STATE_VAR, name, - size, GL_NONE, NULL, stateTokens, true); + size, GL_NONE, NULL, stateTokens, + pad_and_align); paramList->StateFlags |= _mesa_program_state_flags(stateTokens); /* free name string here since we duplicated it in add_parameter() */ @@ -399,3 +390,20 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, return index; } + + +/** + * Add a new state reference to the parameter list. + * This will be used when the program contains something like this: + *PARAM ambient = state.material.front.ambient; + * + * \param paramList the parameter list + * \param stateTokens an array of 5 (STATE_LENGTH) state tokens + * \return index of the new parameter. 
+ */ +GLint +_mesa_add_state_reference(struct gl_program_parameter_list *paramList, + const gl_state_index16 stateTokens[STATE_LENGTH]) +{ + return _mesa_add_sized_state_reference(paramList, stateTokens, 4, true); +} diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 83eb0c5613..8e36a1c590 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -128,6 +128,11 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, } extern GLint +_mesa_add_sized_state_reference(struct gl_program_parameter_list *paramList, +const gl_state_index16 stateTokens[STATE_LENGTH], +const unsigned size, bool pad_and_align); + +extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index16 stateTokens[]); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st: add st_glsl_type_dword_size() helper
Module: Mesa Branch: master Commit: a80cf442d939212da40951a0a57a0978de6f39a6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a80cf442d939212da40951a0a57a0978de6f39a6 Author: Timothy Arceri Date: Tue Mar 13 12:34:50 2018 +1100 st: add st_glsl_type_dword_size() helper This will be used to support uniform packing. Reviewed-by: Marek Olšák --- src/mesa/state_tracker/st_glsl_types.cpp | 43 src/mesa/state_tracker/st_glsl_types.h | 1 + 2 files changed, 44 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_types.cpp b/src/mesa/state_tracker/st_glsl_types.cpp index d4d2139d9d..ef7b7fa777 100644 --- a/src/mesa/state_tracker/st_glsl_types.cpp +++ b/src/mesa/state_tracker/st_glsl_types.cpp @@ -108,3 +108,46 @@ st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless) } return 0; } + +int +st_glsl_type_dword_size(const struct glsl_type *type) +{ + unsigned int size, i; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return type->components(); + case GLSL_TYPE_UINT16: + case GLSL_TYPE_INT16: + case GLSL_TYPE_FLOAT16: + return DIV_ROUND_UP(type->components(), 2); + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_UINT64: + case GLSL_TYPE_INT64: + return type->components() * 2; + case GLSL_TYPE_ARRAY: + return st_glsl_type_dword_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += st_glsl_type_dword_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_ATOMIC_UINT: + return 0; + case GLSL_TYPE_SUBROUTINE: + return 1; + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: + unreachable("not reached"); + } + + return 0; +} diff --git a/src/mesa/state_tracker/st_glsl_types.h b/src/mesa/state_tracker/st_glsl_types.h index 915816d1fa..16b31b684f 100644 --- a/src/mesa/state_tracker/st_glsl_types.h +++ 
b/src/mesa/state_tracker/st_glsl_types.h @@ -36,6 +36,7 @@ extern "C" { int st_glsl_storage_type_size(const struct glsl_type *type, bool is_bindless); +int st_glsl_type_dword_size(const struct glsl_type *type); #ifdef __cplusplus } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: allow for uniform packing when adding uniforms to param list
Module: Mesa Branch: master Commit: 40711a7a6063eb6b5069c5f7849fe3cefb70526a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=40711a7a6063eb6b5069c5f7849fe3cefb70526a Author: Timothy Arceri Date: Tue Jun 20 10:44:08 2017 +1000 mesa: allow for uniform packing when adding uniforms to param list Reviewed-by: Nicolai Hähnle --- src/mesa/program/ir_to_mesa.cpp | 32 +++- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f26eddc900..0dad6f9c71 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2449,10 +2449,26 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, _mesa_reserve_parameter_storage(params, num_params); index = params->NumParameters; - for (unsigned i = 0; i < num_params; i++) { - unsigned comps = 4; - _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, - type->gl_type, NULL, NULL, true); + + if (ctx->Const.PackedDriverUniformStorage) { + for (unsigned i = 0; i < num_params; i++) { + unsigned dmul = type->without_array()->is_64bit() ? 
2 : 1; + unsigned comps = type->without_array()->vector_elements * dmul; + if (is_dual_slot) { +if (i & 0x1) + comps -= 4; +else + comps = 4; + } + + _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, + type->gl_type, NULL, NULL, false); + } + } else { + for (unsigned i = 0; i < num_params; i++) { + _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4, + type->gl_type, NULL, NULL, true); + } } /* The first part of the uniform that's processed determines the base @@ -2527,7 +2543,13 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, if (location != last_location) { enum gl_uniform_driver_format format = uniform_native; unsigned columns = 0; - int dmul = 4 * sizeof(float); + + int dmul; + if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm) { +dmul = storage->type->vector_elements * sizeof(float); + } else { +dmul = 4 * sizeof(float); + } switch (storage->type->base_type) { case GLSL_TYPE_UINT64: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/radeonsi: enable uniform packing in NIR backend
Module: Mesa Branch: master Commit: 632d5e97efa3d38155d290fa397af7a729de8682 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=632d5e97efa3d38155d290fa397af7a729de8682 Author: Timothy Arceri Date: Wed Mar 14 09:51:23 2018 +1100 st/radeonsi: enable uniform packing in NIR backend Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_get.c | 6 +- src/mesa/state_tracker/st_glsl_to_nir.cpp | 10 ++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 323700d425..b4ca5bea94 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -251,6 +251,11 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return RADEON_SPARSE_PAGE_SIZE; return 0; + case PIPE_CAP_PACKED_UNIFORMS: + if (sscreen->debug_flags & DBG(NIR)) + return 1; + return 0; + /* Unsupported features. */ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -269,7 +274,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TILE_RASTER_ORDER: case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: case PIPE_CAP_CONTEXT_PRIORITY_MASK: - case PIPE_CAP_PACKED_UNIFORMS: return 0; case PIPE_CAP_FENCE_SIGNAL: diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 9006650517..2eb2ece5b1 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -753,14 +753,8 @@ st_finalize_nir(struct st_context *st, struct gl_program *prog, st_nir_assign_uniform_locations(st->ctx, prog, shader_program, &nir->uniforms, &nir->num_uniforms); - /* Below is a quick hack so that uniform lowering only runs on radeonsi -* (the only NIR backend that currently supports tess) once we enable -* uniform packing support we will just use -* ctx->Const.PackedDriverUniformStorage for this check. 
-*/ - if (screen->get_shader_param(screen, PIPE_SHADER_TESS_CTRL, -PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { - NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, type_size, + if (st->ctx->Const.PackedDriverUniformStorage) { + NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size, (nir_lower_io_options)0); NIR_PASS_V(nir, st_nir_lower_uniforms_to_ubo, prog->Parameters); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: add packing support for setting uniforms
Module: Mesa Branch: master Commit: 6cfa15b8031b30b987b7c62fcdbc5813765e692c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cfa15b8031b30b987b7c62fcdbc5813765e692c Author: Timothy Arceri Date: Tue Jun 20 10:38:05 2017 +1000 mesa: add packing support for setting uniforms Reviewed-by: Nicolai Hähnle --- src/mesa/main/uniform_query.cpp | 72 ++--- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index eaed536f68..14ecfdca2f 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -336,8 +336,14 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, /* Calculate the source base address *BEFORE* modifying elements to * account for the size of the user's buffer. */ - const union gl_constant_value *const src = - &uni->storage[offset * elements * dmul]; + const union gl_constant_value *src; + if (ctx->Const.PackedDriverUniformStorage && + (uni->is_bindless || !uni->type->contains_opaque())) { + src = (gl_constant_value *) uni->driver_storage[0].data + +(offset * elements * dmul); + } else { + src = &uni->storage[offset * elements * dmul]; + } assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT || returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE || @@ -1030,19 +1036,20 @@ _mesa_flush_vertices_for_uniforms(struct gl_context *ctx, } static void -copy_uniforms_to_storage(struct gl_uniform_storage *uni, +copy_uniforms_to_storage(gl_constant_value *storage, + struct gl_uniform_storage *uni, struct gl_context *ctx, GLsizei count, const GLvoid *values, const int size_mul, const unsigned offset, const unsigned components, enum glsl_base_type basicType) { if (!uni->type->is_boolean() && !uni->is_bindless) { - memcpy(&uni->storage[size_mul * components * offset], values, - sizeof(uni->storage[0]) * components * count * size_mul); + memcpy(storage, values, + sizeof(storage[0]) * components * count * size_mul); } else if 
(uni->is_bindless) { const union gl_constant_value *src = (const union gl_constant_value *) values; - GLuint64 *dst = (GLuint64 *)&uni->storage[components * offset].i; + GLuint64 *dst = (GLuint64 *)&storage->i; const unsigned elems = components * count; for (unsigned i = 0; i < elems; i++) { @@ -1051,7 +1058,7 @@ copy_uniforms_to_storage(struct gl_uniform_storage *uni, } else { const union gl_constant_value *src = (const union gl_constant_value *) values; - union gl_constant_value *dst = &uni->storage[components * offset]; + union gl_constant_value *dst = storage; const unsigned elems = components * count; for (unsigned i = 0; i < elems; i++) { @@ -1127,10 +1134,23 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values, /* Store the data in the "actual type" backing storage for the uniform. */ - copy_uniforms_to_storage(uni, ctx, count, values, size_mul, offset, -components, basicType); + gl_constant_value *storage; + if (ctx->Const.PackedDriverUniformStorage && + (uni->is_bindless || !uni->type->contains_opaque())) { + for (unsigned s = 0; s < uni->num_driver_storage; s++) { + storage = (gl_constant_value *) +uni->driver_storage[s].data + (size_mul * offset * components); + + copy_uniforms_to_storage(storage, uni, ctx, count, values, size_mul, + offset, components, basicType); + } + } else { + storage = &uni->storage[size_mul * components * offset]; + copy_uniforms_to_storage(storage, uni, ctx, count, values, size_mul, + offset, components, basicType); - _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); + } /* If the uniform is a sampler, do the extra magic necessary to propagate * the changes through. 
@@ -1222,7 +1242,7 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values, static void -copy_uniform_matrix_to_storage(struct gl_uniform_storage *const uni, +copy_uniform_matrix_to_storage(gl_constant_value *storage, GLsizei count, const void *values, const unsigned size_mul, const unsigned offset, const unsigned components, @@ -1233,13 +1253,13 @@ copy_uniform_matrix_to_storage(struct gl_uniform_storage *const uni, const unsigned elements = components * vectors; if (!transpose) { - memcpy(&uni->storage[size_mul *
Mesa (master): st/nir/radeonsi: move nir_lower_uniforms_to_ubo() to the state tracker
Module: Mesa Branch: master Commit: ffa4bbe4665f95bab7779ded57d6a4de13cb8ffc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ffa4bbe4665f95bab7779ded57d6a4de13cb8ffc Author: Timothy Arceri Date: Fri Mar 9 11:57:52 2018 +1100 st/nir/radeonsi: move nir_lower_uniforms_to_ubo() to the state tracker This will only ever be used by gallium drivers so it probably doesn't belong in the nir toolkit. Also we want to pass it some non NIR things in the following patch. To avoid regressions we wrap the lowering calls that have been moved to st_glsl_to_nir with a quick hack so that they are only called for radeonsi, we will replace the hack with a check for uniform packing in a following patch. Reviewed-by: Marek Olšák --- src/compiler/Makefile.sources | 1 - src/compiler/nir/meson.build| 1 - src/compiler/nir/nir.h | 1 - src/gallium/drivers/radeonsi/si_shader_nir.c| 10 -- src/mesa/Makefile.sources | 1 + src/mesa/meson.build| 1 + src/mesa/state_tracker/st_glsl_to_nir.cpp | 13 + src/mesa/state_tracker/st_nir.h | 1 + .../state_tracker/st_nir_lower_uniforms_to_ubo.c} | 3 ++- 9 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 55143dbc66..b231f2fa97 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -244,7 +244,6 @@ NIR_FILES = \ nir/nir_lower_tex.c \ nir/nir_lower_to_source_mods.c \ nir/nir_lower_two_sided_color.c \ - nir/nir_lower_uniforms_to_ubo.c \ nir/nir_lower_vars_to_ssa.c \ nir/nir_lower_var_copies.c \ nir/nir_lower_vec_to_movs.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 289bb9ea78..e97ce0d1e2 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -138,7 +138,6 @@ files_libnir = files( 'nir_lower_tex.c', 'nir_lower_to_source_mods.c', 'nir_lower_two_sided_color.c', - 'nir_lower_uniforms_to_ubo.c', 'nir_lower_vars_to_ssa.c', 'nir_lower_var_copies.c', 'nir_lower_vec_to_movs.c', diff --git 
a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7ad19b42c1..d7baabd6f6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2712,7 +2712,6 @@ void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *option bool nir_lower_atomics(nir_shader *shader, const struct gl_shader_program *shader_program); bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); -bool nir_lower_uniforms_to_ubo(nir_shader *shader); bool nir_lower_to_source_mods(nir_shader *shader); bool nir_lower_gs_intrinsics(nir_shader *shader); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index acb796b331..7f17affa4d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -32,12 +32,6 @@ #include "compiler/nir_types.h" -static int -type_size(const struct glsl_type *type) -{ - return glsl_count_attribute_slots(type, false); -} - static void scan_instruction(struct tgsi_shader_info *info, nir_instr *instr) { @@ -650,10 +644,6 @@ si_lower_nir(struct si_shader_selector* sel) * - ensure constant offsets for texture instructions are folded * and copy-propagated */ - NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size, - (nir_lower_io_options)0); - NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo); - NIR_PASS_V(sel->nir, nir_lower_returns); NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar); diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 0a9aad52d0..0446078136 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -532,6 +532,7 @@ STATETRACKER_FILES = \ state_tracker/st_nir.h \ state_tracker/st_nir_lower_builtin.c \ state_tracker/st_nir_lower_tex_src_plane.c \ + state_tracker/st_nir_lower_uniforms_to_ubo.c \ state_tracker/st_pbo.c \ state_tracker/st_pbo.h \ state_tracker/st_program.c \ diff --git a/src/mesa/meson.build b/src/mesa/meson.build index 
aa27d59264..b74d169377 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -579,6 +579,7 @@ files_libmesa_gallium = files( 'state_tracker/st_nir.h', 'state_tracker/st_nir_lower_builtin.c', 'state_tracker/st_nir_lower_tex_src_plane.c', + 'state_tracker/st_nir_lower_uniforms_to_ubo.c', 'state_tracker/st_pbo.c', 'state_tracker/st_pbo.h', 'state_tracker/st_program.c', diff --git a/src/mesa/state_tracker/st_gls
Mesa (master): mesa: create copy uniform to storage helpers
Module: Mesa Branch: master Commit: 4a7c5c079b8b9df3ed28ba8fc10aa64e13d17413 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a7c5c079b8b9df3ed28ba8fc10aa64e13d17413 Author: Timothy Arceri Date: Fri Jun 16 15:45:00 2017 +1000 mesa: create copy uniform to storage helpers These will be used in the following patch to allow copying directly to the param list when packing is enabled. Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák --- src/mesa/main/uniform_query.cpp | 154 1 file changed, 91 insertions(+), 63 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 52b04c9243..eaed536f68 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -1029,6 +1029,42 @@ _mesa_flush_vertices_for_uniforms(struct gl_context *ctx, ctx->NewDriverState |= new_driver_state; } +static void +copy_uniforms_to_storage(struct gl_uniform_storage *uni, + struct gl_context *ctx, GLsizei count, + const GLvoid *values, const int size_mul, + const unsigned offset, const unsigned components, + enum glsl_base_type basicType) +{ + if (!uni->type->is_boolean() && !uni->is_bindless) { + memcpy(&uni->storage[size_mul * components * offset], values, + sizeof(uni->storage[0]) * components * count * size_mul); + } else if (uni->is_bindless) { + const union gl_constant_value *src = + (const union gl_constant_value *) values; + GLuint64 *dst = (GLuint64 *)&uni->storage[components * offset].i; + const unsigned elems = components * count; + + for (unsigned i = 0; i < elems; i++) { + dst[i] = src[i].i; + } + } else { + const union gl_constant_value *src = + (const union gl_constant_value *) values; + union gl_constant_value *dst = &uni->storage[components * offset]; + const unsigned elems = components * count; + + for (unsigned i = 0; i < elems; i++) { + if (basicType == GLSL_TYPE_FLOAT) { +dst[i].i = src[i].f != 0.0f ? ctx->Const.UniformBooleanTrue : 0; + } else { +dst[i].i = src[i].i != 0? 
ctx->Const.UniformBooleanTrue : 0; + } + } + } +} + + /** * Called via glUniform*() functions. */ @@ -1091,32 +1127,8 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values, /* Store the data in the "actual type" backing storage for the uniform. */ - if (!uni->type->is_boolean() && !uni->is_bindless) { - memcpy(&uni->storage[size_mul * components * offset], values, - sizeof(uni->storage[0]) * components * count * size_mul); - } else if (uni->is_bindless) { - const union gl_constant_value *src = - (const union gl_constant_value *) values; - GLuint64 *dst = (GLuint64 *)&uni->storage[components * offset].i; - const unsigned elems = components * count; - - for (unsigned i = 0; i < elems; i++) { - dst[i] = src[i].i; - } - } else { - const union gl_constant_value *src = - (const union gl_constant_value *) values; - union gl_constant_value *dst = &uni->storage[components * offset]; - const unsigned elems = components * count; - - for (unsigned i = 0; i < elems; i++) { - if (basicType == GLSL_TYPE_FLOAT) { -dst[i].i = src[i].f != 0.0f ? ctx->Const.UniformBooleanTrue : 0; - } else { -dst[i].i = src[i].i != 0? 
ctx->Const.UniformBooleanTrue : 0; - } - } - } + copy_uniforms_to_storage(uni, ctx, count, values, size_mul, offset, +components, basicType); _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); @@ -1208,6 +1220,56 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values, } } + +static void +copy_uniform_matrix_to_storage(struct gl_uniform_storage *const uni, + GLsizei count, const void *values, + const unsigned size_mul, const unsigned offset, + const unsigned components, + const unsigned vectors, bool transpose, + unsigned cols, unsigned rows, + enum glsl_base_type basicType) +{ + const unsigned elements = components * vectors; + + if (!transpose) { + memcpy(&uni->storage[size_mul * elements * offset], values, + sizeof(uni->storage[0]) * elements * count * size_mul); + } else if (basicType == GLSL_TYPE_FLOAT) { + /* Copy and transpose the matrix. + */ + const float *src = (const float *)values; + float *dst = &uni->storage[elements * offset].f; + + for (int i = 0; i < count; i++) { + for (unsigned r = 0; r < rows; r++) { +for (unsigned c = 0; c < cols; c++) { + dst[(c * comp
Mesa (master): mesa: add propagate uniform support for packed uniforms
Module: Mesa Branch: master Commit: 23777543290b9dfa17bfecb5b389ad3fc8e75820 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23777543290b9dfa17bfecb5b389ad3fc8e75820 Author: Timothy Arceri Date: Tue Mar 13 16:44:06 2018 +1100 mesa: add propagate uniform support for packed uniforms Reviewed-by: Marek Olšák --- src/mesa/program/ir_to_mesa.cpp | 20 ++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0dad6f9c71..b0b322865b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2639,8 +2639,24 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, */ if (propagate_to_storage) { unsigned array_elements = MAX2(1, storage->array_elements); -_mesa_propagate_uniforms_to_driver_storage(storage, 0, - array_elements); +if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && +(storage->is_bindless || !storage->type->contains_opaque())) { + const int dmul = storage->type->is_64bit() ? 2 : 1; + const unsigned components = + storage->type->vector_elements * + storage->type->matrix_columns; + + for (unsigned s = 0; s < storage->num_driver_storage; s++) { + gl_constant_value *uni_storage = (gl_constant_value *) + storage->driver_storage[s].data; + memcpy(uni_storage, storage->storage, + sizeof(storage->storage[0]) * components * + array_elements * dmul); + } +} else { + _mesa_propagate_uniforms_to_driver_storage(storage, 0, + array_elements); +} } last_location = location; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st: add uniform packing support to lower_uniforms_to_ubo()
Module: Mesa Branch: master Commit: 231333a20d88336cd0474c573b46a2509b43245e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=231333a20d88336cd0474c573b46a2509b43245e Author: Timothy Arceri Date: Fri Mar 9 12:30:01 2018 +1100 st: add uniform packing support to lower_uniforms_to_ubo() Reviewed-by: Marek Olšák --- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- src/mesa/state_tracker/st_nir.h | 3 ++- src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c | 16 +++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 2ca64231e0..9006650517 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -762,7 +762,7 @@ st_finalize_nir(struct st_context *st, struct gl_program *prog, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, type_size, (nir_lower_io_options)0); - NIR_PASS_V(nir, st_nir_lower_uniforms_to_ubo); + NIR_PASS_V(nir, st_nir_lower_uniforms_to_ubo, prog->Parameters); } if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF)) diff --git a/src/mesa/state_tracker/st_nir.h b/src/mesa/state_tracker/st_nir.h index 1c2e32a5e6..3dd78fb115 100644 --- a/src/mesa/state_tracker/st_nir.h +++ b/src/mesa/state_tracker/st_nir.h @@ -36,7 +36,8 @@ struct nir_shader; void st_nir_lower_builtin(struct nir_shader *shader); void st_nir_lower_tex_src_plane(struct nir_shader *shader, unsigned free_slots, unsigned lower_2plane, unsigned lower_3plane); -bool st_nir_lower_uniforms_to_ubo(struct nir_shader *shader); +bool st_nir_lower_uniforms_to_ubo(struct nir_shader *shader, + const struct gl_program_parameter_list *params); void st_finalize_nir(struct st_context *st, struct gl_program *prog, struct gl_shader_program *shader_program, diff --git a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c index 1d2cca065b..e2a477ecc7 100644 
--- a/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c +++ b/src/mesa/state_tracker/st_nir_lower_uniforms_to_ubo.c @@ -33,8 +33,11 @@ #include "nir_builder.h" #include "st_nir.h" +#include "program/prog_parameter.h" + static bool -lower_instr(nir_intrinsic_instr *instr, nir_builder *b) +lower_instr(nir_intrinsic_instr *instr, nir_builder *b, +const struct gl_program_parameter_list *params) { b->cursor = nir_before_instr(&instr->instr); @@ -47,10 +50,12 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b) } if (instr->intrinsic == nir_intrinsic_load_uniform) { + unsigned pvo = params->ParameterValueOffset[nir_intrinsic_base(instr)]; + nir_ssa_def *ubo_idx = nir_imm_int(b, 0); nir_ssa_def *ubo_offset = - nir_imul(b, nir_imm_int(b, 16), - nir_iadd(b, nir_imm_int(b, nir_intrinsic_base(instr)), + nir_iadd(b, nir_imm_int(b, 4 * pvo), + nir_imul(b, nir_imm_int(b, 4), nir_ssa_for_src(b, instr->src[0], 1))); nir_intrinsic_instr *load = @@ -72,7 +77,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b) } bool -st_nir_lower_uniforms_to_ubo(nir_shader *shader) +st_nir_lower_uniforms_to_ubo(nir_shader *shader, + const struct gl_program_parameter_list *params) { bool progress = false; @@ -84,7 +90,7 @@ st_nir_lower_uniforms_to_ubo(nir_shader *shader) nir_foreach_instr_safe(instr, block) { if (instr->type == nir_instr_type_intrinsic) progress |= lower_instr(nir_instr_as_intrinsic(instr), - &builder); + &builder, params); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: add PackedDriverUniformStorage const
Module: Mesa Branch: master Commit: b13b9eb432a3b67efb29ca25c3e244b467c3c4af URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b13b9eb432a3b67efb29ca25c3e244b467c3c4af Author: Timothy Arceri Date: Wed Jun 14 15:48:45 2017 +1000 mesa: add PackedDriverUniformStorage const Will be used to determine whether to take packing code paths or not. Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák --- src/mesa/main/mtypes.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 5ee27d9977..08db8062ec 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4132,6 +4132,9 @@ struct gl_constants /** GL_ARB_get_program_binary */ GLuint NumProgramBinaryFormats; + + /** Whether the driver's uniform storage is packed, as opposed to padded to 16 bytes. */ + bool PackedDriverUniformStorage; }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/glsl_to_nir: add support for packed builtin uniforms
Module: Mesa Branch: master Commit: 54881667308aea17f7f21720f5e8b41cf45f2020 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=54881667308aea17f7f21720f5e8b41cf45f2020 Author: Timothy Arceri Date: Tue Mar 13 20:50:27 2018 +1100 st/glsl_to_nir: add support for packed builtin uniforms Reviewed-by: Marek Olšák --- src/mesa/state_tracker/st_glsl_to_nir.cpp | 42 +++ 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 0bd9c4e4e3..1fd553fdf8 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -218,7 +218,8 @@ st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params, } static void -st_nir_assign_uniform_locations(struct gl_program *prog, +st_nir_assign_uniform_locations(struct gl_context *ctx, +struct gl_program *prog, struct gl_shader_program *shader_program, struct exec_list *uniform_list, unsigned *size) { @@ -247,7 +248,21 @@ st_nir_assign_uniform_locations(struct gl_program *prog, /* This state reference has already been setup by ir_to_mesa, but we'll * get the same index back here. 
*/ - loc = _mesa_add_state_reference(prog->Parameters, stateTokens); + + unsigned comps; + const struct glsl_type *type = glsl_without_array(uniform->type); + if (glsl_type_is_struct(type)) { +comps = 4; + } else { +comps = glsl_get_vector_elements(type); + } + + if (ctx->Const.PackedDriverUniformStorage) { +loc = _mesa_add_sized_state_reference(prog->Parameters, + stateTokens, comps, false); + } else { +loc = _mesa_add_state_reference(prog->Parameters, stateTokens); + } } else { loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name); } @@ -359,9 +374,26 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, const nir_state_slot *const slots = var->state_slots; assert(var->state_slots != NULL); + const struct glsl_type *type = glsl_without_array(var->type); for (unsigned int i = 0; i < var->num_state_slots; i++) { -_mesa_add_state_reference(prog->Parameters, - slots[i].tokens); +unsigned comps; +if (glsl_type_is_struct(type)) { + /* Builtin structs require special handling; for now we just +* make all members vec4. See st_nir_lower_builtin. +*/ + comps = 4; +} else { + comps = glsl_get_vector_elements(type); +} + +if (st->ctx->Const.PackedDriverUniformStorage) { + _mesa_add_sized_state_reference(prog->Parameters, + slots[i].tokens, + comps, false); +} else { + _mesa_add_state_reference(prog->Parameters, + slots[i].tokens); +} } } } @@ -717,7 +749,7 @@ st_finalize_nir(struct st_context *st, struct gl_program *prog, NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers); - st_nir_assign_uniform_locations(prog, shader_program, + st_nir_assign_uniform_locations(st->ctx, prog, shader_program, &nir->uniforms, &nir->num_uniforms); if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF)) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: add packed uniform CAP
Module: Mesa Branch: master Commit: 9c51a7ea2956495fd15d0ee6fc47fe99e14b9f36 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c51a7ea2956495fd15d0ee6fc47fe99e14b9f36 Author: Timothy Arceri Date: Fri Aug 18 15:51:48 2017 +1000 gallium: add packed uniform CAP Reviewed-by: Marek Olšák --- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 + src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_get.c| 1 + src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/swr/swr_screen.cpp | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/drivers/vc5/vc5_screen.c | 1 + src/gallium/drivers/virgl/virgl_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + src/mesa/state_tracker/st_context.c | 3 +++ 19 files changed, 22 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index e375d67a4b..3837360fb4 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -418,6 +418,8 @@ The integer capabilities: set when binding that buffer as constant buffer 0. If the buffer doesn't have those bits set, pipe_context::set_constant_buffer(.., 0, ..) is ignored by the driver, and the driver can throw assertion failures. +* ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms + as opposed to padding to vec4s. .. 
_pipe_capf: diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index 6c5c00bf2d..2ae4e86c63 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -269,6 +269,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONTEXT_PRIORITY_MASK: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_PACKED_UNIFORMS: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index f9cafbabeb..f338d756df 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -339,6 +339,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_PACKED_UNIFORMS: return 0; case PIPE_CAP_CONTEXT_PRIORITY_MASK: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 345e82b573..59d2ec6628 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -326,6 +326,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_CONTEXT_PRIORITY_MASK: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_PACKED_UNIFORMS: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 01ef348e3b..3f5d0327bf 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -361,6 +361,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_CONTEXT_PRIORITY_MASK: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_PACKED_UNIFORMS: return 0; } /* should only get here on unhandled 
cases */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index d7fcff16fb..1d1fbaad60 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -228,6 +228,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONTEXT_PRIORITY_MASK: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_PACKED_UNIFORMS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index a9a4dde508..6fd2982e3c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -280,6 +280,7 @@ nv50_screen_get_param(struct pipe_sc
Mesa (master): broadcom/vc5: Add a QPU helper for instructions using the TLB.
Module: Mesa Branch: master Commit: c3a504f470b8116ebcd892ce1f48125549817467 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3a504f470b8116ebcd892ce1f48125549817467 Author: Eric Anholt Date: Mon Mar 19 11:30:27 2018 -0700 broadcom/vc5: Add a QPU helper for instructions using the TLB. This will be used for detecting last thread segment in register spilling. --- src/broadcom/qpu/qpu_instr.c | 22 ++ src/broadcom/qpu/qpu_instr.h | 1 + 2 files changed, 23 insertions(+) diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 978d470cc6..213a0826a5 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -569,6 +569,28 @@ v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op) } bool +v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) +{ +if (inst->sig.ldtlb || +inst->sig.ldtlbu) +return true; + +if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { +if (inst->alu.add.magic_write && +v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) { +return true; +} + +if (inst->alu.mul.magic_write && +v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) { +return true; +} +} + +return false; +} + +bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) { return (inst->type == V3D_QPU_INSTR_TYPE_ALU && diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 9568857f11..e5e9a9a3f1 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -437,6 +437,7 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; ___ 
mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: The ldvpm signal is also a case of using the VPM.
Module: Mesa Branch: master Commit: 407f21ef1bcbd4054927aa8cc7a9f9252b389a87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=407f21ef1bcbd4054927aa8cc7a9f9252b389a87 Author: Eric Anholt Date: Mon Mar 19 11:05:03 2018 -0700 broadcom/vc5: The ldvpm signal is also a case of using the VPM. The QPU scheduling code calling this function already separately checked this signal. --- src/broadcom/qpu/qpu_instr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 9603373943..506cf06d55 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -569,6 +569,9 @@ v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) { +if (inst->sig.ldvpm) +return true; + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op)) return true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Re-do live variables after removing thrsws.
Module: Mesa Branch: master Commit: 55bf2983330dffafce53a2772cac078f4477988e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=55bf2983330dffafce53a2772cac078f4477988e Author: Eric Anholt Date: Fri Feb 23 17:46:35 2018 -0800 broadcom/vc5: Re-do live variables after removing thrsws. Otherwise our start/ends ips won't line up with the actual instructions. --- src/broadcom/compiler/nir_to_vir.c | 1 + src/broadcom/compiler/vir_live_variables.c | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 595689d244..a8098fc320 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1929,6 +1929,7 @@ vir_remove_thrsw(struct v3d_compile *c) vir_remove_instruction(c, inst); } } +vir_calculate_live_intervals(c); c->last_thrsw = NULL; } diff --git a/src/broadcom/compiler/vir_live_variables.c b/src/broadcom/compiler/vir_live_variables.c index 217b716fd9..20acace1fa 100644 --- a/src/broadcom/compiler/vir_live_variables.c +++ b/src/broadcom/compiler/vir_live_variables.c @@ -311,10 +311,20 @@ vir_calculate_live_intervals(struct v3d_compile *c) { int bitset_words = BITSET_WORDS(c->num_temps); -/* If we called this function more than once, then we should be - * freeing the previous arrays. +/* We may be called more than once if we've rearranged the program to + * try to get register allocation to succeed. */ -assert(!c->temp_start); +if (c->temp_start) { +ralloc_free(c->temp_start); +ralloc_free(c->temp_end); + +vir_for_each_block(block, c) { +ralloc_free(block->def); +ralloc_free(block->use); +ralloc_free(block->live_in); +ralloc_free(block->live_out); +} +} c->temp_start = rzalloc_array(c, int, c->num_temps); c->temp_end = rzalloc_array(c, int, c->num_temps); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Move the umul macro to a header.
Module: Mesa Branch: master Commit: c81d6817422c83ba990fac19b165d4dedb1150fe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c81d6817422c83ba990fac19b165d4dedb1150fe Author: Eric Anholt Date: Tue Mar 13 16:23:33 2018 -0700 broadcom/vc5: Move the umul macro to a header. Anywhere we want to multiply, we probably want this. --- src/broadcom/compiler/nir_to_vir.c | 9 + src/broadcom/compiler/v3d_compiler.h | 7 +++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index a8098fc320..61486870dc 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -256,13 +256,6 @@ vir_SAT(struct v3d_compile *c, struct qreg val) } static struct qreg -ntq_umul(struct v3d_compile *c, struct qreg src0, struct qreg src1) -{ -vir_MULTOP(c, src0, src1); -return vir_UMUL24(c, src0, src1); -} - -static struct qreg ntq_minify(struct v3d_compile *c, struct qreg size, struct qreg level) { return vir_MAX(c, vir_SHR(c, size, level), vir_uniform_ui(c, 1)); @@ -765,7 +758,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) break; case nir_op_imul: -result = ntq_umul(c, src[0], src[1]); +result = vir_UMUL(c, src[0], src[1]); break; case nir_op_seq: diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 94cbd0523c..f777cfcd87 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -916,6 +916,13 @@ vir_LDTMU(struct v3d_compile *c) } } +static inline struct qreg +vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) +{ +vir_MULTOP(c, src0, src1); +return vir_UMUL24(c, src0, src1); +} + /* static inline struct qreg vir_LOAD_IMM(struct v3d_compile *c, uint32_t val) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Add cursors to the compiler infrastructure, like NIR's.
Module: Mesa Branch: master Commit: d721348dcdb3658572c5952563d1f4d1ca0321af URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d721348dcdb3658572c5952563d1f4d1ca0321af Author: Eric Anholt Date: Tue Mar 13 15:41:16 2018 -0700 broadcom/vc5: Add cursors to the compiler infrastructure, like NIR's. This will let me do lowering late in compilation using the same instruction builder as we use in nir_to_vir. --- src/broadcom/compiler/nir_to_vir.c | 9 +++- src/broadcom/compiler/v3d_compiler.h | 43 src/broadcom/compiler/vir.c | 29 ++-- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 61486870dc..c1ba1e3049 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -198,14 +198,11 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, if (c->execute.file != QFILE_NULL) { last_inst->dst.index = qregs[chan].index; -/* Set the flags to the current exec mask. To insert - * the flags push, we temporarily remove our SSA - * instruction. +/* Set the flags to the current exec mask. */ -list_del(&last_inst->link); +c->cursor = vir_before_inst(last_inst); vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); -list_addtail(&last_inst->link, - &c->cur_block->instructions); +c->cursor = vir_after_inst(last_inst); vir_set_cond(last_inst, V3D_QPU_COND_IFA); last_inst->cond_is_exec_mask = true; diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index f777cfcd87..fdf1b13197 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -384,6 +384,48 @@ struct qblock { /** @} */ }; +/** Which util/list.h add mode we should use when inserting an instruction. */ +enum vir_cursor_mode { +vir_cursor_add, +vir_cursor_addtail, +}; + +/** + * Tracking structure for where new instructions should be inserted. Create + * with one of the vir_after_inst()-style helper functions. 
+ * + * This does not protect against removal of the block or instruction, so we + * have an assert in instruction removal to try to catch it. + */ +struct vir_cursor { +enum vir_cursor_mode mode; +struct list_head *link; +}; + +static inline struct vir_cursor +vir_before_inst(struct qinst *inst) +{ +return (struct vir_cursor){ vir_cursor_addtail, &inst->link }; +} + +static inline struct vir_cursor +vir_after_inst(struct qinst *inst) +{ +return (struct vir_cursor){ vir_cursor_add, &inst->link }; +} + +static inline struct vir_cursor +vir_before_block(struct qblock *block) +{ +return (struct vir_cursor){ vir_cursor_add, &block->instructions }; +} + +static inline struct vir_cursor +vir_after_block(struct qblock *block) +{ +return (struct vir_cursor){ vir_cursor_addtail, &block->instructions }; +} + /** * Compiler state saved across compiler invocations, for any expensive global * setup. @@ -500,6 +542,7 @@ struct v3d_compile { struct qreg undef; uint32_t num_temps; +struct vir_cursor cursor; struct list_head blocks; int next_block_index; struct qblock *cur_block; diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 7ea431036e..0b2bbf0e79 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -418,7 +418,16 @@ vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src) static void vir_emit(struct v3d_compile *c, struct qinst *inst) { -list_addtail(&inst->link, &c->cur_block->instructions); +switch (c->cursor.mode) { +case vir_cursor_add: +list_add(&inst->link, c->cursor.link); +break; +case vir_cursor_addtail: +list_addtail(&inst->link, c->cursor.link); +break; +} + +c->cursor = vir_after_inst(inst); } /* Updates inst to write to a new temporary, emits it, and notes the def. 
*/ @@ -468,6 +477,7 @@ void vir_set_emit_block(struct v3d_compile *c, struct qblock *block) { c->cur_block = block; +c->cursor = vir_after_block(block); list_addtail(&block->link, &c->blocks); } @@ -791,6 +801,8 @@ vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst) if (qinst->dst.file == QFILE_TEMP) c->defs[qinst->dst.index] = NULL; +assert(&qinst->link != c->cursor.link); + list_del(&qinst->link); free(qinst); } @@ -818,6 +830,10 @@ vir_follo
Mesa (master): broadcom/vc5: Add support for register spilling.
Module: Mesa Branch: master Commit: facc3c6f58de88ac3707a1b8435b7fc655d13124 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=facc3c6f58de88ac3707a1b8435b7fc655d13124 Author: Eric Anholt Date: Tue Mar 13 15:13:00 2018 -0700 broadcom/vc5: Add support for register spilling. Our register spilling support is nice to have since vc4 couldn't at all, but we're still very restricted due to needing to not spill during a TMU operation, or during the last segment of the program (which would be nice to spill a value of, when there's a long-lived value being passed through with little modification from the start to the end). We could do better by emitting unspills for the last-segment values just before the last thrsw, since the last segment is probably not the maximum interference area. Fixes GTF uniform_buffer_object_arrays_of_all_valid_basic_types and 3 others. --- src/broadcom/compiler/nir_to_vir.c| 11 +- src/broadcom/compiler/v3d_compiler.h | 24 ++- src/broadcom/compiler/vir.c | 8 + src/broadcom/compiler/vir_register_allocate.c | 244 +- src/gallium/drivers/vc5/vc5_context.h | 3 + src/gallium/drivers/vc5/vc5_program.c | 15 ++ src/gallium/drivers/vc5/vc5_uniforms.c| 12 ++ 7 files changed, 306 insertions(+), 11 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index c1ba1e3049..75e35067f2 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1919,12 +1919,11 @@ vir_remove_thrsw(struct v3d_compile *c) vir_remove_instruction(c, inst); } } -vir_calculate_live_intervals(c); c->last_thrsw = NULL; } -static void +void vir_emit_last_thrsw(struct v3d_compile *c) { /* On V3D before 4.1, we need a TMU op to be outstanding when thread @@ -2012,16 +2011,16 @@ v3d_nir_to_vir(struct v3d_compile *c) fprintf(stderr, "\n"); } -/* Compute the live ranges so we can figure out interference. */ -vir_calculate_live_intervals(c); - /* Attempt to allocate registers for the temporaries. 
If we fail, * reduce thread count and try again. */ int min_threads = (c->devinfo->ver >= 41) ? 2 : 1; struct qpu_reg *temp_registers; while (true) { -temp_registers = v3d_register_allocate(c); +bool spilled; +temp_registers = v3d_register_allocate(c, &spilled); +if (spilled) +continue; if (temp_registers) break; diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index fdf1b13197..84cc4d290a 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -248,6 +248,12 @@ enum quniform_contents { QUNIFORM_ALPHA_REF, QUNIFORM_SAMPLE_MASK, + +/** + * Returns the offset of the scratch buffer for register spilling. + */ +QUNIFORM_SPILL_OFFSET, +QUNIFORM_SPILL_SIZE_PER_THREAD, }; struct v3d_varying_slot { @@ -506,6 +512,20 @@ struct v3d_compile { uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; uint32_t num_vpm_writes; +/* Size in bytes of registers that have been spilled. This is how much + * space needs to be available in the spill BO per thread per QPU. + */ +uint32_t spill_size; +/* Shader-db stats for register spilling. */ +uint32_t spills, fills; +/** + * Register spilling's per-thread base address, shared between each + * spill/fill's addressing calculations. + */ +struct qreg spill_base; +/* Bit vector of which temps may be spilled */ +BITSET_WORD *spillable; + /** * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
* @@ -600,6 +620,7 @@ struct v3d_prog_data { struct v3d_ubo_range *ubo_ranges; uint32_t num_ubo_ranges; uint32_t ubo_size; +uint32_t spill_size; uint8_t num_inputs; uint8_t threads; @@ -697,6 +718,7 @@ void vir_set_unpack(struct qinst *inst, int src, enum v3d_qpu_input_unpack unpack); struct qreg vir_get_temp(struct v3d_compile *c); +void vir_emit_last_thrsw(struct v3d_compile *c); void vir_calculate_live_intervals(struct v3d_compile *c); bool vir_has_implicit_uniform(struct qinst *inst); int vir_get_implicit_uniform_src(struct qinst *inst); @@ -746,7 +768,7 @@ void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); void qpu_validate(struct v3d_compile *c); -struct qpu_reg *v3d_register_allocate(struc
Mesa (master): broadcom/vc5: Don't annotate dumps with stale live intervals.
Module: Mesa Branch: master Commit: 00910e3057588de3fe9b5dc2ae9263c2e4ba6cc4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=00910e3057588de3fe9b5dc2ae9263c2e4ba6cc4 Author: Eric Anholt Date: Wed Mar 14 11:03:23 2018 -0700 broadcom/vc5: Don't annotate dumps with stale live intervals. As you're debugging register allocation, you may have changed the intervals and not recomputed yet. Just skip the dump in that case. --- src/broadcom/compiler/v3d_compiler.h | 1 + src/broadcom/compiler/vir.c| 3 +++ src/broadcom/compiler/vir_dump.c | 4 ++-- src/broadcom/compiler/vir_live_variables.c | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 84cc4d290a..df81f0757e 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -548,6 +548,7 @@ struct v3d_compile { /* Live ranges of temps. */ int *temp_start, *temp_end; +bool live_intervals_valid; uint32_t *uniform_data; enum quniform_contents *uniform_contents; diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 0cbdc986d3..05f557fbcd 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -435,6 +435,7 @@ vir_emit(struct v3d_compile *c, struct qinst *inst) } c->cursor = vir_after_inst(inst); +c->live_intervals_valid = false; } /* Updates inst to write to a new temporary, emits it, and notes the def. 
*/ @@ -813,6 +814,8 @@ vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst) list_del(&qinst->link); free(qinst); + +c->live_intervals_valid = false; } struct qreg diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ef860cbb5c..90a3fb0ac6 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -321,7 +321,7 @@ vir_dump(struct v3d_compile *c) vir_for_each_block(block, c) { fprintf(stderr, "BLOCK %d:\n", block->index); vir_for_each_inst(inst, block) { -if (c->temp_start) { +if (c->live_intervals_valid) { bool first = true; for (int i = 0; i < c->num_temps; i++) { @@ -342,7 +342,7 @@ vir_dump(struct v3d_compile *c) fprintf(stderr, " "); } -if (c->temp_end) { +if (c->live_intervals_valid) { bool first = true; for (int i = 0; i < c->num_temps; i++) { diff --git a/src/broadcom/compiler/vir_live_variables.c b/src/broadcom/compiler/vir_live_variables.c index 20acace1fa..019cde1456 100644 --- a/src/broadcom/compiler/vir_live_variables.c +++ b/src/broadcom/compiler/vir_live_variables.c @@ -347,4 +347,6 @@ vir_calculate_live_intervals(struct v3d_compile *c) ; vir_compute_start_end(c, c->num_temps); + +c->live_intervals_valid = true; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Correct the arg count of TIDX/EIDX.
Module: Mesa Branch: master Commit: 9e28c18cd1b99cb5dc5842da74426db21d5dcc8f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9e28c18cd1b99cb5dc5842da74426db21d5dcc8f Author: Eric Anholt Date: Tue Mar 13 16:08:25 2018 -0700 broadcom/vc5: Correct the arg count of TIDX/EIDX. --- src/broadcom/compiler/v3d_compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index cccb54184a..94cbd0523c 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -835,8 +835,8 @@ VIR_A_ALU1(FLBPUSH) VIR_A_ALU1(FLBPOP) VIR_A_ALU1(SETMSF) VIR_A_ALU1(SETREVF) -VIR_A_ALU1(TIDX) -VIR_A_ALU1(EIDX) +VIR_A_ALU0(TIDX) +VIR_A_ALU0(EIDX) VIR_A_ALU1(LDVPMV_IN) VIR_A_ALU1(LDVPMV_OUT) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Remove redundant last_inst lookup.
Module: Mesa Branch: master Commit: 271fc58ba1b9e6a0245c7ab262834705f2e20372 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=271fc58ba1b9e6a0245c7ab262834705f2e20372 Author: Eric Anholt Date: Wed Mar 14 14:43:15 2018 -0700 broadcom/vc5: Remove redundant last_inst lookup. The point was to get the MOV, which the MOV_dest already returned. --- src/broadcom/compiler/vir.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 0b2bbf0e79..6a315dd482 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -900,7 +900,6 @@ vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf) last_inst != c->defs[src.index]) { /* XXX: Make the MOV be the appropriate type */ last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src); -last_inst = (struct qinst *)c->cur_block->instructions.prev; } vir_set_pf(last_inst, pf); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Introduce v3d_qpu_reads_vpm()/v3d_qpu_writes_vpm().
Module: Mesa Branch: master Commit: 09c4dd19713b3155bd744f873e91e0328be62978 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09c4dd19713b3155bd744f873e91e0328be62978 Author: Eric Anholt Date: Mon Mar 19 11:03:47 2018 -0700 broadcom/vc5: Introduce v3d_qpu_reads_vpm()/v3d_qpu_writes_vpm(). These helpers will be used in register spilling to determine where to add a last thrsw if needed, and might help refactor QPU scheduling. --- src/broadcom/qpu/qpu_instr.c | 35 --- src/broadcom/qpu/qpu_instr.h | 4 +++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 506cf06d55..978d470cc6 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -535,7 +535,7 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) } static bool -v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op) +v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op) { switch (op) { case V3D_QPU_A_VPMSETUP: @@ -547,6 +547,18 @@ v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op) case V3D_QPU_A_LDVPMP: case V3D_QPU_A_LDVPMG_IN: case V3D_QPU_A_LDVPMG_OUT: +return true; +default: +return false; +} +} + +static bool +v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op) +{ +switch (op) { +case V3D_QPU_A_VPMSETUP: +case V3D_QPU_A_VPMWT: case V3D_QPU_A_STVPMV: case V3D_QPU_A_STVPMD: case V3D_QPU_A_STVPMP: @@ -567,13 +579,24 @@ v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) } bool -v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) +v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) { if (inst->sig.ldvpm) return true; if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { -if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op)) +if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op)) +return true; +} + +return false; +} + +bool +v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) +{ +if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { +if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op)) return true; if (inst->alu.add.magic_write && @@ -591,6 +614,12 @@ 
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) } bool +v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) +{ +return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst); +} + +bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *inst) { diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 39232b0e61..9568857f11 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -445,7 +445,9 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); -bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst); +bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: Extract v3d_qpu_writes_tmu() helper.
Module: Mesa Branch: master Commit: 4760040c0980a8921120d517d5e5809f7f0e488c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4760040c0980a8921120d517d5e5809f7f0e488c Author: Eric Anholt Date: Wed Mar 14 15:04:32 2018 -0700 broadcom/vc5: Extract v3d_qpu_writes_tmu() helper. This will be reused in register spilling. --- src/broadcom/compiler/qpu_schedule.c | 7 +-- src/broadcom/qpu/qpu_instr.c | 10 ++ src/broadcom/qpu/qpu_instr.h | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 3ced2a4949..b404390a79 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -588,13 +588,8 @@ get_instruction_priority(const struct v3d_qpu_instr *inst) next_score++; /* Schedule texture read setup early to hide their latency better. */ -if (inst->type == V3D_QPU_INSTR_TYPE_ALU && -((inst->alu.add.magic_write && - v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) || - (inst->alu.mul.magic_write && - v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr { +if (v3d_qpu_writes_tmu(inst)) return next_score; -} next_score++; return baseline_score; diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index f31c81f8ca..9603373943 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -557,6 +557,16 @@ v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op) } bool +v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) +{ +return (inst->type == V3D_QPU_INSTR_TYPE_ALU && +((inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) || + (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr; +} + +bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 2289e18225..39232b0e61 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -437,6 
+437,7 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): broadcom/vc5: On QPU pack error, dump the instruction and return cleanly.
Module: Mesa Branch: master Commit: 34dc64f6274db73851b0f1e5f0440a9785cafd6c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34dc64f6274db73851b0f1e5f0440a9785cafd6c Author: Eric Anholt Date: Wed Mar 14 14:39:51 2018 -0700 broadcom/vc5: On QPU pack error, dump the instruction and return cleanly. This is nice for debugging when you've made a bad instruction. --- src/broadcom/compiler/vir_to_qpu.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index 568a004803..83b1936cbd 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -388,7 +388,13 @@ v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers) vir_for_each_inst_inorder(inst, c) { bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu, &c->qpu_insts[i++]); -assert(ok); (void) ok; +if (!ok) { +fprintf(stderr, "Failed to pack instruction:\n"); +vir_dump_inst(c, inst); +fprintf(stderr, "\n"); +c->failed = true; +return; +} } assert(i == c->qpu_inst_count); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: don't export NULL layer.
Module: Mesa Branch: master Commit: 32791a05024d54736eab21379e849480fea78559 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=32791a05024d54736eab21379e849480fea78559 Author: Dave Airlie Date: Mon Mar 19 20:02:58 2018 + radv: don't export NULL layer. We have some cases where in subpass we want the layer but having it be 0 and loaded in the frag shader without the vertex shader exporting it is fine. So don't export the layer if we don't have a value to put in it. Fixes: d4c74aed7a8 (radv/multiview: mark layer_input if we have input attachments.) Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 7379f348d8..ad046adfdb 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2363,7 +2363,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, outinfo->export_prim_id = true; } - if (export_layer_id) { + if (export_layer_id && layer_value) { LLVMValueRef values[4]; values[0] = layer_value; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: adjust incorrect comment in texture_buffer_range
Module: Mesa Branch: master Commit: f674b50d0ead3fc3f67e2579f2854dae991e9b78 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f674b50d0ead3fc3f67e2579f2854dae991e9b78 Author: Marek Olšák Date: Tue Mar 6 17:32:09 2018 -0500 mesa: adjust incorrect comment in texture_buffer_range --- src/mesa/main/teximage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 9c13e6d7cf..9e139d746f 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -5472,8 +5472,8 @@ texture_buffer_range(struct gl_context *ctx, GLsizeiptr oldSize = texObj->BufferSize; mesa_format format; - /* NOTE: ARB_texture_buffer_object has interactions with -* the compatibility profile that are not implemented. + /* NOTE: ARB_texture_buffer_object might not be supported in +* the compatibility profile. */ if (!_mesa_has_ARB_texture_buffer_object(ctx) && !_mesa_has_OES_texture_buffer(ctx)) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nir: Don't compare b2f or b2i with zero
Module: Mesa Branch: master Commit: 6aeaa7d363d45f4d09ff51ad925e893f931f502e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6aeaa7d363d45f4d09ff51ad925e893f931f502e Author: Ian Romanick Date: Tue Mar 1 19:05:14 2016 -0800 nir: Don't compare b2f or b2i with zero All of the shaders that had loops changed were in Tomb Raider. The one shader that lost SIMD16 is one of those. Skylake total instructions in shared programs: 14391653 -> 14390468 (<.01%) instructions in affected programs: 111891 -> 110706 (-1.06%) helped: 501 HURT: 0 helped stats (abs) min: 1 max: 155 x̄: 2.37 x̃: 1 helped stats (rel) min: 0.05% max: 21.54% x̄: 1.61% x̃: 1.01% 95% mean confidence interval for instructions value: -3.23 -1.50 95% mean confidence interval for instructions %-change: -1.77% -1.45% Instructions are helped. total cycles in shared programs: 532793024 -> 532776598 (<.01%) cycles in affected programs: 987682 -> 971256 (-1.66%) helped: 348 HURT: 41 helped stats (abs) min: 1 max: 3074 x̄: 54.91 x̃: 18 helped stats (rel) min: 0.05% max: 32.24% x̄: 3.36% x̃: 1.68% HURT stats (abs) min: 1 max: 422 x̄: 65.39 x̃: 24 HURT stats (rel) min: 0.09% max: 39.29% x̄: 9.50% x̃: 2.02% 95% mean confidence interval for cycles value: -64.08 -20.38 95% mean confidence interval for cycles %-change: -2.78% -1.23% Cycles are helped. 
total loops in shared programs: 4854 -> 4829 (-0.52%) loops in affected programs: 27 -> 2 (-92.59%) helped: 18 HURT: 0 LOST: 1 GAINED: 0 Signed-off-by: Ian Romanick Reviewed-by: Timothy Arceri --- src/compiler/nir/nir_opt_algebraic.py | 5 + 1 file changed, 5 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index c9575e6be4..b9565cea7b 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -183,6 +183,11 @@ optimizations = [ (('fge', ('fmin', ('fneg', ('b2f', a)), b), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))), (('feq', ('fmin', ('fneg', ('b2f', a)), b), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))), + (('feq', ('b2f', a), 0.0), ('inot', a)), + (('fne', ('b2f', a), 0.0), a), + (('ieq', ('b2i', a), 0), ('inot', a)), + (('ine', ('b2i', a), 0), a), + # 0.0 < fabs(a) # fabs(a) > 0.0 # fabs(a) != 0.0 because fabs(a) must be >= 0 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/query: handle multiview queries properly. (v3)
Module: Mesa Branch: master Commit: 32b4f3c38dc25694437af6f017b45b9658eac3bc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=32b4f3c38dc25694437af6f017b45b9658eac3bc Author: Dave Airlie Date: Thu Mar 15 20:23:30 2018 + radv/query: handle multiview queries properly. (v3) For multiview we need to emit a number of sequential queries depending on the view mask. This avoids dEQP-VK.multiview.queries.15 waiting forever on the CPU for query results that are never coming. We only really want to emit one query, and the rest should be blank (amdvlk does the same), so we emit begin/end pairs for all the others except the first query. v2: fix tests v3: split out patch. Fixes: dEQP-VK.multiview.queries* Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_query.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 5fae8b6565..7a20314f61 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1178,6 +1178,25 @@ void radv_CmdBeginQuery( va += pool->stride * query; emit_begin_query(cmd_buffer, va, pool->type); + + /* +* For multiview we have to emit a query for each bit in the mask, +* however the first query we emit will get the totals for all the +* operations, so we don't want to get a real value in the other +* queries. This emits a fake begin/end sequence so the waiting +* code gets a completed query value and doesn't hang, but the +* query returns 0. +*/ + if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) { + uint64_t avail_va = va + pool->availability_offset + 4 * query; + + for (unsigned i = 0; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) { + va += pool->stride; + avail_va += 4; + emit_begin_query(cmd_buffer, va, pool->type); + emit_end_query(cmd_buffer, va, avail_va, pool->type); + } + } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/multiview: mark layer_input if we have input attachments.
Module: Mesa Branch: master Commit: d4c74aed7a81c65ef91d4d3065b3f822355746e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4c74aed7a81c65ef91d4d3065b3f822355746e7 Author: Dave Airlie Date: Mon Mar 19 03:41:18 2018 + radv/multiview: mark layer_input if we have input attachments. This fixes: dEQP-VK.multiview.input_attachments* Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_shader_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 7208bd2f58..9c18791524 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -122,8 +122,10 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); if (dim == GLSL_SAMPLER_DIM_SUBPASS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS) + dim == GLSL_SAMPLER_DIM_SUBPASS_MS) { + info->ps.layer_input = true; info->ps.uses_input_attachments = true; + } mark_sampler_desc(instr->variables[0]->var, info); if (nir_intrinsic_image_store || ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: lower constant initializers on output variables earlier
Module: Mesa Branch: master Commit: e8d9b7ab02fc56cadffc7a2bb993b39cccde2b66 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e8d9b7ab02fc56cadffc7a2bb993b39cccde2b66 Author: Dave Airlie Date: Mon Mar 19 04:27:49 2018 + radv: lower constant initializers on output variables earlier If a shader only writes to an output via a constant initializer we need to lower it before we call nir_remove_dead_variables so that this pass sees the stores from the initializer and doesn't kill the output. Fixes test failures in new work-in-progress CTS tests: dEQP-VK.spirv_assembly.instruction.graphics.variable_init.output.float This is ported from anv: 99b57daf4a anv/pipeline: lower constant initializers on output variables earlier from Iago Toral Quiroga Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_shader.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 180b427a44..ac577c36e9 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -244,6 +244,11 @@ radv_shader_compile_to_nir(struct radv_device *device, assert(exec_list_length(&nir->functions) == 1); entry_point->name = ralloc_strdup(entry_point, "main"); + /* Make sure we lower constant initializers on output variables so that +* nir_remove_dead_variables below sees the corresponding stores +*/ + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out | nir_var_system_value); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/query: split out begin/end query emission
Module: Mesa Branch: master Commit: 4034dc5c72791e010eb64dece4bca542f56cec09 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4034dc5c72791e010eb64dece4bca542f56cec09 Author: Dave Airlie Date: Mon Mar 19 01:24:52 2018 + radv/query: split out begin/end query emission This just splits out the begin/end query hw emissions, it makes it easier to add multiview support for queries. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_query.c | 98 ++--- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 9fee4d2b49..5fae8b6565 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1077,33 +1077,12 @@ void radv_CmdResetQueryPool( } } -void radv_CmdBeginQuery( -VkCommandBuffer commandBuffer, -VkQueryPool queryPool, -uint32_tquery, -VkQueryControlFlags flags) +static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, +uint64_t va, +VkQueryType query_type) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); struct radeon_winsys_cs *cs = cmd_buffer->cs; - uint64_t va = radv_buffer_get_va(pool->bo); - va += pool->stride * query; - - radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 8); - - if (cmd_buffer->pending_reset_query) { - if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { - /* Only need to flush caches if the query pool size is -* large enough to be resetted using the compute shader -* path. Small pools don't need any cache flushes -* because we use a CP dma clear. 
-*/ - si_emit_cache_flush(cmd_buffer); - cmd_buffer->pending_reset_query = false; - } - } - - switch (pool->type) { + switch (query_type) { case VK_QUERY_TYPE_OCCLUSION: radeon_check_space(cmd_buffer->device->ws, cs, 7); @@ -1127,26 +1106,15 @@ void radv_CmdBeginQuery( default: unreachable("beginning unhandled query type"); } -} +} -void radv_CmdEndQuery( -VkCommandBuffer commandBuffer, -VkQueryPool queryPool, -uint32_tquery) +static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, + uint64_t va, uint64_t avail_va, + VkQueryType query_type) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); struct radeon_winsys_cs *cs = cmd_buffer->cs; - uint64_t va = radv_buffer_get_va(pool->bo); - uint64_t avail_va = va + pool->availability_offset + 4 * query; - va += pool->stride * query; - - /* Do not need to add the pool BO to the list because the query must -* currently be active, which means the BO is already in the list. -*/ - - switch (pool->type) { + switch (query_type) { case VK_QUERY_TYPE_OCCLUSION: radeon_check_space(cmd_buffer->device->ws, cs, 14); @@ -1182,6 +1150,54 @@ void radv_CmdEndQuery( } } +void radv_CmdBeginQuery( +VkCommandBuffer commandBuffer, +VkQueryPool queryPool, +uint32_tquery, +VkQueryControlFlags flags) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); + struct radeon_winsys_cs *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(pool->bo); + + radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 8); + + if (cmd_buffer->pending_reset_query) { + if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) { + /* Only need to flush caches if the query pool size is +* large enough to be resetted using the compute shader +* path. Small pools don't need any cache flushes +* because we use a CP dma clear. 
+*/ + si_emit_cache_flush(cmd_buffer); + cmd_buffer->pending_reset_query = false; + } + } + + va += pool->stride * query; + + emit_begin_query(cmd_buffer, va, pool->type); +} + + +void radv_CmdEndQuery( +VkCommandBuffer commandBuffer, +VkQueryPool qu
Mesa (master): radv/query: handle multiview timestamp queries.
Module: Mesa Branch: master Commit: 032014ac01a2dfd6c8e689b3d59989eb6fa2396b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=032014ac01a2dfd6c8e689b3d59989eb6fa2396b Author: Dave Airlie Date: Mon Mar 19 01:27:37 2018 + radv/query: handle multiview timestamp queries. For each view bit we need to emit a timestamp query. Fixes: dEQP-VK.multiview.queries* Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_query.c | 79 - 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 7a20314f61..cc943d5de0 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1233,42 +1233,49 @@ void radv_CmdWriteTimestamp( radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 5); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); - - switch(pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(V_370_MEM_ASYNC)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, query_va); - radeon_emit(cs, query_va >> 32); - - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); - break; - default: - si_cs_emit_write_event_eop(cs, - false, - cmd_buffer->device->physical_device->rad_info.chip_class, - mec, - V_028A90_BOTTOM_OF_PIPE_TS, 0, - 3, query_va, 0, 0); - si_cs_emit_write_event_eop(cs, - false, - cmd_buffer->device->physical_device->rad_info.chip_class, - mec, - V_028A90_BOTTOM_OF_PIPE_TS, 0, - 1, avail_va, 0, 1); - break; - } + int num_queries = 1; + if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) + num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask); + 
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries); + + for (unsigned i = 0; i < num_queries; i++) { + switch(pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | + COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | + COPY_DATA_DST_SEL(V_370_MEM_ASYNC)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, query_va); + radeon_emit(cs, query_va >> 32); + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, 1); + break; + default: + si_cs_emit_write_event_eop(cs, + false, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 3, query_va, 0, 0); + si_cs_emit_write_event_eop(cs, + false, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 1, avail_va, 0, 1); + bre
Mesa (master): anv/pipeline: fail if TCS/TES compile fail
Module: Mesa Branch: master Commit: 318073ce660ca72b47ba83e37d1d0bc756f779b7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=318073ce660ca72b47ba83e37d1d0bc756f779b7 Author: Caio Marcelo de Oliveira Filho Date: Thu Mar 15 13:09:29 2018 -0700 anv/pipeline: fail if TCS/TES compile fail v2: Add Fixes tag. (Lionel) Fixes: e50d4807a35e679 ("anv: Compile TCS/TES shaders.") Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_pipeline.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 9cfd16df2a..cb34f3be77 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1349,13 +1349,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline, } if (modules[MESA_SHADER_TESS_EVAL]) { - anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo, - modules[MESA_SHADER_TESS_CTRL], - pStages[MESA_SHADER_TESS_CTRL]->pName, - pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, - modules[MESA_SHADER_TESS_EVAL], - pStages[MESA_SHADER_TESS_EVAL]->pName, - pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo); + result = anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo, +modules[MESA_SHADER_TESS_CTRL], + pStages[MESA_SHADER_TESS_CTRL]->pName, + pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, +modules[MESA_SHADER_TESS_EVAL], + pStages[MESA_SHADER_TESS_EVAL]->pName, + pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo); + if (result != VK_SUCCESS) + goto compile_fail; } if (modules[MESA_SHADER_GEOMETRY]) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv/pipeline: set active_stages early
Module: Mesa Branch: master Commit: f6338c3b856711d6a399b7f6dccbf3a7062b4586 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6338c3b856711d6a399b7f6dccbf3a7062b4586 Author: Caio Marcelo de Oliveira Filho Date: Thu Mar 15 13:09:30 2018 -0700 anv/pipeline: set active_stages early Since the intermediate states of active_stages are not used, i.e. active_stages is read only after all stages were set into it, just set its value before compiling the shaders. This will allow to conditionally run certain passes based on what other shaders are being used, e.g. a certain pass might only be applicable to the vertex shader if there's no geometry or tessellation shader being used. v2: Use vk_to_mesa_shader_stage. (Lionel) Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_pipeline.c | 12 +--- src/intel/vulkan/genX_pipeline.c | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index cb34f3be77..4ca1e0be34 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -501,7 +501,6 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, struct anv_shader_bin *shader) { pipeline->shaders[stage] = shader; - pipeline->active_stages |= mesa_to_vk_shader_stage(stage); } static VkResult @@ -1334,11 +1333,18 @@ anv_pipeline_init(struct anv_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {}; struct anv_shader_module *modules[MESA_SHADER_STAGES] = {}; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; + VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage; + gl_shader_stage stage = vk_to_mesa_shader_stage(vk_stage); pStages[stage] = &pCreateInfo->pStages[i]; modules[stage] = anv_shader_module_from_handle(pStages[stage]->module); + pipeline->active_stages |= vk_stage; } + if (pipeline->active_stages & 
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) + pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + + assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); + if (modules[MESA_SHADER_VERTEX]) { result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, modules[MESA_SHADER_VERTEX], @@ -1378,7 +1384,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, goto compile_fail; } - assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); + assert(pipeline->shaders[MESA_SHADER_VERTEX]); anv_pipeline_setup_l3_config(pipeline, false); diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 9c08bc2033..eb2d414735 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1787,6 +1787,7 @@ compute_pipeline_create( pipeline->needs_data_cache = false; assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, pCreateInfo->stage.pName, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Allow disk shader cache usage with transform feedback
Module: Mesa Branch: master Commit: fc4a7aaa8297370ba505b306b91a71a3b8545d18 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc4a7aaa8297370ba505b306b91a71a3b8545d18 Author: Jordan Justen Date: Tue Mar 13 10:47:19 2018 -0700 i965: Allow disk shader cache usage with transform feedback Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105444 Signed-off-by: Jordan Justen Reviewed-by: Tapani Pälli Reviewed-by: Timothy Arceri --- src/mesa/drivers/dri/i965/brw_disk_cache.c | 8 1 file changed, 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index 0671dd20f8..c77e921b6a 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -280,14 +280,6 @@ brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage) if (prog == NULL) return false; - /* FIXME: For now we don't read from the cache if transform feedback is -* enabled via the API. However the shader cache does support transform -* feedback when enabled via in shader xfb qualifiers. -*/ - if (prog->sh.LinkedTransformFeedback && - prog->sh.LinkedTransformFeedback->api_enabled) - return false; - if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK) goto fail; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Remove api_enabled tracking for transform feedback
Module: Mesa Branch: master Commit: 9b473f9e3cc6820a6d1441e046be5ece22e03d17 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b473f9e3cc6820a6d1441e046be5ece22e03d17 Author: Jordan Justen Date: Tue Mar 13 10:49:28 2018 -0700 glsl: Remove api_enabled tracking for transform feedback We used this to prevent usage of the disk shader cache when transform feedback was enabled via the GL API. This is no longer used. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105444 Signed-off-by: Jordan Justen Reviewed-by: Tapani Pälli Reviewed-by: Timothy Arceri --- src/compiler/glsl/link_varyings.cpp | 2 -- src/mesa/main/mtypes.h | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 0a484ce132..1fdfcb877d 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -1336,8 +1336,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (has_xfb_qualifiers) { qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), cmp_xfb_offset); - } else { - xfb_prog->sh.LinkedTransformFeedback->api_enabled = true; } xfb_prog->sh.LinkedTransformFeedback->Varyings = diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 734fefc97f..5ee27d9977 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1861,9 +1861,6 @@ struct gl_transform_feedback_buffer /** Post-link transform feedback info. */ struct gl_transform_feedback_info { - /* Was xfb enabled via the api or in shader layout qualifiers */ - bool api_enabled; - unsigned NumOutputs; /* Bitmask of active buffer indices. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Allow disk shader cache usage with LINKING_SUCCESS status
Module: Mesa Branch: master Commit: d2b74ca2b503e1b8c1e58ac1c33d3631e1b30d6e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2b74ca2b503e1b8c1e58ac1c33d3631e1b30d6e Author: Jordan Justen Date: Tue Mar 13 12:14:23 2018 -0700 i965: Allow disk shader cache usage with LINKING_SUCCESS status Currently, we only look in the disk shader cache if we see that the shader program is in the cache during the link step. If the shader cache entry isn't found during the program link, there are still some (fairly unlikely) scenarios where later it might be useful to search the cache for gen binary programs. 1. If the cache evicts the serialized glsl cache, there might still be valid gen program entries in the disk cache. 2. If two applications are running in parallel, then it is possible that one may write out the cached gen program item which the other application can then make use of. Signed-off-by: Jordan Justen Reviewed-by: Tapani Pälli Reviewed-by: Timothy Arceri --- src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index c77e921b6a..ee6067ca51 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -283,9 +283,6 @@ brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage) if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK) goto fail; - if (prog->sh.data->LinkStatus != LINKING_SKIPPED) - goto fail; - if (!read_and_upload(brw, cache, prog, stage)) goto fail; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl/shader_cache: Allow shader cache usage with transform feedback
Module: Mesa Branch: master Commit: 6d830940f78109db44293d41e74d9ec0a47da49b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d830940f78109db44293d41e74d9ec0a47da49b Author: Jordan Justen Date: Tue Mar 13 10:44:39 2018 -0700 glsl/shader_cache: Allow shader cache usage with transform feedback Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105444 Suggested-by: Timothy Arceri Signed-off-by: Jordan Justen Reviewed-by: Tapani Pälli Reviewed-by: Timothy Arceri --- src/compiler/glsl/linker.cpp | 11 +-- src/compiler/glsl/shader_cache.cpp | 6 ++ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 1444b68cb0..af09b7d03e 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -4773,16 +4773,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } #ifdef ENABLE_SHADER_CACHE - /* If transform feedback used on the program then compile all shaders. */ - bool skip_cache = false; - if (prog->TransformFeedback.NumVarying > 0) { - for (unsigned i = 0; i < prog->NumShaders; i++) { - _mesa_glsl_compile_shader(ctx, prog->Shaders[i], false, false, true); - } - skip_cache = true; - } - - if (!skip_cache && shader_cache_read_program_metadata(ctx, prog)) + if (shader_cache_read_program_metadata(ctx, prog)) return; #endif diff --git a/src/compiler/glsl/shader_cache.cpp b/src/compiler/glsl/shader_cache.cpp index bf884af790..e43ed7aa67 100644 --- a/src/compiler/glsl/shader_cache.cpp +++ b/src/compiler/glsl/shader_cache.cpp @@ -160,6 +160,12 @@ shader_cache_read_program_metadata(struct gl_context *ctx, prog->FragDataBindings->iterate(create_binding_str, &buf); ralloc_strcat(&buf, "fbi: "); prog->FragDataIndexBindings->iterate(create_binding_str, &buf); + ralloc_asprintf_append(&buf, "tf: %d ", prog->TransformFeedback.BufferMode); + for (unsigned int i = 0; i < prog->TransformFeedback.NumVarying; i++) { + ralloc_asprintf_append(&buf, "%s:%d ", + 
prog->TransformFeedback.VaryingNames[i], + prog->TransformFeedback.BufferStride[i]); + } /* SSO has an effect on the linked program so include this when generating * the sha also. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl/serialize: Save shader program metadata sha1
Module: Mesa Branch: master Commit: b5baaee0d6b06a2c021d1b2673a056ada733a2a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5baaee0d6b06a2c021d1b2673a056ada733a2a9 Author: Jordan Justen Date: Sat Mar 10 01:59:47 2018 -0800 glsl/serialize: Save shader program metadata sha1 When the shader cache is used, this can be generated. In fact, the shader cache uses this sha1 to lookup the serialized GL shader program. If a GL shader program is restored with ProgramBinary, the shaders are not available, and therefore the correct sha1 cannot be generated. If this is restored, then we can use the shader cache to restore the binary programs to the program that was loaded with ProgramBinary. Signed-off-by: Jordan Justen Reviewed-by: Timothy Arceri Reviewed-by: Tapani Pälli --- src/compiler/glsl/serialize.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 9d2033bddf..1fdbaa990f 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -1163,6 +1163,8 @@ extern "C" void serialize_glsl_program(struct blob *blob, struct gl_context *ctx, struct gl_shader_program *prog) { + blob_write_bytes(blob, prog->data->sha1, sizeof(prog->data->sha1)); + write_uniforms(blob, prog); write_hash_tables(blob, prog); @@ -1219,6 +1221,8 @@ deserialize_glsl_program(struct blob_reader *blob, struct gl_context *ctx, assert(prog->data->UniformStorage == NULL); + blob_copy_bytes(blob, prog->data->sha1, sizeof(prog->data->sha1)); + read_uniforms(blob, prog); read_hash_tables(blob, prog); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): main/program_binary: In ProgramBinary set link status as LINKING_SKIPPED
Module: Mesa Branch: master Commit: 2ed288363fe8dced45f06b7cd66adbbf703a2012 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2ed288363fe8dced45f06b7cd66adbbf703a2012 Author: Jordan Justen Date: Sun Mar 11 01:18:55 2018 -0800 main/program_binary: In ProgramBinary set link status as LINKING_SKIPPED This change allows the disk shader cache to work with programs loaded with ProgramBinary. Drivers check for LINKING_SKIPPED, and if set, then they try to use the shader cache. Since the program loaded by ProgramBinary is similar to loading the shader from the disk cache, this is probably more appropriate. Signed-off-by: Jordan Justen Reviewed-by: Tapani Pälli Reviewed-by: Timothy Arceri --- src/mesa/main/program_binary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/program_binary.c b/src/mesa/main/program_binary.c index 3df7005934..021f6315e7 100644 --- a/src/mesa/main/program_binary.c +++ b/src/mesa/main/program_binary.c @@ -287,5 +287,5 @@ _mesa_program_binary(struct gl_context *ctx, struct gl_shader_program *sh_prog, return; } - sh_prog->data->LinkStatus = LINKING_SUCCESS; + sh_prog->data->LinkStatus = LINKING_SKIPPED; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): scons: need to split CC or things might fail
Module: Mesa Branch: master Commit: e10dc12f6f2f7513d96bbea87b93b8e338222188 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e10dc12f6f2f7513d96bbea87b93b8e338222188 Author: Jose Fonseca Date: Mon Mar 19 16:41:57 2018 +0100 scons: need to split CC or things might fail We've seen this fail internally. Reviewed-by: Roland Scheidegger --- scons/gallium.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scons/gallium.py b/scons/gallium.py index ef3b2ee81a..75200b89c1 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -134,7 +134,9 @@ def check_cc(env, cc, expr, cpp_opt = '-E'): source.write('#if !(%s)\n#error\n#endif\n' % expr) source.close() -pipe = SCons.Action._subproc(env, [env['CC'], cpp_opt, source.name], +# sys.stderr.write('%r %s %s\n' % (env['CC'], cpp_opt, source.name)); + +pipe = SCons.Action._subproc(env, env.Split(env['CC']) + [cpp_opt, source.name], stdin = 'devnull', stderr = 'devnull', stdout = 'devnull') ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): configure.ac: blacklist libdrm 2.4.90
Module: Mesa Branch: 17.3 Commit: 820e63b49e7283865db7bbef36639e0607009561 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=820e63b49e7283865db7bbef36639e0607009561 Author: Marek Olšák Date: Tue Mar 13 20:22:11 2018 -0400 configure.ac: blacklist libdrm 2.4.90 Cc: 18.0 17.3 17.2 Reviewed-by: Emil Velikov (cherry picked from commit 5d0acff39e4274ab803c17855115b231efcbef80) --- configure.ac | 8 1 file changed, 8 insertions(+) diff --git a/configure.ac b/configure.ac index 7283cab73b..dea094ce05 100644 --- a/configure.ac +++ b/configure.ac @@ -2494,6 +2494,14 @@ if test -n "$with_gallium_drivers"; then HAVE_GALLIUM_RADEONSI=yes PKG_CHECK_MODULES([RADEON], [libdrm >= $LIBDRM_RADEON_REQUIRED libdrm_radeon >= $LIBDRM_RADEON_REQUIRED]) PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED]) + +# Blacklist libdrm_amdgpu 2.4.90 because it causes a crash in older +# radeonsi with pretty much any app. +libdrm_version=`pkg-config libdrm_amdgpu --modversion` +if test "x$libdrm_version" = x2.4.90; then +AC_MSG_ERROR([radeonsi can't use libdrm 2.4.90 due to a compatibility issue. Use a newer or older version.]) +fi + require_libdrm "radeonsi" radeon_llvm_check $LLVM_REQUIRED_RADEONSI "radeonsi" if test "x$enable_egl" = xyes; then ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): radv: Increase the number of dynamic uniform buffers.
Module: Mesa Branch: 17.3 Commit: c1563394f7b419170e3ec89d2f07e1fd812f9e53 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c1563394f7b419170e3ec89d2f07e1fd812f9e53 Author: Bas Nieuwenhuizen Date: Fri Mar 9 17:18:03 2018 +0100 radv: Increase the number of dynamic uniform buffers. The vulkan API is not ideal as it does not allow us to have a shared limit. Feral needs 15+6 for one of their games, and I'm not a fan of overcommitting the limits, so increase the number of dynamic uniform buffers to 16. CC: CC: Alex Smith Reviewed-by: Dave Airlie (cherry picked from commit 997306c031327b7e034e617736c868d6d21919eb) --- src/amd/vulkan/radv_device.c | 4 ++-- src/amd/vulkan/radv_private.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b43a802fd8..d957079b9d 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -622,9 +622,9 @@ void radv_GetPhysicalDeviceProperties( .maxPerStageResources = max_descriptor_set_size, .maxDescriptorSetSamplers = max_descriptor_set_size, .maxDescriptorSetUniformBuffers = max_descriptor_set_size, - .maxDescriptorSetUniformBuffersDynamic= MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetUniformBuffersDynamic= MAX_DYNAMIC_UNIFORM_BUFFERS, .maxDescriptorSetStorageBuffers = max_descriptor_set_size, - .maxDescriptorSetStorageBuffersDynamic= MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetStorageBuffersDynamic= MAX_DYNAMIC_STORAGE_BUFFERS, .maxDescriptorSetSampledImages= max_descriptor_set_size, .maxDescriptorSetStorageImages= max_descriptor_set_size, .maxDescriptorSetInputAttachments = max_descriptor_set_size, diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index f07ec28df6..525fd8dd75 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -83,7 +83,9 @@ typedef uint32_t xcb_window_t; #define MAX_SCISSORS16 #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_PUSH_DESCRIPTORS 32 -#define 
MAX_DYNAMIC_BUFFERS 16 +#define MAX_DYNAMIC_UNIFORM_BUFFERS 16 +#define MAX_DYNAMIC_STORAGE_BUFFERS 8 +#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) #define MAX_SAMPLES_LOG2 4 #define NUM_META_FS_KEYS 13 #define RADV_MAX_DRM_DEVICES 8 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): r600: implement callstack workaround for evergreen.
Module: Mesa Branch: 17.3 Commit: 12015dbc71972b88cdeaae6e1ea052e5c3346863 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=12015dbc71972b88cdeaae6e1ea052e5c3346863 Author: Dave Airlie Date: Fri Mar 9 16:03:53 2018 +1000 r600: implement callstack workaround for evergreen. This is ported from the sb backend, there are some issues with evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE instructions. Whenever we are going to use a push before, we check the stack usage and if we have to use the workaround, then we switch to a separate push. I noticed this problem dealing with some of the soft fp64 shaders, in nosb mode, they are quite stack happy. This fixes all the glitches and inconsistencies I've seen with them Reviewed-by: Roland Scheidegger Tested-by: Elie Tournier Cc: Signed-off-by: Dave Airlie (cherry picked from commit 5d4fbc2b54cb2aaea1cbb52ec087f31009f3ac76) [Juan A. Suarez: resolve trivial conflicts] Signed-off-by: Juan A. Suarez Romero Conflicts: src/gallium/drivers/r600/r600_shader.c --- src/gallium/drivers/r600/r600_shader.c | 39 +++--- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e2f2e94666..ebcb745d13 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -360,7 +360,7 @@ struct r600_shader_tgsi_instruction { static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason); static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); static int 
tgsi_else(struct r600_shader_ctx *ctx); static int tgsi_endif(struct r600_shader_ctx *ctx); @@ -376,6 +376,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, unsigned dst_reg); +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) +{ + if (ctx->bc->family == CHIP_HEMLOCK || + ctx->bc->family == CHIP_CYPRESS || + ctx->bc->family == CHIP_JUNIPER) + return false; + return true; +} + static int tgsi_last_instruction(unsigned writemask) { int i, lasti = 0; @@ -8338,7 +8347,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) return 0; } -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, unsigned reason) { struct r600_stack_info *stack = &ctx->bc->stack; @@ -8355,7 +8364,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on * the stack must be reserved to hold the current active/continue * masks */ - if (reason == FC_PUSH_VPM) { + if (reason == FC_PUSH_VPM || stack->push > 0) { elements += 2; } break; @@ -8381,7 +8390,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, *NOTE: it seems we also need to reserve additional element in some *other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, *then STACK_SIZE should be 2 instead of 1 */ - if (reason == FC_PUSH_VPM) { + if (reason == FC_PUSH_VPM || stack->push > 0) { elements += 1; } break; @@ -8400,6 +8409,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, if (entries > stack->max_entries) stack->max_entries = entries; + return elements; } static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) @@ -8423,7 +8433,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) } } -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason) { switch (reason) { case FC_PUSH_VPM: @@ -8431,6 +8441,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) break; case FC_PUSH_WQM: ++ctx->bc->stack.push_wqm; + break; case FC_LOOP:
Mesa (17.3): r600: Take ALU_EXTENDED into account when evaluating jump offsets
Module: Mesa Branch: 17.3 Commit: 91325ea754fc1c23910b44e8efd04f6c3c76fecd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=91325ea754fc1c23910b44e8efd04f6c3c76fecd Author: Gert Wollny Date: Sat Feb 24 11:31:22 2018 +0100 r600: Take ALU_EXTENDED into account when evaluating jump offsets ALU_EXTENDED needs 4 DWORDS instead of the usual 2, hence if the last ALU clause within a IF-JUMP or ELSE branch is ALU_EXTENDED the target jump offset needs to be adjusted accordingly. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104654 Cc: Signed-off-by: Gert Wollny Reviewed-by: Dave Airlie (cherry picked from commit c7cadcbda47537d474eea52b9e77e57ef9287f9b) [Juan A. Suarez: resolve trivial conflicts] Signed-off-by: Juan A. Suarez Romero Conflicts: src/gallium/drivers/r600/r600_shader.c --- src/gallium/drivers/r600/r600_shader.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 1fd009f5db..e2f2e94666 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -8564,17 +8564,22 @@ static int tgsi_else(struct r600_shader_ctx *ctx) static int tgsi_endif(struct r600_shader_ctx *ctx) { + int offset = 2; pops(ctx, 1); if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) { R600_ERR("if/endif unbalanced in shader\n"); return -1; } + /* ALU_EXTENDED needs 4 DWords instead of two, adjust jump target offset accordingly */ + if (ctx->bc->cf_last->eg_alu_extended) + offset += 2; + if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) { - ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + offset; ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1; } else { - ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = 
ctx->bc->cf_last->id + offset; } fc_poplevel(ctx); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): Revert "mesa: do not trigger _NEW_TEXTURE_STATE in glActiveTexture()"
Module: Mesa Branch: 17.3 Commit: 929a4473d47f2987534e7f3d446aab577725fe88 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=929a4473d47f2987534e7f3d446aab577725fe88 Author: Samuel Pitoiset Date: Wed Mar 14 09:46:33 2018 +0100 Revert "mesa: do not trigger _NEW_TEXTURE_STATE in glActiveTexture()" This reverts commit f314a532fdc7af8381586144d2631d9968331f05. This appears to introduce some blinking textures in UT2004. Not sure exactly what's the root cause because we don't have much information about the issue. Anyway, this was just a micro optimization that actually breaks, at least, one app almost one year later. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105436 Cc: Signed-off-by: Samuel Pitoiset Reviewed-by: Timothy Arceri (cherry picked from commit f02f1ad13fa4123986d17a5d04b0e2831c3a7091) --- src/mesa/main/texstate.c | 13 + 1 file changed, 13 insertions(+) diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 2146723d08..fa876fa80b 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -307,6 +307,19 @@ active_texture(GLenum texture, bool no_error) } } + + /* The below flush call seems useless because +* gl_context::Texture::CurrentUnit is not used by +* _mesa_update_texture_state() and friends. +* +* However removing the flush +* introduced some blinking textures in UT2004. More investigation is +* needed to find the root cause. +* +* https://bugs.freedesktop.org/show_bug.cgi?id=105436 +*/ + FLUSH_VERTICES(ctx, _NEW_TEXTURE_STATE); + ctx->Texture.CurrentUnit = texUnit; if (ctx->Transform.MatrixMode == GL_TEXTURE) { /* update current stack pointer */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): intel/vulkan: Hard code CS scratch_ids_per_subslice for Cherryview
Module: Mesa Branch: 17.3 Commit: 0eaa2fc8832ad490cf4710ddb57f67ffd9045837 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0eaa2fc8832ad490cf4710ddb57f67ffd9045837 Author: Jordan Justen Date: Tue Mar 6 23:28:00 2018 -0800 intel/vulkan: Hard code CS scratch_ids_per_subslice for Cherryview Ken suggested that we might be underallocating scratch space on HD 400. Allocating scratch space as though there was actually 8 EUs seems to help with a GPU hang seen on synmark CSDof. Cc: Signed-off-by: Jordan Justen Reviewed-by: Kenneth Graunke (cherry picked from commit 24b415270ffeef873ba4772d1b3c7c185c9b1958) --- src/intel/vulkan/anv_allocator.c | 45 +--- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 4698ff4b63..82726f18bb 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1117,24 +1117,35 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, &device->instance->physicalDevice; const struct gen_device_info *devinfo = &physical_device->info; - /* WaCSScratchSize:hsw -* -* Haswell's scratch space address calculation appears to be sparse -* rather than tightly packed. The Thread ID has bits indicating which -* subslice, EU within a subslice, and thread within an EU it is. -* There's a maximum of two slices and two subslices, so these can be -* stored with a single bit. Even though there are only 10 EUs per -* subslice, this is stored in 4 bits, so there's an effective maximum -* value of 16 EUs. Similarly, although there are only 7 threads per EU, -* this is stored in a 3 bit number, giving an effective maximum value -* of 8 threads per EU. -* -* This means that we need to use 16 * 8 instead of 10 * 7 for the -* number of threads per subslice. -*/ const unsigned subslices = MAX2(physical_device->subslice_total, 1); - const unsigned scratch_ids_per_subslice = - device->info.is_haswell ? 
16 * 8 : devinfo->max_cs_threads; + + unsigned scratch_ids_per_subslice; + if (devinfo->is_haswell) { + /* WaCSScratchSize:hsw + * + * Haswell's scratch space address calculation appears to be sparse + * rather than tightly packed. The Thread ID has bits indicating + * which subslice, EU within a subslice, and thread within an EU it + * is. There's a maximum of two slices and two subslices, so these + * can be stored with a single bit. Even though there are only 10 EUs + * per subslice, this is stored in 4 bits, so there's an effective + * maximum value of 16 EUs. Similarly, although there are only 7 + * threads per EU, this is stored in a 3 bit number, giving an + * effective maximum value of 8 threads per EU. + * + * This means that we need to use 16 * 8 instead of 10 * 7 for the + * number of threads per subslice. + */ + scratch_ids_per_subslice = 16 * 8; + } else if (devinfo->is_cherryview) { + /* Cherryview devices have either 6 or 8 EUs per subslice, and each EU + * has 7 threads. The 6 EU devices appear to calculate thread IDs as if + * it had 8 EUs. + */ + scratch_ids_per_subslice = 8 * 7; + } else { + scratch_ids_per_subslice = devinfo->max_cs_threads; + } uint32_t max_threads[] = { [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): radv: Fix CmdCopyImage between uncompressed and compressed images
Module: Mesa Branch: 17.3 Commit: 1d7c3ee7291d936c5fdc48aa703ab84cd5e80590 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1d7c3ee7291d936c5fdc48aa703ab84cd5e80590 Author: Alex Smith Date: Mon Mar 12 14:30:05 2018 +0000 radv: Fix CmdCopyImage between uncompressed and compressed images From the spec: "When copying between compressed and uncompressed formats the extent members represent the texel dimensions of the source image and not the destination." However, as per 7b890a36, we must still use the destination image type when clamping the extent so that we copy the correct number of layers for 2D to 3D copies. Fixes: 7b890a36 "radv: Fix vkCmdCopyImage for 2d slices into 3d Images" Cc: Signed-off-by: Alex Smith Reviewed-by: Dave Airlie (cherry picked from commit fcf267ba087dd00c48ceaf9277424dac079f9319) --- src/amd/vulkan/radv_meta_copy.c | 23 +-- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c index ff8bd9dff9..b82ea4877a 100644 --- a/src/amd/vulkan/radv_meta_copy.c +++ b/src/amd/vulkan/radv_meta_copy.c @@ -37,10 +37,11 @@ meta_image_block_size(const struct radv_image *image) */ static struct VkExtent3D meta_region_extent_el(const struct radv_image *image, + const VkImageType imageType, const struct VkExtent3D *extent) { const VkExtent3D block = meta_image_block_size(image); - return radv_sanitize_image_extent(image->type, (VkExtent3D) { + return radv_sanitize_image_extent(imageType, (VkExtent3D) { .width = DIV_ROUND_UP(extent->width , block.width), .height = DIV_ROUND_UP(extent->height, block.height), .depth = DIV_ROUND_UP(extent->depth , block.depth), @@ -146,11 +147,11 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height, }; const VkExtent3D buf_extent_el = - meta_region_extent_el(image, &bufferExtent); + meta_region_extent_el(image, image->type, &bufferExtent); /* Start creating blit rect */ const VkExtent3D 
img_extent_el = - meta_region_extent_el(image, &pRegions[r].imageExtent); + meta_region_extent_el(image, image->type, &pRegions[r].imageExtent); struct radv_meta_blit2d_rect rect = { .width = img_extent_el.width, .height = img_extent_el.height, @@ -259,11 +260,11 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height, }; const VkExtent3D buf_extent_el = - meta_region_extent_el(image, &bufferExtent); + meta_region_extent_el(image, image->type, &bufferExtent); /* Start creating blit rect */ const VkExtent3D img_extent_el = - meta_region_extent_el(image, &pRegions[r].imageExtent); + meta_region_extent_el(image, image->type, &pRegions[r].imageExtent); struct radv_meta_blit2d_rect rect = { .width = img_extent_el.width, .height = img_extent_el.height, @@ -385,8 +386,18 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, meta_region_offset_el(dest_image, &pRegions[r].dstOffset); const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &pRegions[r].srcOffset); + + /* +* From Vulkan 1.0.68, "Copying Data Between Images": +*"When copying between compressed and uncompressed formats +* the extent members represent the texel dimensions of the +* source image and not the destination." +* However, we must use the destination image type to avoid +* clamping depth when copying multiple layers of a 2D image to +* a 3D image. +*/ const VkExtent3D img_extent_el = - meta_region_extent_el(dest_image, &pRegions[r].extent); + meta_region_extent_el(src_image, dest_image->type, &pRegions[r].extent); /* Start creating blit rect */ struct radv_meta_blit2d_rect rect = { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (17.3): radeonsi: align command buffer starting address to fix some Raven hangs
Module: Mesa Branch: 17.3 Commit: 4fa8c1f52532651134d1497a824ca9be30a0caf6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4fa8c1f52532651134d1497a824ca9be30a0caf6 Author: Marek Olšák Date: Tue Mar 6 19:07:58 2018 -0500 radeonsi: align command buffer starting address to fix some Raven hangs Cc: 17.3 18.0 Reviewed-by: Christian König Reviewed-by: Alex Deucher (cherry picked from commit 75c5d25f0f34cd70246ee1b0b77a75ec82dfcecb) [Juan A. Suarez: resolve trivial conflicts] Signed-off-by: Juan A. Suarez Romero Conflicts: src/amd/common/ac_gpu_info.c Squashed with: radeonsi: add a workaround for GFX9 hang with init_config alignment Fixes: 75c5d25f0f34cd702 "radeonsi: align command buffer starting address to fix some Raven hangs" Cc: 17.3 18.0 (cherry picked from commit 2bdb54bce77828ef20b730ad869b66c5889b5347) --- src/amd/common/ac_gpu_info.c | 21 - src/amd/common/ac_gpu_info.h | 1 + src/gallium/drivers/radeonsi/si_pm4.c | 6 -- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 5 +++-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 1 + 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 2e56012550..40493a62c6 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -98,7 +98,9 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, { struct amdgpu_buffer_size_alignments alignment_info = {}; struct amdgpu_heap_info vram, vram_vis, gtt; - struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {}; + struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}; + struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}; + struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; drmDevicePtr devinfo; @@ -154,6 +156,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, return false; } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, 
&gfx); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n"); + return false; + } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute); if (r) { fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n"); @@ -315,6 +323,17 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, if (info->chip_class == SI) info->gfx_ib_pad_with_type2 = TRUE; + unsigned ib_align = 0; + ib_align = MAX2(ib_align, gfx.ib_start_alignment); + ib_align = MAX2(ib_align, compute.ib_start_alignment); + ib_align = MAX2(ib_align, dma.ib_start_alignment); + ib_align = MAX2(ib_align, uvd.ib_start_alignment); + ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment); + ib_align = MAX2(ib_align, vce.ib_start_alignment); + ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); + ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); + info->ib_start_alignment = ib_align; + return true; } diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 92c94f046b..c7e75fdd9b 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -61,6 +61,7 @@ struct radeon_info { boolhas_virtual_memory; boolgfx_ib_pad_with_type2; boolhas_hw_decode; + unsignedib_start_alignment; uint32_tnum_sdma_rings; uint32_tnum_compute_rings; uint32_tuvd_fw_version; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 1ae1861a83..32f69d4cc1 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -169,9 +169,11 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, assert(aligned_ndw <= SI_PM4_MAX_DW); r600_resource_reference(&state->indirect_buffer, NULL); + /* TODO: this hangs with 1024 or higher alignment on GFX9. 
*/ state->indirect_buffer = (struct r600_resource*) - pipe_buffer_create(screen, 0, - PIPE_USAGE_DEFAULT, aligned_ndw * 4); + si_aligned_buffer_create(screen, 0, +PIPE_USAGE_DEFAULT, aligned_ndw * 4, +256); if (!state->indirect_buffer) return; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index fad8c6fd98..e2555813e1 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++
Mesa (master): i965: Add INTEL_DEBUG stages support for disk shader cache
Module: Mesa Branch: master Commit: d07a49fb1840bb441e600ce942cb0088e7ea15c7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d07a49fb1840bb441e600ce942cb0088e7ea15c7 Author: Jordan Justen Date: Fri Mar 16 16:44:22 2018 -0700 i965: Add INTEL_DEBUG stages support for disk shader cache Signed-off-by: Jordan Justen Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_disk_cache.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c index 41f742e858..0671dd20f8 100644 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c @@ -31,6 +31,9 @@ #include "util/macros.h" #include "util/mesa-sha1.h" +#include "compiler/brw_eu.h" +#include "common/gen_debug.h" + #include "brw_context.h" #include "brw_program.h" #include "brw_cs.h" @@ -39,6 +42,16 @@ #include "brw_vs.h" #include "brw_wm.h" +static bool +debug_enabled_for_stage(gl_shader_stage stage) +{ + static const uint64_t stage_debug_flags[] = { + DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS, + }; + assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags)); + return (INTEL_DEBUG & stage_debug_flags[stage]) != 0; +} + static void gen_shader_sha1(struct brw_context *brw, struct gl_program *prog, gl_shader_stage stage, void *key, unsigned char *out_sha1) @@ -230,6 +243,19 @@ read_and_upload(struct brw_context *brw, struct disk_cache *cache, brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch); + if (unlikely(debug_enabled_for_stage(stage))) { + fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n", + _mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id); + brw_program_deserialize_nir(&brw->ctx, prog, stage); + nir_shader *nir = prog->nir; + nir_print_shader(nir, stderr); + fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n", + nir->info.label ? 
nir->info.label : "unnamed", + _mesa_shader_stage_to_string(nir->info.stage), nir->info.name); + brw_disassemble(&brw->screen->devinfo, program, 0, + prog_data->program_size, stderr); + } + brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage), program, prog_data->program_size, prog_data, brw_prog_data_size(stage), &stage_state->prog_offset, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit