Re: [Mesa-dev] [PATCH 6/6] anv: Add support for the on-disk shader cache
Series: Reviewed-by: Timothy Arceri On 30/06/18 13:44, Jason Ekstrand wrote: --- src/intel/vulkan/anv_device.c | 36 ++ src/intel/vulkan/anv_pipeline_cache.c | 98 --- src/intel/vulkan/anv_private.h| 3 + 3 files changed, 126 insertions(+), 11 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a864c702c3f..ca6e1c0cace 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -35,6 +35,7 @@ #include "util/strtod.h" #include "util/debug.h" #include "util/build_id.h" +#include "util/disk_cache.h" #include "util/mesa-sha1.h" #include "vk_util.h" #include "common/gen_defines.h" @@ -233,6 +234,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) "build-id too short. It needs to be a SHA"); } + memcpy(device->driver_build_sha1, build_id_data(note), 20); + struct mesa_sha1 sha1_ctx; uint8_t sha1[20]; STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); @@ -271,6 +274,35 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) return VK_SUCCESS; } +static void +anv_physical_device_init_disk_cache(struct anv_physical_device *device) +{ +#ifdef ENABLE_SHADER_CACHE + char renderer[9]; + MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x", + device->chipset_id); + assert(len == sizeof(renderer) - 1); + + char timestamp[41]; + _mesa_sha1_format(timestamp, device->driver_build_sha1); + + device->disk_cache = disk_cache_create(renderer, timestamp, 0); +#else + device->disk_cache = NULL; +#endif +} + +static void +anv_physical_device_free_disk_cache(struct anv_physical_device *device) +{ +#ifdef ENABLE_SHADER_CACHE + if (device->disk_cache) + disk_cache_destroy(device->disk_cache); +#else + assert(device->disk_cache == NULL); +#endif +} + static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, @@ -442,6 +474,8 @@ anv_physical_device_init(struct anv_physical_device *device, if (result != VK_SUCCESS) goto fail; + 
anv_physical_device_init_disk_cache(device); + if (instance->enabled_extensions.KHR_display) { master_fd = open(primary_path, O_RDWR | O_CLOEXEC); if (master_fd >= 0) { @@ -459,6 +493,7 @@ anv_physical_device_init(struct anv_physical_device *device, result = anv_init_wsi(device); if (result != VK_SUCCESS) { ralloc_free(device->compiler); + anv_physical_device_free_disk_cache(device); goto fail; } @@ -481,6 +516,7 @@ static void anv_physical_device_finish(struct anv_physical_device *device) { anv_finish_wsi(device); + anv_physical_device_free_disk_cache(device); ralloc_free(device->compiler); close(device->local_fd); if (device->master_fd >= 0) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index e57cd1c75c6..d4c7262dc05 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -24,6 +24,8 @@ #include "compiler/blob.h" #include "util/hash_table.h" #include "util/debug.h" +#include "util/disk_cache.h" +#include "util/mesa-sha1.h" #include "anv_private.h" struct anv_shader_bin * @@ -280,6 +282,25 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, return shader; } +static void +anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache, + struct anv_shader_bin *bin) +{ + if (!cache->cache) + return; + + pthread_mutex_lock(>mutex); + + struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key); + if (entry == NULL) { + /* Take a reference for the cache */ + anv_shader_bin_ref(bin); + _mesa_hash_table_insert(cache->cache, bin->key, bin); + } + + pthread_mutex_unlock(>mutex); +} + static struct anv_shader_bin * anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, @@ -540,7 +561,38 @@ anv_device_search_for_kernel(struct anv_device *device, struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size) { - return cache ? 
anv_pipeline_cache_search(cache, key_data, key_size) : NULL; + struct anv_shader_bin *bin; + + if (cache) { + bin = anv_pipeline_cache_search(cache, key_data, key_size); + if (bin) + return bin; + } + +#ifdef ENABLE_SHADER_CACHE + struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache; + if (disk_cache) { + cache_key cache_key; + disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); + + size_t buffer_size; + uint8_t *buffer = disk_cache_get(disk_cache,
Re: [Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass
On Fri, Jun 29, 2018 at 9:56 PM, Timothy Arceri wrote: > On 30/06/18 10:13, Jason Ekstrand wrote: > >> This pass searches for reasonably large local variables which can be >> statically proven to be constant and moves them into shader constant >> data. This is especially useful when large tables are baked into the >> shader source code because they can be moved into a UBO by the driver to >> reduce register pressure and make indirect access cheaper. >> >> v2 (Jason Ekstrand): >> - Use a size/align function to ensure we get the right alignments >> - Use the newly added deref offset helpers >> --- >> src/compiler/Makefile.sources | 1 + >> src/compiler/nir/meson.build | 1 + >> src/compiler/nir/nir.h | 4 + >> src/compiler/nir/nir_opt_large_constants.c | 301 + >> 4 files changed, 307 insertions(+) >> create mode 100644 src/compiler/nir/nir_opt_large_constants.c >> >> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.source >> s >> index 0fcbc5c5c5b..9e3fbdc2612 100644 >> --- a/src/compiler/Makefile.sources >> +++ b/src/compiler/Makefile.sources >> @@ -276,6 +276,7 @@ NIR_FILES = \ >> nir/nir_opt_if.c \ >> nir/nir_opt_intrinsics.c \ >> nir/nir_opt_loop_unroll.c \ >> + nir/nir_opt_large_constants.c \ >> nir/nir_opt_move_comparisons.c \ >> nir/nir_opt_move_load_ubo.c \ >> nir/nir_opt_peephole_select.c \ >> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build >> index eb7fb7b121e..28aa8de7014 100644 >> --- a/src/compiler/nir/meson.build >> +++ b/src/compiler/nir/meson.build >> @@ -160,6 +160,7 @@ files_libnir = files( >> 'nir_opt_global_to_local.c', >> 'nir_opt_if.c', >> 'nir_opt_intrinsics.c', >> + 'nir_opt_large_constants.c', >> 'nir_opt_loop_unroll.c', >> 'nir_opt_move_comparisons.c', >> 'nir_opt_move_load_ubo.c', >> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >> index cc5f88d6f54..92ab3a699cc 100644 >> --- a/src/compiler/nir/nir.h >> +++ b/src/compiler/nir/nir.h >> @@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader 
*shader); >> bool nir_opt_intrinsics(nir_shader *shader); >> +bool nir_opt_large_constants(nir_shader *shader, >> + glsl_type_size_align_func size_align, >> + unsigned threshold); >> + >> bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode >> indirect_mask); >> bool nir_opt_move_comparisons(nir_shader *shader); >> diff --git a/src/compiler/nir/nir_opt_large_constants.c >> b/src/compiler/nir/nir_opt_large_constants.c >> new file mode 100644 >> index 000..027c6e8e5b5 >> --- /dev/null >> +++ b/src/compiler/nir/nir_opt_large_constants.c >> @@ -0,0 +1,301 @@ >> +/* >> + * Copyright © 2018 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining >> a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without >> limitation >> + * the rights to use, copy, modify, merge, publish, distribute, >> sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the >> next >> + * paragraph) shall be included in all copies or substantial portions of >> the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >> SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> + * IN THE SOFTWARE. 
>> + */ >> + >> +#include "nir.h" >> +#include "nir_builder.h" >> +#include "nir_deref.h" >> + >> +struct var_info { >> + bool is_constant; >> + bool found_read; >> +}; >> + >> +static nir_ssa_def * >> +build_constant_load(nir_builder *b, nir_deref_instr *deref, >> +glsl_type_size_align_func size_align) >> +{ >> + nir_variable *var = nir_deref_instr_get_variable(deref); >> + >> + const unsigned bit_size = glsl_get_bit_size(deref->type); >> + const unsigned num_components = glsl_get_vector_elements(deref >> ->type); >> + >> + UNUSED unsigned var_size, var_align; >> + size_align(var->type, _size, _align); >> + assert(var->data.location % var_align == 0); >> + >> + nir_intrinsic_instr *load = >> + nir_intrinsic_instr_create(b->shader, >> nir_intrinsic_load_constant); >> + load->num_components = num_components; >> + nir_intrinsic_set_base(load,
Re: [Mesa-dev] [PATCH] glsl/cache: save and restore ExternalSamplersUsed
Reviewed-by: Timothy Arceri On 30/06/18 14:59, Marek Olšák wrote: From: Marek Olšák Shaders that need special code for external samplers were broken if they were loaded from the cache. Cc: 18.1 --- src/compiler/glsl/serialize.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 9c21453f91e..889038fb5e2 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -1037,20 +1037,21 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader) blob_write_bytes(metadata, glprog->TexturesUsed, sizeof(glprog->TexturesUsed)); blob_write_uint64(metadata, glprog->SamplersUsed); blob_write_bytes(metadata, glprog->SamplerUnits, sizeof(glprog->SamplerUnits)); blob_write_bytes(metadata, glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); blob_write_uint32(metadata, glprog->ShadowSamplers); + blob_write_uint32(metadata, glprog->ExternalSamplersUsed); blob_write_bytes(metadata, glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); blob_write_bytes(metadata, glprog->sh.ImageUnits, sizeof(glprog->sh.ImageUnits)); size_t ptr_size = sizeof(GLvoid *); blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers); blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler); @@ -1089,20 +1090,21 @@ read_shader_metadata(struct blob_reader *metadata, blob_copy_bytes(metadata, (uint8_t *) glprog->TexturesUsed, sizeof(glprog->TexturesUsed)); glprog->SamplersUsed = blob_read_uint64(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->SamplerUnits, sizeof(glprog->SamplerUnits)); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); glprog->ShadowSamplers = blob_read_uint32(metadata); + glprog->ExternalSamplersUsed = blob_read_uint32(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits, 
sizeof(glprog->sh.ImageUnits)); size_t ptr_size = sizeof(GLvoid *); glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata); glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl/cache: save and restore ExternalSamplersUsed
From: Marek Olšák Shaders that need special code for external samplers were broken if they were loaded from the cache. Cc: 18.1 --- src/compiler/glsl/serialize.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp index 9c21453f91e..889038fb5e2 100644 --- a/src/compiler/glsl/serialize.cpp +++ b/src/compiler/glsl/serialize.cpp @@ -1037,20 +1037,21 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader) blob_write_bytes(metadata, glprog->TexturesUsed, sizeof(glprog->TexturesUsed)); blob_write_uint64(metadata, glprog->SamplersUsed); blob_write_bytes(metadata, glprog->SamplerUnits, sizeof(glprog->SamplerUnits)); blob_write_bytes(metadata, glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); blob_write_uint32(metadata, glprog->ShadowSamplers); + blob_write_uint32(metadata, glprog->ExternalSamplersUsed); blob_write_bytes(metadata, glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); blob_write_bytes(metadata, glprog->sh.ImageUnits, sizeof(glprog->sh.ImageUnits)); size_t ptr_size = sizeof(GLvoid *); blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers); blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler); @@ -1089,20 +1090,21 @@ read_shader_metadata(struct blob_reader *metadata, blob_copy_bytes(metadata, (uint8_t *) glprog->TexturesUsed, sizeof(glprog->TexturesUsed)); glprog->SamplersUsed = blob_read_uint64(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->SamplerUnits, sizeof(glprog->SamplerUnits)); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets, sizeof(glprog->sh.SamplerTargets)); glprog->ShadowSamplers = blob_read_uint32(metadata); + glprog->ExternalSamplersUsed = blob_read_uint32(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits, sizeof(glprog->sh.ImageUnits)); size_t ptr_size = sizeof(GLvoid *); 
glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata); glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass
On 30/06/18 10:13, Jason Ekstrand wrote: This pass searches for reasonably large local variables which can be statically proven to be constant and moves them into shader constant data. This is especially useful when large tables are baked into the shader source code because they can be moved into a UBO by the driver to reduce register pressure and make indirect access cheaper. v2 (Jason Ekstrand): - Use a size/align function to ensure we get the right alignments - Use the newly added deref offset helpers --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 4 + src/compiler/nir/nir_opt_large_constants.c | 301 + 4 files changed, 307 insertions(+) create mode 100644 src/compiler/nir/nir_opt_large_constants.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 0fcbc5c5c5b..9e3fbdc2612 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -276,6 +276,7 @@ NIR_FILES = \ nir/nir_opt_if.c \ nir/nir_opt_intrinsics.c \ nir/nir_opt_loop_unroll.c \ + nir/nir_opt_large_constants.c \ nir/nir_opt_move_comparisons.c \ nir/nir_opt_move_load_ubo.c \ nir/nir_opt_peephole_select.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index eb7fb7b121e..28aa8de7014 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -160,6 +160,7 @@ files_libnir = files( 'nir_opt_global_to_local.c', 'nir_opt_if.c', 'nir_opt_intrinsics.c', + 'nir_opt_large_constants.c', 'nir_opt_loop_unroll.c', 'nir_opt_move_comparisons.c', 'nir_opt_move_load_ubo.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index cc5f88d6f54..92ab3a699cc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader *shader); bool nir_opt_intrinsics(nir_shader *shader); +bool nir_opt_large_constants(nir_shader *shader, + glsl_type_size_align_func size_align, + unsigned threshold); + bool 
nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); bool nir_opt_move_comparisons(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_large_constants.c b/src/compiler/nir/nir_opt_large_constants.c new file mode 100644 index 000..027c6e8e5b5 --- /dev/null +++ b/src/compiler/nir/nir_opt_large_constants.c @@ -0,0 +1,301 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" + +struct var_info { + bool is_constant; + bool found_read; +}; + +static nir_ssa_def * +build_constant_load(nir_builder *b, nir_deref_instr *deref, +glsl_type_size_align_func size_align) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + + const unsigned bit_size = glsl_get_bit_size(deref->type); + const unsigned num_components = glsl_get_vector_elements(deref->type); + + UNUSED unsigned var_size, var_align; + size_align(var->type, &var_size, &var_align); + assert(var->data.location % var_align == 0); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant); + load->num_components = num_components; + nir_intrinsic_set_base(load, var->data.location); + nir_intrinsic_set_range(load, var_size); + load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, size_align)); + nir_ssa_dest_init(&load->instr, &load->dest, + num_components, bit_size, NULL); + nir_builder_instr_insert(b, &load->instr); + + return &load->dest.ssa; +} + +static void +handle_constant_store(nir_builder *b, nir_intrinsic_instr *store, + glsl_type_size_align_func
[Mesa-dev] [PATCH] ac: move all LLVM module initialization into ac_create_module
From: Marek Olšák This removes some ugly code around module initialization. --- Dave, please rebase your code on top of this, and we don't need triple and data_layout in ac_llvm_compiler_info. src/amd/common/ac_llvm_helper.cpp | 10 ++ src/amd/common/ac_llvm_util.h | 1 + src/amd/vulkan/radv_nir_to_llvm.c | 12 ++-- src/gallium/drivers/radeonsi/si_pipe.c | 14 +++--- src/gallium/drivers/radeonsi/si_shader.h | 2 -- .../drivers/radeonsi/si_shader_tgsi_setup.c| 5 + 6 files changed, 17 insertions(+), 27 deletions(-) diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp index 1a2aee3bc9a..495bd98da9c 100644 --- a/src/amd/common/ac_llvm_helper.cpp +++ b/src/amd/common/ac_llvm_helper.cpp @@ -54,20 +54,30 @@ bool ac_is_sgpr_param(LLVMValueRef arg) LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) { return LLVMGetCalledValue(call); } bool ac_llvm_is_function(LLVMValueRef v) { return LLVMGetValueKind(v) == LLVMFunctionValueKind; } +LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx) +{ + llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); + LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx); + + llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); + llvm::unwrap(module)->setDataLayout(TM->createDataLayout()); + return module; +} + LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode) { LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); llvm::FastMathFlags flags; switch (float_mode) { case AC_FLOAT_MODE_DEFAULT: break; diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 0aa803c5bc1..6e6d15bb56c 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -76,20 +76,21 @@ LLVMTargetRef ac_get_llvm_target(const char *triple); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef
function, int attr_idx, enum ac_func_attr attr); void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); void ac_dump_module(LLVMModuleRef module); LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call); bool ac_llvm_is_function(LLVMValueRef v); +LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx); LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode); void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value); static inline unsigned ac_get_load_intr_attribs(bool can_speculate) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index cd8d86603bc..ce6d5e1547d 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3158,28 +3158,21 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, const struct radv_nir_compiler_options *options) { struct radv_shader_context ctx = {0}; unsigned i; ctx.options = options; ctx.shader_info = shader_info; ctx.context = LLVMContextCreate(); ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - LLVMSetTarget(ctx.ac.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); - - LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); - char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); - LLVMSetDataLayout(ctx.ac.module, data_layout_str); - LLVMDisposeTargetData(data_layout); - LLVMDisposeMessage(data_layout_str); + ctx.ac.module = ac_create_module(tm, ctx.context); enum ac_float_mode float_mode = options->unsafe_math ?
AC_FLOAT_MODE_UNSAFE_FP_MATH : AC_FLOAT_MODE_DEFAULT; ctx.ac.builder = ac_create_builder(ctx.context, float_mode); memset(shader_info, 0, sizeof(*shader_info)); for(int i = 0; i < shader_count; ++i) @@ -3606,24 +3599,23 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, struct radv_shader_variant_info *shader_info, const struct radv_nir_compiler_options *options) { struct radv_shader_context ctx = {0}; ctx.context = LLVMContextCreate(); ctx.options = options; ctx.shader_info = shader_info; ac_llvm_context_init(&ctx.ac,
Re: [Mesa-dev] [PATCH 11/11] ac/radv: using tls to store llvm related info and speed up compiles (v3)
I wonder if we can somehow make the TLS magic apply to RADV only. Radeonsi can do it without TLS. Then, the RADV-specific TLS code can be moved to RADV, and other code (if any) can be shared. It doesn't make much sense to do the TLS initialization in ac_llvm_compiler_init. It could be done in compile_to_memory_buffer and the fail path there could be removed. The call to LLVMTargetMachineEmitToMemoryBuffer can be removed. There is no use for it. Things like if(radv) and if(radeonsi) don't look good in the common code even if you don't write it exactly like that. Marek On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie wrote: > From: Dave Airlie > > I'd like to encourage people to test this to see if it helps (like > does it make app startup better or less hitching in dxvk). > > The basic idea is to store a bunch of LLVM related data structs > in thread local storage so we can avoid reiniting them every time > we compile a shader. Since we know llvm objects aren't thread safe > it has to be stored using TLS to avoid any collisions. > > This should remove all the fixed overheads setup costs of creating > the pass manager each time. > > This takes a demo app time to compile the radv meta shaders on nocache > and exit from 1.7s to 1s. > > TODO: this doesn't work for radeonsi yet, but I'm not sure how TLS > works if you have radeonsi and radv loaded at the same time, if > they'll magically try and use the same tls stuff, in which case > this might explode all over the place. 
> > v2: fix llvm6 build, inline emit function, handle multiple targets > in one thread > v3: rebase and port onto new structure > --- > src/amd/common/ac_llvm_helper.cpp | 120 -- > src/amd/common/ac_llvm_util.c | 10 +-- > src/amd/common/ac_llvm_util.h | 9 +++ > src/amd/vulkan/radv_debug.h | 1 + > src/amd/vulkan/radv_device.c | 1 + > src/amd/vulkan/radv_shader.c | 2 + > 6 files changed, 132 insertions(+), 11 deletions(-) > > diff --git a/src/amd/common/ac_llvm_helper.cpp > b/src/amd/common/ac_llvm_helper.cpp > index 27403dbe085..f1f1399b3fb 100644 > --- a/src/amd/common/ac_llvm_helper.cpp > +++ b/src/amd/common/ac_llvm_helper.cpp > @@ -31,12 +31,21 @@ > > #include "ac_llvm_util.h" > #include > -#include > -#include > -#include > -#include > +#include > #include > #include > +#include > + > +#include > +#include > +#if HAVE_LLVM >= 0x0700 > +#include > +#endif > + > +#if HAVE_LLVM < 0x0700 > +#include "llvm/Support/raw_ostream.h" > +#endif > +#include > > void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) > { > @@ -101,11 +110,110 @@ > ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) > delete reinterpret_cast(library_info); > } > > +class ac_llvm_per_thread_info { > +public: > + ac_llvm_per_thread_info(enum radeon_family arg_family, > + enum ac_target_machine_options arg_tm_options) > + : family(arg_family), tm_options(arg_tm_options), > + OStream(CodeString) {} > + ~ac_llvm_per_thread_info() { > + ac_llvm_compiler_dispose_internal(_info); > + } > + > + struct ac_llvm_compiler_info llvm_info; > + enum radeon_family family; > + enum ac_target_machine_options tm_options; > + llvm::SmallString<0> CodeString; > + llvm::raw_svector_ostream OStream; > + llvm::legacy::PassManager pass; > +}; > + > +/* we have to store a linked list per thread due to the possiblity of > multiple gpus being required */ > +static thread_local std::list > ac_llvm_per_thread_list; > + > bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info, > 
LLVMModuleRef M, > char **ErrorMessage, > LLVMMemoryBufferRef *OutMemBuf) > { > - return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, > LLVMObjectFile, > - ErrorMessage, OutMemBuf); > + ac_llvm_per_thread_info *thread_info = nullptr; > + if (info->thread_stored) { > + for (auto : ac_llvm_per_thread_list) { > + if (I.llvm_info.tm == info->tm) { > + thread_info = > + break; > + } > + } > + > + if (!thread_info) { > + assert(0); > + return false; > + } > + } else { > + return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, > LLVMObjectFile, > + ErrorMessage, > OutMemBuf); > + } > + > + llvm::TargetMachine *TM = > reinterpret_cast(thread_info->llvm_info.tm); > + llvm::Module *Mod = llvm::unwrap(M); > + llvm::StringRef Data; > + > + Mod->setDataLayout(TM->createDataLayout()); > + >
Re: [Mesa-dev] [PATCH 09/11] radeonsi: port to shared ac llvm per-thread structs.
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie wrote: > From: Dave Airlie > > The new structs are pretty much based on the radeonsi code, > so it just ports over the uses to them. > --- > src/gallium/drivers/radeonsi/si_compute.c | 2 +- > src/gallium/drivers/radeonsi/si_pipe.c| 50 --- > src/gallium/drivers/radeonsi/si_pipe.h| 6 +-- > src/gallium/drivers/radeonsi/si_shader.c | 24 - > src/gallium/drivers/radeonsi/si_shader.h | 18 ++- > .../drivers/radeonsi/si_shader_internal.h | 6 +-- > .../drivers/radeonsi/si_shader_tgsi_setup.c | 9 ++-- > .../drivers/radeonsi/si_state_shaders.c | 4 +- > 8 files changed, 40 insertions(+), 79 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c > b/src/gallium/drivers/radeonsi/si_compute.c > index cb320323db3..e8ad6ce0e16 100644 > --- a/src/gallium/drivers/radeonsi/si_compute.c > +++ b/src/gallium/drivers/radeonsi/si_compute.c > @@ -86,7 +86,7 @@ static void si_create_compute_state_async(void *job, int > thread_index) > struct si_compute *program = (struct si_compute *)job; > struct si_shader *shader = >shader; > struct si_shader_selector sel; > - struct si_compiler *compiler; > + struct ac_llvm_compiler_info *compiler; > struct pipe_debug_callback *debug = > >compiler_ctx_state.debug; > > assert(!debug->debug_message || debug->async); > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index aca61670765..e4906a479c9 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -105,51 +105,21 @@ static const struct debug_named_value debug_options[] = > { > }; > > static void si_init_compiler(struct si_screen *sscreen, > -struct si_compiler *compiler) > +struct ac_llvm_compiler_info *compiler) > { > enum ac_target_machine_options tm_options = > (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | > (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK > : 0) | > (sscreen->info.chip_class < GFX9 ? 
AC_TM_FORCE_DISABLE_XNACK > : 0) | > - (!sscreen->llvm_has_working_vgpr_indexing ? > AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0); > + (!sscreen->llvm_has_working_vgpr_indexing ? > AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | > + (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0); > > ac_init_llvm_once(); > - compiler->tm = ac_create_target_machine(sscreen->info.family, > - tm_options, > >triple); > - if (!compiler->tm) > - return; > - > - compiler->target_library_info = > - gallivm_create_target_library_info(compiler->triple); > - if (!compiler->target_library_info) > - return; > - > - compiler->passmgr = ac_init_passmgr(compiler->target_library_info, > - (sscreen->debug_flags & > DBG(CHECK_IR))); > - if (!compiler->passmgr) > - return; > > - /* Get the data layout. */ > - LLVMTargetDataRef data_layout = > LLVMCreateTargetDataLayout(compiler->tm); > - if (!data_layout) > - return; > - compiler->data_layout = LLVMCopyStringRepOfTargetData(data_layout); > - LLVMDisposeTargetData(data_layout); > -} > - > -static void si_destroy_compiler(struct si_compiler *compiler) > -{ > - if (compiler->data_layout) > - LLVMDisposeMessage((char*)compiler->data_layout); > - if (compiler->passmgr) > - LLVMDisposePassManager(compiler->passmgr); > -#if HAVE_LLVM >= 0x0700 > - /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it > there. 
*/ > - if (compiler->target_library_info) > - > gallivm_dispose_target_library_info(compiler->target_library_info); > -#endif > - if (compiler->tm) > - LLVMDisposeTargetMachine(compiler->tm); > + ac_llvm_compiler_init(compiler, > + true, > + sscreen->info.family, > + tm_options); > } > > /* > @@ -250,7 +220,7 @@ static void si_destroy_context(struct pipe_context > *context) > sctx->ws->fence_reference(>last_sdma_fence, NULL); > r600_resource_reference(>eop_bug_scratch, NULL); > > - si_destroy_compiler(>compiler); > + ac_llvm_compiler_dispose(>compiler); > > si_saved_cs_reference(>current_saved_cs, NULL); > > @@ -659,10 +629,10 @@ static void si_destroy_screen(struct pipe_screen* > pscreen) > util_queue_destroy(>shader_compiler_queue_low_priority); > > for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++) > - si_destroy_compiler(>compiler[i]); > +
[Mesa-dev] [PATCH 0/6] anv: Add support for an on-disk transparent pipeline
I've been resisting this for a long time and I'm still a bit grumpy about it but I think Pierre-Loup has convinced me that it really is needed for Steam shader caching. This series gives apps that don't use a pipeline cache a default in-memory cache as well as backing them with the disk cache in case they don't save their pipeline cache off. Fortunately, it's pretty straightforward and non-invasive to add disk cache support. Goodbye explicit pipeline caching, you will be missed... Jason Ekstrand (6): anv: Be more careful about hashing pipeline layouts anv: Use a default pipeline cache if none is specified anv/pipeline: Stop optimizing for not having a cache anv: Add device-level helpers for searching for and uploading kernels anv/pipeline_cache: Add a _locked suffix to a function anv: Add support for the on-disk shader cache src/intel/vulkan/anv_blorp.c | 12 +- src/intel/vulkan/anv_descriptor_set.c | 41 +- src/intel/vulkan/anv_device.c | 43 ++ src/intel/vulkan/anv_pipeline.c | 184 +++--- src/intel/vulkan/anv_pipeline_cache.c | 151 + src/intel/vulkan/anv_private.h| 23 +++- src/intel/vulkan/genX_pipeline.c | 8 ++ 7 files changed, 318 insertions(+), 144 deletions(-) -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] anv: Add support for the on-disk shader cache
--- src/intel/vulkan/anv_device.c | 36 ++ src/intel/vulkan/anv_pipeline_cache.c | 98 --- src/intel/vulkan/anv_private.h| 3 + 3 files changed, 126 insertions(+), 11 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a864c702c3f..ca6e1c0cace 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -35,6 +35,7 @@ #include "util/strtod.h" #include "util/debug.h" #include "util/build_id.h" +#include "util/disk_cache.h" #include "util/mesa-sha1.h" #include "vk_util.h" #include "common/gen_defines.h" @@ -233,6 +234,8 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) "build-id too short. It needs to be a SHA"); } + memcpy(device->driver_build_sha1, build_id_data(note), 20); + struct mesa_sha1 sha1_ctx; uint8_t sha1[20]; STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); @@ -271,6 +274,35 @@ anv_physical_device_init_uuids(struct anv_physical_device *device) return VK_SUCCESS; } +static void +anv_physical_device_init_disk_cache(struct anv_physical_device *device) +{ +#ifdef ENABLE_SHADER_CACHE + char renderer[9]; + MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x", + device->chipset_id); + assert(len == sizeof(renderer) - 1); + + char timestamp[41]; + _mesa_sha1_format(timestamp, device->driver_build_sha1); + + device->disk_cache = disk_cache_create(renderer, timestamp, 0); +#else + device->disk_cache = NULL; +#endif +} + +static void +anv_physical_device_free_disk_cache(struct anv_physical_device *device) +{ +#ifdef ENABLE_SHADER_CACHE + if (device->disk_cache) + disk_cache_destroy(device->disk_cache); +#else + assert(device->disk_cache == NULL); +#endif +} + static VkResult anv_physical_device_init(struct anv_physical_device *device, struct anv_instance *instance, @@ -442,6 +474,8 @@ anv_physical_device_init(struct anv_physical_device *device, if (result != VK_SUCCESS) goto fail; + anv_physical_device_init_disk_cache(device); + if 
(instance->enabled_extensions.KHR_display) { master_fd = open(primary_path, O_RDWR | O_CLOEXEC); if (master_fd >= 0) { @@ -459,6 +493,7 @@ anv_physical_device_init(struct anv_physical_device *device, result = anv_init_wsi(device); if (result != VK_SUCCESS) { ralloc_free(device->compiler); + anv_physical_device_free_disk_cache(device); goto fail; } @@ -481,6 +516,7 @@ static void anv_physical_device_finish(struct anv_physical_device *device) { anv_finish_wsi(device); + anv_physical_device_free_disk_cache(device); ralloc_free(device->compiler); close(device->local_fd); if (device->master_fd >= 0) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index e57cd1c75c6..d4c7262dc05 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -24,6 +24,8 @@ #include "compiler/blob.h" #include "util/hash_table.h" #include "util/debug.h" +#include "util/disk_cache.h" +#include "util/mesa-sha1.h" #include "anv_private.h" struct anv_shader_bin * @@ -280,6 +282,25 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, return shader; } +static void +anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache, + struct anv_shader_bin *bin) +{ + if (!cache->cache) + return; + + pthread_mutex_lock(>mutex); + + struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key); + if (entry == NULL) { + /* Take a reference for the cache */ + anv_shader_bin_ref(bin); + _mesa_hash_table_insert(cache->cache, bin->key, bin); + } + + pthread_mutex_unlock(>mutex); +} + static struct anv_shader_bin * anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, @@ -540,7 +561,38 @@ anv_device_search_for_kernel(struct anv_device *device, struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size) { - return cache ? 
anv_pipeline_cache_search(cache, key_data, key_size) : NULL; + struct anv_shader_bin *bin; + + if (cache) { + bin = anv_pipeline_cache_search(cache, key_data, key_size); + if (bin) + return bin; + } + +#ifdef ENABLE_SHADER_CACHE + struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache; + if (disk_cache) { + cache_key cache_key; + disk_cache_compute_key(disk_cache, key_data, key_size, cache_key); + + size_t buffer_size; + uint8_t *buffer = disk_cache_get(disk_cache, cache_key, _size); + if (buffer) { + struct blob_reader blob; + blob_reader_init(, buffer, buffer_size); + bin =
[Mesa-dev] [PATCH 2/6] anv: Use a default pipeline cache if none is specified
If a client is dumb enough to not specify a pipeline cache, give it a default. We have to create one anyway for blorp so we may as well let the client cache shaders in it. --- src/intel/vulkan/anv_blorp.c | 12 +--- src/intel/vulkan/anv_device.c | 7 +++ src/intel/vulkan/anv_pipeline_cache.c | 12 ++-- src/intel/vulkan/anv_private.h| 4 +++- src/intel/vulkan/genX_pipeline.c | 8 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 4dbfb7a83fd..8e6d7db6e40 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -30,11 +30,11 @@ lookup_blorp_shader(struct blorp_context *blorp, { struct anv_device *device = blorp->driver_ctx; - /* The blorp cache must be a real cache */ - assert(device->blorp_shader_cache.cache); + /* The default cache must be a real cache */ + assert(device->default_pipeline_cache.cache); struct anv_shader_bin *bin = - anv_pipeline_cache_search(>blorp_shader_cache, key, key_size); + anv_pipeline_cache_search(>default_pipeline_cache, key, key_size); if (!bin) return false; @@ -60,7 +60,7 @@ upload_blorp_shader(struct blorp_context *blorp, struct anv_device *device = blorp->driver_ctx; /* The blorp cache must be a real cache */ - assert(device->blorp_shader_cache.cache); + assert(device->default_pipeline_cache.cache); struct anv_pipeline_bind_map bind_map = { .surface_count = 0, @@ -68,7 +68,7 @@ upload_blorp_shader(struct blorp_context *blorp, }; struct anv_shader_bin *bin = - anv_pipeline_cache_upload_kernel(>blorp_shader_cache, + anv_pipeline_cache_upload_kernel(>default_pipeline_cache, key, key_size, kernel, kernel_size, NULL, 0, prog_data, prog_data_size, _map); @@ -90,7 +90,6 @@ upload_blorp_shader(struct blorp_context *blorp, void anv_device_init_blorp(struct anv_device *device) { - anv_pipeline_cache_init(>blorp_shader_cache, device, true); blorp_init(>blorp, device, >isl_dev); device->blorp.compiler = device->instance->physicalDevice.compiler; 
device->blorp.lookup_shader = lookup_blorp_shader; @@ -124,7 +123,6 @@ void anv_device_finish_blorp(struct anv_device *device) { blorp_finish(>blorp); - anv_pipeline_cache_finish(>blorp_shader_cache); } static void diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 077f5c16e46..a864c702c3f 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -604,6 +604,9 @@ VkResult anv_CreateInstance( return vk_error(result); } + instance->pipeline_cache_enabled = + env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); + _mesa_locale_init(); VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); @@ -1728,6 +1731,8 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_workaround_bo; + anv_pipeline_cache_init(>default_pipeline_cache, device, true); + anv_device_init_blorp(device); anv_device_init_border_colors(device); @@ -1778,6 +1783,8 @@ void anv_DestroyDevice( anv_device_finish_blorp(device); + anv_pipeline_cache_finish(>default_pipeline_cache); + anv_queue_finish(>queue); #ifdef HAVE_VALGRIND diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 07b745b9c7a..5262753f725 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -394,15 +394,6 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, } } -static bool -pipeline_cache_enabled() -{ - static int enabled = -1; - if (enabled < 0) - enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); - return enabled; -} - VkResult anv_CreatePipelineCache( VkDevice_device, const VkPipelineCacheCreateInfo*pCreateInfo, @@ -421,7 +412,8 @@ VkResult anv_CreatePipelineCache( if (cache == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); + anv_pipeline_cache_init(cache, device, + device->instance->pipeline_cache_enabled); if (pCreateInfo->initialDataSize > 0) anv_pipeline_cache_load(cache, diff --git 
a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 139c48b7e46..4fa23357dd6 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -891,6 +891,8 @@ struct anv_instance { int physicalDeviceCount; struct anv_physical_device physicalDevice; +boolpipeline_cache_enabled; + struct vk_debug_report_instance
[Mesa-dev] [PATCH 5/6] anv/pipeline_cache: Add a _locked suffix to a function
--- src/intel/vulkan/anv_pipeline_cache.c | 29 ++- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 2bc64f5bdd1..e57cd1c75c6 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -281,15 +281,16 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, } static struct anv_shader_bin * -anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, - const void *key_data, uint32_t key_size, - const void *kernel_data, uint32_t kernel_size, - const void *constant_data, - uint32_t constant_data_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - const void *prog_data_param, - const struct anv_pipeline_bind_map *bind_map) +anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, + uint32_t kernel_size, + const void *constant_data, + uint32_t constant_data_size, + const struct brw_stage_prog_data *prog_data, + uint32_t prog_data_size, + const void *prog_data_param, + const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *shader = anv_pipeline_cache_search_locked(cache, key_data, key_size); @@ -324,11 +325,11 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, pthread_mutex_lock(>mutex); struct anv_shader_bin *bin = - anv_pipeline_cache_add_shader(cache, key_data, key_size, - kernel_data, kernel_size, - constant_data, constant_data_size, - prog_data, prog_data_size, - prog_data->param, bind_map); + anv_pipeline_cache_add_shader_locked(cache, key_data, key_size, + kernel_data, kernel_size, + constant_data, constant_data_size, + prog_data, prog_data_size, + prog_data->param, bind_map); pthread_mutex_unlock(>mutex); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] anv/pipeline: Stop optimizing for not having a cache
Before, we were only hashing the shader if we had a shader cache to cache things in. This means that if we ever get it wrong, we could end up trying to cache a shader with an undefined hash. Since not having a shader cache is an extremely uncommon case, let's optimize for code clarity and obvious correctness over avoiding a hash operation. --- src/intel/vulkan/anv_pipeline.c | 65 - 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index dc3b4a0e3ba..e97df58d554 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -527,18 +527,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_vs_prog_key key; struct anv_shader_bin *bin = NULL; - unsigned char sha1[20]; populate_vs_prog_key(>device->info, ); ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); - if (cache) { - anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, - MESA_SHADER_VERTEX, spec_info, - , sizeof(key), sha1); + unsigned char sha1[20]; + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, +MESA_SHADER_VERTEX, spec_info, +, sizeof(key), sha1); + if (cache) bin = anv_pipeline_cache_search(cache, sha1, 20); - } if (bin == NULL) { struct brw_vs_prog_data prog_data = {}; @@ -653,8 +652,6 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, struct brw_tes_prog_key tes_key = {}; struct anv_shader_bin *tcs_bin = NULL; struct anv_shader_bin *tes_bin = NULL; - unsigned char tcs_sha1[40]; - unsigned char tes_sha1[40]; populate_sampler_prog_key(>device->info, _key.tex); populate_sampler_prog_key(>device->info, _key.tex); @@ -662,15 +659,18 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); + unsigned char tcs_sha1[40]; + unsigned char tes_sha1[40]; + anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint, 
+MESA_SHADER_TESS_CTRL, tcs_spec_info, +_key, sizeof(tcs_key), tcs_sha1); + anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint, +MESA_SHADER_TESS_EVAL, tes_spec_info, +_key, sizeof(tes_key), tes_sha1); + memcpy(_sha1[20], tes_sha1, 20); + memcpy(_sha1[20], tcs_sha1, 20); + if (cache) { - anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint, - MESA_SHADER_TESS_CTRL, tcs_spec_info, - _key, sizeof(tcs_key), tcs_sha1); - anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint, - MESA_SHADER_TESS_EVAL, tes_spec_info, - _key, sizeof(tes_key), tes_sha1); - memcpy(_sha1[20], tes_sha1, 20); - memcpy(_sha1[20], tcs_sha1, 20); tcs_bin = anv_pipeline_cache_search(cache, tcs_sha1, sizeof(tcs_sha1)); tes_bin = anv_pipeline_cache_search(cache, tes_sha1, sizeof(tes_sha1)); } @@ -802,18 +802,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_gs_prog_key key; struct anv_shader_bin *bin = NULL; - unsigned char sha1[20]; populate_gs_prog_key(>device->info, ); ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); - if (cache) { - anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, - MESA_SHADER_GEOMETRY, spec_info, - , sizeof(key), sha1); + unsigned char sha1[20]; + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, +MESA_SHADER_GEOMETRY, spec_info, +, sizeof(key), sha1); + if (cache) bin = anv_pipeline_cache_search(cache, sha1, 20); - } if (bin == NULL) { struct brw_gs_prog_data prog_data = {}; @@ -884,18 +883,17 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->device->instance->physicalDevice.compiler; struct brw_wm_prog_key key; struct anv_shader_bin *bin = NULL; - unsigned char sha1[20]; populate_wm_prog_key(pipeline, info, ); ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); - if (cache) { - anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, - MESA_SHADER_FRAGMENT, spec_info, - , 
sizeof(key), sha1); + unsigned char sha1[20]; + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, +MESA_SHADER_FRAGMENT, spec_info, +
[Mesa-dev] [PATCH 4/6] anv: Add device-level helpers for searching for and uploading kernels
--- src/intel/vulkan/anv_pipeline.c | 127 ++ src/intel/vulkan/anv_pipeline_cache.c | 34 +++ src/intel/vulkan/anv_private.h| 16 3 files changed, 98 insertions(+), 79 deletions(-) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index e97df58d554..2a36f2e6bc1 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -480,33 +480,6 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias) prog_data->binding_table.image_start = bias; } -static struct anv_shader_bin * -anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const void *key_data, uint32_t key_size, - const void *kernel_data, uint32_t kernel_size, - const void *constant_data, - uint32_t constant_data_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - const struct anv_pipeline_bind_map *bind_map) -{ - if (cache) { - return anv_pipeline_cache_upload_kernel(cache, key_data, key_size, - kernel_data, kernel_size, - constant_data, constant_data_size, - prog_data, prog_data_size, - bind_map); - } else { - return anv_shader_bin_create(pipeline->device, key_data, key_size, - kernel_data, kernel_size, - constant_data, constant_data_size, - prog_data, prog_data_size, - prog_data->param, bind_map); - } -} - - static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, @@ -536,8 +509,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, MESA_SHADER_VERTEX, spec_info, , sizeof(key), sha1); - if (cache) - bin = anv_pipeline_cache_search(cache, sha1, 20); + bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20); if (bin == NULL) { struct brw_vs_prog_data prog_data = {}; @@ -576,12 +548,12 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } unsigned code_size = prog_data.base.base.program_size; - bin = anv_pipeline_upload_kernel(pipeline, cache, 
sha1, 20, - shader_code, code_size, - nir->constant_data, - nir->constant_data_size, - _data.base.base, sizeof(prog_data), - ); + bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20, + shader_code, code_size, + nir->constant_data, + nir->constant_data_size, + _data.base.base, sizeof(prog_data), + ); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -670,10 +642,10 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, memcpy(_sha1[20], tes_sha1, 20); memcpy(_sha1[20], tcs_sha1, 20); - if (cache) { - tcs_bin = anv_pipeline_cache_search(cache, tcs_sha1, sizeof(tcs_sha1)); - tes_bin = anv_pipeline_cache_search(cache, tes_sha1, sizeof(tes_sha1)); - } + tcs_bin = anv_device_search_for_kernel(pipeline->device, cache, + tcs_sha1, sizeof(tcs_sha1)); + tes_bin = anv_device_search_for_kernel(pipeline->device, cache, + tes_sha1, sizeof(tes_sha1)); if (tcs_bin == NULL || tes_bin == NULL) { struct brw_tcs_prog_data tcs_prog_data = {}; @@ -745,14 +717,14 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, } unsigned code_size = tcs_prog_data.base.base.program_size; - tcs_bin = anv_pipeline_upload_kernel(pipeline, cache, - tcs_sha1, sizeof(tcs_sha1), - shader_code, code_size, - tcs_nir->constant_data, - tcs_nir->constant_data_size, - _prog_data.base.base, - sizeof(tcs_prog_data), - _map); + tcs_bin =
[Mesa-dev] [PATCH 1/6] anv: Be more careful about hashing pipeline layouts
Previously, we just hashed the entire descriptor set layout verbatim. This meant that a bunch of extra stuff such as pointers and reference counts made its way into the cache. It also meant that we weren't properly hashing in the Y'CbCr conversion information from bound immutable samplers. Cc: mesa-sta...@lists.freedesktop.org --- src/intel/vulkan/anv_descriptor_set.c | 41 +-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 9534ba81cdb..8f7f1f3ba38 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -257,13 +257,48 @@ void anv_DestroyDescriptorSetLayout( anv_descriptor_set_layout_unref(device, set_layout); } +#define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x)); + +static void +sha1_update_immutable_sampler(struct mesa_sha1 *ctx, + const struct anv_sampler *sampler) +{ + if (!sampler->conversion) + return; + + /* The only thing that affects the shader is ycbcr conversion */ + _mesa_sha1_update(ctx, sampler->conversion, + sizeof(*sampler->conversion)); +} + +static void +sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx, + const struct anv_descriptor_set_binding_layout *layout) +{ + SHA1_UPDATE_VALUE(ctx, layout->array_size); + SHA1_UPDATE_VALUE(ctx, layout->descriptor_index); + SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index); + SHA1_UPDATE_VALUE(ctx, layout->buffer_index); + _mesa_sha1_update(ctx, layout->stage, sizeof(layout->stage)); + + if (layout->immutable_samplers) { + for (uint16_t i = 0; i < layout->array_size; i++) + sha1_update_immutable_sampler(ctx, layout->immutable_samplers[i]); + } +} + static void sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, const struct anv_descriptor_set_layout *layout) { - size_t size = sizeof(*layout) + - sizeof(layout->binding[0]) * layout->binding_count; - _mesa_sha1_update(ctx, layout, size); + SHA1_UPDATE_VALUE(ctx, 
layout->binding_count); + SHA1_UPDATE_VALUE(ctx, layout->size); + SHA1_UPDATE_VALUE(ctx, layout->shader_stages); + SHA1_UPDATE_VALUE(ctx, layout->buffer_count); + SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count); + + for (uint16_t i = 0; i < layout->binding_count; i++) + sha1_update_descriptor_set_binding_layout(ctx, >binding[i]); } /* -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/11] ac/radv: move llvm compiler info to struct and init in one place
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie wrote: > From: Dave Airlie > > This creates a common per-thread compiler info struct, and adds > the init code to it. This is mostly ported from radeonsi. > > The common info struct is used in radv first and replaces the > current code. > --- > src/amd/common/ac_llvm_util.c | 50 +++ > src/amd/common/ac_llvm_util.h | 14 + > src/amd/vulkan/radv_nir_to_llvm.c | 39 ++-- > src/amd/vulkan/radv_private.h | 7 ++--- > src/amd/vulkan/radv_shader.c | 16 +- > 5 files changed, 91 insertions(+), 35 deletions(-) > > diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c > index dd2469d4606..85dc9d72a5c 100644 > --- a/src/amd/common/ac_llvm_util.c > +++ b/src/amd/common/ac_llvm_util.c > @@ -188,6 +188,56 @@ LLVMPassManagerRef > ac_init_passmgr(LLVMTargetLibraryInfoRef target_library_info, > return passmgr; > } > > +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, > + bool add_target_library_info, > + enum radeon_family family, > + enum ac_target_machine_options tm_options) > +{ > + memset(info, 0, sizeof(*info)); > + info->tm = ac_create_target_machine(family, tm_options, > >triple); > + if (!info->tm) > + return false; > + > + /* Get the data layout. */ > + LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(info->tm); > + if (!data_layout) > + goto fail; > + info->data_layout = LLVMCopyStringRepOfTargetData(data_layout); > + LLVMDisposeTargetData(data_layout); > + > +#if HAVE_LLVM < 0x0700 This #if is not needed. You already have a bool flag coming from radv. You can modify the bool value in radv. > + if (add_target_library_info) > +#endif Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/11] ac/radv: split the non-common init_once code from the common target code.
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie wrote: > From: Dave Airlie > > This just splits out the non-shared code and reuses ac_get_llvm_target in > radv. > --- > src/amd/common/ac_llvm_util.c | 6 -- > src/amd/common/ac_llvm_util.h | 2 ++ > src/amd/vulkan/radv_shader.c | 20 > src/gallium/drivers/radeonsi/si_pipe.c | 1 + > 4 files changed, 11 insertions(+), 18 deletions(-) > > diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c > index be2d92b4c08..f33691dcc92 100644 > --- a/src/amd/common/ac_llvm_util.c > +++ b/src/amd/common/ac_llvm_util.c > @@ -57,13 +57,15 @@ static void ac_init_llvm_target() > > static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; > > +void ac_init_llvm_once(void) { LLVM-ism ({ not on the next line) Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] nvc0/ir: return 0 in imageLoad on incomplete textures
We already guarded all OP_SULDP against out of bound accesses, but those ended up just reusing whatever value was stored in the dest registers. fixes CTS test shader_image_load_store.incomplete_textures v2: fix for loads not ending up with predicates (bindless_texture) Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 33 +-- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 5723847234e..f55e9a34c59 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2180,13 +2180,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) } } +void +NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su) +{ + if (!su->getPredicate()) + return; + + bld.setPosition(su, true); + + for (unsigned i = 0; su->defExists(i); ++i) { + ValueDef = su->def(i); + + Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + assert(su->cc == CC_NOT_P); + mov->setPredicate(CC_P, su->getPredicate()); + Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0)); + + def.replace(uni->getDef(0), true); + uni->setSrc(0, def.get()); + } +} + void NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) { processSurfaceCoordsNVE4(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { assert(su->getPredicate()); @@ -2296,8 +2319,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) processSurfaceCoordsNVC0(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { const int dim = su->tex.target.getDim(); @@ 
-2397,8 +2422,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) { processSurfaceCoordsGM107(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDP) { Value *def = su->getDef(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 91771fbf7e9..d7350d03b78 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -143,6 +143,7 @@ private: void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + void insertOOBSurfaceOpResult(TexInstruction *); Value *calculateSampleOffset(Value *sampleID); protected: -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 19/19] i965: Support saving the gen program with glGetProgramBinary
I've sent some minor nits. Otherwise the series looks good, although I did only skim most of the actual new i965 blob changes. Series: Reviewed-by: Timothy Arceri You might want to wait for Tapani to take a look also. On 15/05/18 02:52, Jordan Justen wrote: Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_program_binary.c | 72 +++--- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c b/src/mesa/drivers/dri/i965/brw_program_binary.c index 1fe3ffd5bf9..db03332241e 100644 --- a/src/mesa/drivers/dri/i965/brw_program_binary.c +++ b/src/mesa/drivers/dri/i965/brw_program_binary.c @@ -126,6 +126,16 @@ driver_blob_is_ready(void *blob, uint32_t size, bool with_gen_program) } } +static void +serialize_nir_part(struct blob *writer, struct gl_program *prog) +{ + blob_write_uint32(writer, NIR_PART); + intptr_t size_offset = blob_reserve_uint32(writer); + size_t nir_start = writer->size; + nir_serialize(writer, prog->nir); + blob_overwrite_uint32(writer, size_offset, writer->size - nir_start); +} + void brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog) { @@ -138,11 +148,7 @@ brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog) struct blob writer; blob_init(); - blob_write_uint32(, NIR_PART); - intptr_t size_offset = blob_reserve_uint32(); - size_t nir_start = writer.size; - nir_serialize(, prog->nir); - blob_overwrite_uint32(, size_offset, writer.size - nir_start); + serialize_nir_part(, prog); blob_write_uint32(, END_PART); prog->driver_cache_blob = ralloc_size(NULL, writer.size); memcpy(prog->driver_cache_blob, writer.data, writer.size); @@ -237,12 +243,66 @@ brw_deserialize_program_binary(struct gl_context *ctx, brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); } +static void +serialize_gen_part(struct blob *writer, struct gl_context *ctx, + struct gl_shader_program *sh_prog, + struct gl_program *prog) +{ + struct brw_context *brw = 
brw_context(ctx); + + union brw_any_prog_key key; + brw_populate_default_key(>screen->devinfo, , sh_prog, prog); + + const gl_shader_stage stage = prog->info.stage; + uint32_t offset = 0; + void *prog_data = NULL; + if (brw_search_cache(>cache, brw_stage_cache_id(stage), , +brw_prog_key_size(stage), , _data, +false)) { + const void *program_map = brw->cache.map + offset; + /* TODO: Improve perf for non-LLC. It would be best to save it at + * program generation time when the program is in normal memory + * accessible with cache to the CPU. Another easier change would be to + * use _mesa_streaming_load_memcpy to read from the program mapped + * memory. + */ + blob_write_uint32(writer, GEN_PART); + intptr_t size_offset = blob_reserve_uint32(writer); + size_t gen_start = writer->size; + blob_write_bytes(writer, , brw_prog_key_size(stage)); + brw_write_blob_program_data(writer, stage, program_map, prog_data); + blob_overwrite_uint32(writer, size_offset, writer->size - gen_start); + } +} + void brw_serialize_program_binary(struct gl_context *ctx, struct gl_shader_program *sh_prog, struct gl_program *prog) { - brw_program_serialize_nir(ctx, prog); + if (driver_blob_is_ready(prog->driver_cache_blob, +prog->driver_cache_blob_size, true)) + return; + + if (prog->driver_cache_blob) { + if (!prog->nir) { + /* If we loaded from the disk shader cache, then the nir might not + * have been deserialized yet. + */ + brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); + } + ralloc_free(prog->driver_cache_blob); + } + + struct blob writer; + blob_init(); + serialize_nir_part(, prog); + serialize_gen_part(, ctx, sh_prog, prog); + blob_write_uint32(, END_PART); + prog->driver_cache_blob = ralloc_size(NULL, writer.size); + memcpy(prog->driver_cache_blob, writer.data, writer.size); + prog->driver_cache_blob_size = writer.size; + blob_finish(); } void ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 16/19] i965: Add brw_populate_default_key
On 15/05/18 02:52, Jordan Justen wrote: We will need to populate the default key for ARB_get_program_binary to allow us to retrieve the default gen program to store save in the store save - > store ??? program binary. Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_cs.c | 15 ++--- src/mesa/drivers/dri/i965/brw_cs.h | 4 +++ src/mesa/drivers/dri/i965/brw_gs.c | 16 ++--- src/mesa/drivers/dri/i965/brw_gs.h | 4 +++ src/mesa/drivers/dri/i965/brw_program.c | 35 src/mesa/drivers/dri/i965/brw_program.h | 15 + src/mesa/drivers/dri/i965/brw_tcs.c | 57 +++-- src/mesa/drivers/dri/i965/brw_tes.c | 40 ++- src/mesa/drivers/dri/i965/brw_vs.c | 26 ++- src/mesa/drivers/dri/i965/brw_vs.h | 4 +++ src/mesa/drivers/dri/i965/brw_wm.c | 48 --- src/mesa/drivers/dri/i965/brw_wm.h | 4 +++ 12 files changed, 195 insertions(+), 73 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 9b1b0832b5a..614eb64bca9 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -184,6 +184,16 @@ brw_upload_cs_prog(struct brw_context *brw) assert(success); } +void +brw_cs_populate_default_key(const struct gen_device_info *devinfo, +struct brw_cs_prog_key *key, +struct gl_program *prog) +{ + memset(key, 0, sizeof(*key)); + key->program_string_id = brw_program(prog)->id; + + brw_setup_tex_for_precompile(devinfo, >tex, prog); +} bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog) @@ -193,10 +203,7 @@ brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog) struct brw_program *bcp = brw_program(prog); - memset(, 0, sizeof(key)); - key.program_string_id = bcp->id; - - brw_setup_tex_for_precompile(>screen->devinfo, , prog); + brw_cs_populate_default_key(>screen->devinfo, , prog); uint32_t old_prog_offset = brw->cs.base.prog_offset; struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data; diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 
60eb19c3594..669d4b544eb 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -34,6 +34,10 @@ brw_upload_cs_prog(struct brw_context *brw); void brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key); +void +brw_cs_populate_default_key(const struct gen_device_info *devinfo, +struct brw_cs_prog_key *key, +struct gl_program *prog); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index f488fab009e..9d4dc942d34 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -208,6 +208,17 @@ brw_upload_gs_prog(struct brw_context *brw) assert(success); } +void +brw_gs_populate_default_key(const struct gen_device_info *devinfo, +struct brw_gs_prog_key *key, +struct gl_program *prog) +{ + memset(key, 0, sizeof(*key)); + + brw_setup_tex_for_precompile(devinfo, >tex, prog); + key->program_string_id = brw_program(prog)->id; +} + bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) { @@ -219,10 +230,7 @@ brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) struct brw_program *bgp = brw_program(prog); - memset(, 0, sizeof(key)); - - brw_setup_tex_for_precompile(>screen->devinfo, , prog); - key.program_string_id = bgp->id; + brw_gs_populate_default_key(>screen->devinfo, , prog); success = brw_codegen_gs_prog(brw, bgp, ); diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 537a41679df..cff994a9323 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -40,6 +40,10 @@ brw_upload_gs_prog(struct brw_context *brw); void brw_gs_populate_key(struct brw_context *brw, struct brw_gs_prog_key *key); +void +brw_gs_populate_default_key(const struct gen_device_info *devinfo, +struct brw_gs_prog_key *key, +struct gl_program *prog); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c index ca934b91c9a..a54d90e92a7 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -48,6 +48,11 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" +#include "brw_cs.h" +#include "brw_gs.h" +#include "brw_vs.h" +#include "brw_wm.h" + static bool brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) { @@
Re: [Mesa-dev] [PATCH 09/19] i965: Add brw_stage_cache_id to map gl stages to brw cache_ids
On 15/05/18 02:52, Jordan Justen wrote: Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_program_cache.c | 15 +++ src/mesa/drivers/dri/i965/brw_state.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index 78159288af0..40f1078ca98 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -78,6 +78,21 @@ struct brw_cache_item { struct brw_cache_item *next; }; +enum brw_cache_id +brw_stage_cache_id(gl_shader_stage stage) +{ + static const enum brw_cache_id stage_sizes[] = { stage_sizes -> stage_ids ??? + BRW_CACHE_VS_PROG, + BRW_CACHE_TCS_PROG, + BRW_CACHE_TES_PROG, + BRW_CACHE_GS_PROG, + BRW_CACHE_FS_PROG, + BRW_CACHE_CS_PROG, + }; + assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); + return stage_sizes[stage]; +} + static unsigned get_program_string_id(enum brw_cache_id cache_id, const void *key) { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 1b4745ef753..445f5e0b510 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -180,6 +180,8 @@ void brw_destroy_caches( struct brw_context *brw ); void brw_print_program_cache(struct brw_context *brw); +enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage); + /* intel_batchbuffer.c */ void brw_require_statebuffer_space(struct brw_context *brw, int size); void *brw_state_batch(struct brw_context *brw, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/19] st/mesa: i965: Use ShaderCacheSerializeDriverBlob driver function
The subject line contains i965: On 15/05/18 02:52, Jordan Justen wrote: Signed-off-by: Jordan Justen --- src/mesa/state_tracker/st_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index ee76e07a7d1..0d0cfc5c63c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -770,10 +770,12 @@ st_init_driver_functions(struct pipe_screen *screen, screen->get_shader_param(screen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_PREFERRED_IR); if (preferred_ir == PIPE_SHADER_IR_NIR) { + functions->ShaderCacheSerializeDriverBlob = st_serialise_nir_program; functions->ProgramBinarySerializeDriverBlob = st_serialise_nir_program; functions->ProgramBinaryDeserializeDriverBlob = st_deserialise_nir_program; } else { + functions->ShaderCacheSerializeDriverBlob = st_serialise_tgsi_program; functions->ProgramBinarySerializeDriverBlob = st_serialise_tgsi_program; functions->ProgramBinaryDeserializeDriverBlob = st_deserialise_tgsi_program; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass
This pass searches for reasonably large local variables which can be statically proven to be constant and moves them into shader constant data. This is especially useful when large tables are baked into the shader source code because they can be moved into a UBO by the driver to reduce register pressure and make indirect access cheaper. v2 (Jason Ekstrand): - Use a size/align function to ensure we get the right alignments - Use the newly added deref offset helpers --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 4 + src/compiler/nir/nir_opt_large_constants.c | 301 + 4 files changed, 307 insertions(+) create mode 100644 src/compiler/nir/nir_opt_large_constants.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 0fcbc5c5c5b..9e3fbdc2612 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -276,6 +276,7 @@ NIR_FILES = \ nir/nir_opt_if.c \ nir/nir_opt_intrinsics.c \ nir/nir_opt_loop_unroll.c \ + nir/nir_opt_large_constants.c \ nir/nir_opt_move_comparisons.c \ nir/nir_opt_move_load_ubo.c \ nir/nir_opt_peephole_select.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index eb7fb7b121e..28aa8de7014 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -160,6 +160,7 @@ files_libnir = files( 'nir_opt_global_to_local.c', 'nir_opt_if.c', 'nir_opt_intrinsics.c', + 'nir_opt_large_constants.c', 'nir_opt_loop_unroll.c', 'nir_opt_move_comparisons.c', 'nir_opt_move_load_ubo.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index cc5f88d6f54..92ab3a699cc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader *shader); bool nir_opt_intrinsics(nir_shader *shader); +bool nir_opt_large_constants(nir_shader *shader, + glsl_type_size_align_func size_align, + unsigned threshold); + bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode 
indirect_mask); bool nir_opt_move_comparisons(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_large_constants.c b/src/compiler/nir/nir_opt_large_constants.c new file mode 100644 index 000..027c6e8e5b5 --- /dev/null +++ b/src/compiler/nir/nir_opt_large_constants.c @@ -0,0 +1,301 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" + +struct var_info { + bool is_constant; + bool found_read; +}; + +static nir_ssa_def * +build_constant_load(nir_builder *b, nir_deref_instr *deref, +glsl_type_size_align_func size_align) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + + const unsigned bit_size = glsl_get_bit_size(deref->type); + const unsigned num_components = glsl_get_vector_elements(deref->type); + + UNUSED unsigned var_size, var_align; + size_align(var->type, _size, _align); + assert(var->data.location % var_align == 0); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant); + load->num_components = num_components; + nir_intrinsic_set_base(load, var->data.location); + nir_intrinsic_set_range(load, var_size); + load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, size_align)); + nir_ssa_dest_init(>instr, >dest, + num_components, bit_size, NULL); + nir_builder_instr_insert(b, >instr); + + return >dest.ssa; +} + +static void +handle_constant_store(nir_builder *b, nir_intrinsic_instr *store, + glsl_type_size_align_func size_align) +{ + nir_deref_instr *deref =
[Mesa-dev] [PATCH v2 9/9] anv, intel: Enable nir_opt_large_constants for Vulkan
According to RenderDoc, this shaves 99.6% of the run time off of the ambient occlusion pass in Skyrim Special Edition when running under DXVK and shaves 92% off the runtime for a reasonably representative frame. When running the actual game, Skyrim goes from being a slide-show to a very stable and playable framerate on my SKL GT4e machine. --- src/intel/compiler/brw_compiler.h | 6 ++ src/intel/compiler/brw_nir.c | 7 +++ src/intel/vulkan/anv_device.c | 1 + 3 files changed, 14 insertions(+) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 2f745d92745..9dfcfcc0115 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -112,6 +112,12 @@ struct brw_compiler { * will attempt to push everything. */ bool supports_pull_constants; + + /** +* Whether or not the driver supports NIR shader constants. This controls +* whether nir_opt_large_constants will be run. +*/ + bool supports_shader_constants; }; /** diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index dfeea73b06a..f4aee3d41ee 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -664,6 +664,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) nir = brw_nir_optimize(nir, compiler, is_scalar); + /* This needs to be run after the first optimization pass but before we +* lower indirect derefs away +*/ + if (compiler->supports_shader_constants) { + OPT(nir_opt_large_constants, NULL, 32); + } + nir_lower_bit_size(nir, lower_bit_size_callback, NULL); if (is_scalar) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index b3d30675b1e..077f5c16e46 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -434,6 +434,7 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->supports_pull_constants = false; device->compiler->constant_buffer_0_is_relative = device->info.gen < 8 || 
!device->has_context_isolation; + device->compiler->supports_shader_constants = true; isl_device_init(>isl_dev, >info, swizzled); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 5/9] nir: Add a concept of constant data associated with a shader
This commit adds a concept to NIR of having a blob of constant data associated with a shader. Instead of being a UBO or uniform that can be manipulated by the client, this constant data is considered part of the shader and remains constant across all invocations of the given shader until the end of time. To access this constant data from the shader, we add a new load_constant intrinsic. The intention is that drivers will eventually lower load_constant intrinsics to load_ubo, load_uniform, or something similar. Constant data will be used by the optimization pass in the next commit but this concept may also be useful for OpenCL. v2 (Jason Ekstrand): - Rename num_constants to constant_data_size (anholt) --- src/compiler/nir/nir.h | 8 src/compiler/nir/nir_clone.c | 6 ++ src/compiler/nir/nir_intrinsics.py | 2 ++ src/compiler/nir/nir_serialize.c | 12 src/compiler/nir/nir_sweep.c | 2 ++ 5 files changed, 30 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index e35bef612df..cc5f88d6f54 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2067,6 +2067,14 @@ typedef struct nir_shader { * access plus one */ unsigned num_inputs, num_uniforms, num_outputs, num_shared; + + /** Constant data associated with this shader. +* +* Constant data is loaded through load_constant intrinsics. See also +* nir_opt_large_constants. 
+*/ + void *constant_data; + unsigned constant_data_size; } nir_shader; static inline nir_function_impl * diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 23bb17eeba3..989c5051a54 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -734,6 +734,12 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_outputs = s->num_outputs; ns->num_shared = s->num_shared; + ns->constant_data_size = s->constant_data_size; + if (s->constant_data_size > 0) { + ns->constant_data = ralloc_size(ns, s->constant_data_size); + memcpy(ns->constant_data, s->constant_data, s->constant_data_size); + } + free_clone_state(); return ns; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d9d0bbdfccf..44a5b76beb6 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -532,6 +532,8 @@ load("per_vertex_output", 2, [BASE, COMPONENT], [CAN_ELIMINATE]) load("shared", 1, [BASE], [CAN_ELIMINATE]) # src[] = { offset }. const_index[] = { base, range } load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. 
const_index[] = { base, range } +load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) # Stores work the same way as loads, except now the first source is the value # to store and the second (and possibly third) source specify where to store diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index cc4bf23aa0f..6a30738c2d7 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1116,6 +1116,10 @@ nir_serialize(struct blob *blob, const nir_shader *nir) write_function_impl(, fxn->impl); } + blob_write_uint32(blob, nir->constant_data_size); + if (nir->constant_data_size > 0) + blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); + *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx; _mesa_hash_table_destroy(ctx.remap_table, NULL); @@ -1169,6 +1173,14 @@ nir_deserialize(void *mem_ctx, nir_foreach_function(fxn, ctx.nir) fxn->impl = read_function_impl(, fxn); + ctx.nir->constant_data_size = blob_read_uint32(blob); + if (ctx.nir->constant_data_size > 0) { + ctx.nir->constant_data = + ralloc_size(ctx.nir, ctx.nir->constant_data_size); + blob_copy_bytes(blob, ctx.nir->constant_data, + ctx.nir->constant_data_size); + } + free(ctx.idx_table); return ctx.nir; diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c index b14bf139c1b..aab641388db 100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@ -167,6 +167,8 @@ nir_sweep(nir_shader *nir) sweep_function(nir, func); } + ralloc_steal(nir, nir->constant_data); + /* Free everything we didn't steal back. */ ralloc_free(rubbish); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/9] nir: Add a deref_instr_has_indirect helper
--- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_deref.c | 18 ++ 2 files changed, 20 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c16ce547642..e35bef612df 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -987,6 +987,8 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr) return instr->var; } +bool nir_deref_instr_has_indirect(nir_deref_instr *instr); + bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); typedef struct { diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index 1a00157c2fc..22ecde4ecca 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -102,6 +102,24 @@ nir_deref_instr_remove_if_unused(nir_deref_instr *instr) return progress; } +bool +nir_deref_instr_has_indirect(nir_deref_instr *instr) +{ + while (instr->deref_type != nir_deref_type_var) { + /* Consider casts to be indirects */ + if (instr->deref_type == nir_deref_type_cast) + return true; + + if (instr->deref_type == nir_deref_type_array && + !nir_src_as_const_value(instr->arr.index)) + return true; + + instr = nir_deref_instr_parent(instr); + } + + return false; +} + bool nir_remove_dead_derefs_impl(nir_function_impl *impl) { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 8/9] anv: Add state setup support for shader constants
--- .../vulkan/anv_nir_apply_pipeline_layout.c| 47 src/intel/vulkan/anv_private.h| 1 + src/intel/vulkan/genX_cmd_buffer.c| 72 ++- 3 files changed, 101 insertions(+), 19 deletions(-) diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 37a54b2efff..c287a005bd6 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -32,6 +32,8 @@ struct apply_pipeline_layout_state { struct anv_pipeline_layout *layout; bool add_bounds_checks; + bool uses_constants; + uint8_t constants_offset; struct { BITSET_WORD *used; uint8_t *surface_offsets; @@ -100,6 +102,10 @@ get_used_bindings_block(nir_block *block, add_deref_src_binding(state, intrin->src[0]); break; + case nir_intrinsic_load_constant: +state->uses_constants = true; +break; + default: break; } @@ -172,6 +178,33 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, nir_instr_remove(>instr); } +static void +lower_load_constant(nir_intrinsic_instr *intrin, +struct apply_pipeline_layout_state *state) +{ + nir_builder *b = >builder; + + b->cursor = nir_before_instr(>instr); + + nir_ssa_def *index = nir_imm_int(b, state->constants_offset); + nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1), + nir_imm_int(b, nir_intrinsic_base(intrin))); + + nir_intrinsic_instr *load_ubo = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); + load_ubo->num_components = intrin->num_components; + load_ubo->src[0] = nir_src_for_ssa(index); + load_ubo->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(_ubo->instr, _ubo->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(b, _ubo->instr); + + nir_ssa_def_rewrite_uses(>dest.ssa, +nir_src_for_ssa(_ubo->dest.ssa)); + nir_instr_remove(>instr); +} + static void lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, unsigned *base_index, @@ -285,6 +318,9 @@ 
apply_pipeline_layout_block(nir_block *block, case nir_intrinsic_vulkan_resource_reindex: lower_res_reindex_intrinsic(intrin, state); break; + case nir_intrinsic_load_constant: +lower_load_constant(intrin, state); +break; default: break; } @@ -343,6 +379,9 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, get_used_bindings_block(block, ); } + if (state.uses_constants) + map->surface_count++; + for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ -365,6 +404,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, unsigned surface = 0; unsigned sampler = 0; unsigned image = 0; + + if (state.uses_constants) { + state.constants_offset = surface; + map->surface_to_descriptor[surface].set = + ANV_DESCRIPTOR_SET_SHADER_CONSTANTS; + surface++; + } + for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f82b88df7a0..139c48b7e46 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1570,6 +1570,7 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); +#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1) #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX struct anv_pipeline_binding { diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 97b321ccaeb..34fbd83d148 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2026,6 +2026,26 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[bias + s] = surface_state.offset + state_offset; continue; + } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) { + struct anv_state surface_state = +anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + struct anv_address constant_data = 
{ +.bo = >device->dynamic_state_pool.block_pool.bo, +.offset = pipeline->shaders[stage]->constant_data.offset, + }; + unsigned constant_data_size = +pipeline->shaders[stage]->constant_data_size; + + const enum isl_format format = +
[Mesa-dev] [PATCH v2 4/9] nir/deref: Add helpers for getting offsets
These are very similar to the related function in nir_lower_io except that they don't handle per-vertex or packed things (that could be added, in theory) and they take a more detailed size/align function pointer. One day, we should consider switching nir_lower_io over to using the more detailed size/align functions and then we could make it use these helpers instead of having its own. --- src/compiler/nir/nir_deref.c | 91 src/compiler/nir/nir_deref.h | 6 +++ 2 files changed, 97 insertions(+) diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index 22ecde4ecca..94f91df5a0f 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -120,6 +120,97 @@ nir_deref_instr_has_indirect(nir_deref_instr *instr) return false; } +static unsigned +type_get_array_stride(const struct glsl_type *elem_type, + glsl_type_size_align_func size_align) +{ + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(elem_type, _size, _align); + return ALIGN_POT(elem_size, elem_align); +} + +static unsigned +struct_type_get_field_offset(const struct glsl_type *struct_type, + glsl_type_size_align_func size_align, + unsigned field_idx) +{ + assert(glsl_type_is_struct(struct_type)); + unsigned offset = 0; + for (unsigned i = 0; i <= field_idx; i++) { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(glsl_get_struct_field(struct_type, i), +_size, _align); + offset = ALIGN_POT(offset, elem_align); + if (i < field_idx) + offset += elem_size; + } + return offset; +} + +unsigned +nir_deref_instr_get_const_offset(nir_deref_instr *deref, + glsl_type_size_align_func size_align) +{ + nir_deref_path path; + nir_deref_path_init(, deref, NULL); + + assert(path.path[0]->deref_type == nir_deref_type_var); + nir_deref_instr **p = [1]; + + unsigned offset = 0; + for (; *p; p++) { + if ((*p)->deref_type == nir_deref_type_array) { + offset += nir_src_as_const_value((*p)->arr.index)->u32[0] * + type_get_array_stride((*p)->type, size_align); + 
} else if ((*p)->deref_type == nir_deref_type_struct) { + /* p starts at path[1], so this is safe */ + nir_deref_instr *parent = *(p - 1); + offset += struct_type_get_field_offset(parent->type, size_align, +(*p)->strct.index); + } else { + unreachable("Unsupported deref type"); + } + } + + nir_deref_path_finish(); + + return offset; +} + +nir_ssa_def * +nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref, + glsl_type_size_align_func size_align) +{ + nir_deref_path path; + nir_deref_path_init(, deref, NULL); + + assert(path.path[0]->deref_type == nir_deref_type_var); + nir_deref_instr **p = [1]; + + nir_ssa_def *offset = nir_imm_int(b, 0); + for (; *p; p++) { + if ((*p)->deref_type == nir_deref_type_array) { + nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1); + nir_ssa_def *stride = +nir_imm_int(b, type_get_array_stride((*p)->type, size_align)); + offset = nir_iadd(b, offset, nir_imul(b, index, stride)); + } else if ((*p)->deref_type == nir_deref_type_struct) { + /* p starts at path[1], so this is safe */ + nir_deref_instr *parent = *(p - 1); + unsigned field_offset = +struct_type_get_field_offset(parent->type, size_align, + (*p)->strct.index); + nir_iadd(b, offset, nir_imm_int(b, field_offset)); + } else { + unreachable("Unsupported deref type"); + } + } + + nir_deref_path_finish(); + + return offset; +} + bool nir_remove_dead_derefs_impl(nir_function_impl *impl) { diff --git a/src/compiler/nir/nir_deref.h b/src/compiler/nir/nir_deref.h index 0980bae7215..6f4141aaf82 100644 --- a/src/compiler/nir/nir_deref.h +++ b/src/compiler/nir/nir_deref.h @@ -48,6 +48,12 @@ void nir_deref_path_init(nir_deref_path *path, nir_deref_instr *deref, void *mem_ctx); void nir_deref_path_finish(nir_deref_path *path); +unsigned nir_deref_instr_get_const_offset(nir_deref_instr *deref, + glsl_type_size_align_func size_align); + +nir_ssa_def *nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref, +glsl_type_size_align_func size_align); + #ifdef __cplusplus } /* 
extern "C" */ #endif -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 7/9] anv: Add support for shader constant data to the pipeline cache
--- src/intel/vulkan/anv_blorp.c | 1 + src/intel/vulkan/anv_pipeline.c | 16 src/intel/vulkan/anv_pipeline_cache.c | 27 +++ src/intel/vulkan/anv_private.h| 6 ++ 4 files changed, 50 insertions(+) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 5373faaa680..4dbfb7a83fd 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -70,6 +70,7 @@ upload_blorp_shader(struct blorp_context *blorp, struct anv_shader_bin *bin = anv_pipeline_cache_upload_kernel(>blorp_shader_cache, key, key_size, kernel, kernel_size, + NULL, 0, prog_data, prog_data_size, _map); if (!bin) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 67ede46f2ae..dc3b4a0e3ba 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -485,6 +485,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, + const void *constant_data, + uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const struct anv_pipeline_bind_map *bind_map) @@ -492,11 +494,13 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, if (cache) { return anv_pipeline_cache_upload_kernel(cache, key_data, key_size, kernel_data, kernel_size, + constant_data, constant_data_size, prog_data, prog_data_size, bind_map); } else { return anv_shader_bin_create(pipeline->device, key_data, key_size, kernel_data, kernel_size, + constant_data, constant_data_size, prog_data, prog_data_size, prog_data->param, bind_map); } @@ -575,6 +579,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, + nir->constant_data, + nir->constant_data_size, _data.base.base, sizeof(prog_data), ); if (!bin) { @@ -742,6 +748,8 @@ 
anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, tcs_bin = anv_pipeline_upload_kernel(pipeline, cache, tcs_sha1, sizeof(tcs_sha1), shader_code, code_size, + tcs_nir->constant_data, + tcs_nir->constant_data_size, _prog_data.base.base, sizeof(tcs_prog_data), _map); @@ -763,6 +771,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, tes_bin = anv_pipeline_upload_kernel(pipeline, cache, tes_sha1, sizeof(tes_sha1), shader_code, code_size, + tes_nir->constant_data, + tes_nir->constant_data_size, _prog_data.base.base, sizeof(tes_prog_data), _map); @@ -845,6 +855,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, const unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, + nir->constant_data, + nir->constant_data_size, _data.base.base, sizeof(prog_data), ); if (!bin) { @@ -995,6 +1007,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned code_size = prog_data.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, +
[Mesa-dev] [PATCH v2 3/9] nir/types: Add a natural size and alignment helper
The size and alignment are "natural" in the sense that everything is aligned to a scalar. This is a bit tighter than std430 where vec3s are required to be aligned to a vec4. --- src/compiler/nir_types.cpp | 56 ++ src/compiler/nir_types.h | 6 2 files changed, 62 insertions(+) diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index d2b2a93b207..2b932b1967e 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -477,3 +477,59 @@ glsl_channel_type(const glsl_type *t) unreachable("Unhandled base type glsl_channel_type()"); } } + +void +glsl_get_natural_size_align_bytes(const struct glsl_type *type, + unsigned *size, unsigned *align) +{ + switch (type->base_type) { + case GLSL_TYPE_UINT8: + case GLSL_TYPE_INT8: + case GLSL_TYPE_UINT16: + case GLSL_TYPE_INT16: + case GLSL_TYPE_FLOAT16: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_UINT64: + case GLSL_TYPE_INT64: { + unsigned N = glsl_get_bit_size(type) / 8; + *size = N * type->components(); + *align = N; + break; + } + + case GLSL_TYPE_ARRAY: { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(type->fields.array, +_size, _align); + *align = elem_align; + *size = type->length * ALIGN_POT(elem_size, elem_align); + break; + } + + case GLSL_TYPE_STRUCT: + *size = 0; + *align = 0; + for (unsigned i = 0; i < type->length; i++) { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(type->fields.structure[i].type, + _size, _align); + *align = MAX2(*align, elem_align); + *size = ALIGN_POT(*size, elem_align) + elem_size; + } + break; + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: + unreachable("type does not have a natural size"); + } +} diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 
1107cfd73f2..67c4d7b5097 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -184,6 +184,12 @@ const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); const struct glsl_type *glsl_channel_type(const struct glsl_type *type); +typedef void (*glsl_type_size_align_func)(const struct glsl_type *type, + unsigned *size, unsigned *align); + +void glsl_get_natural_size_align_bytes(const struct glsl_type *type, + unsigned *size, unsigned *align); + #ifdef __cplusplus } #endif -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/9] util/macros: Import ALIGN_POT from ralloc.c
--- src/util/macros.h | 3 +++ src/util/ralloc.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/util/macros.h b/src/util/macros.h index 6d3df904082..95b86c7a31a 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -285,6 +285,9 @@ do { \ #define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) #define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) +/** Align a value to a power of two */ +#define ALIGN_POT(x, y) (((x) + (y) - 1) & ~((y) - 1)) + /** * Macro for declaring an explicit conversion operator. Defaults to an * implicit conversion if C++11 is not supported. diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 42cfa2e391d..5d77f75ee85 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -553,8 +553,6 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, * other buffers. */ -#define ALIGN_POT(x, y) (((x) + (y) - 1) & ~((y) - 1)) - #define MIN_LINEAR_BUFSIZE 2048 #define SUBALLOC_ALIGNMENT sizeof(uintptr_t) #define LMAGIC 0x87b9c7d3 -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 0/9] anv,nir: Move large constants to a UBO
This little series adds an optimization pass to NIR that moves large constant variables to a UBO, and wires it up in anv. This fixes a fairly common case in some filter or ambient occlusion shaders where they put some sort of look-up table in the shader itself. This series takes Skyrim Special Edition running under DXVK from a slide show to a smooth and very playable framerate on my SKL desktop. The first part of the series adds a concept of constant data that can be associated with a NIR shader and adds an optimization pass to move large constant variables into this constant data section. It's left up to the driver to figure out how to get this constant data into the shader. The last three patches wire things up in ANV to put this data into an implicit UBO and enable the optimization. v2 (Jason Ekstrand): - Take anholt's feedback and make it more clear that the units on the number of constants is in bytes by calling it constant_data_size. - Break some of the deref to offset code out into helpers - Add new size/align helpers for types to ensure that we get alignments right when setting up constants. This hasn't usually been a problem in the past because we align most things to a dword and 64-bit values aren't common. We should start being more careful. 
Jason Ekstrand (9): util/macros: Import ALIGN_POT from ralloc.c nir: Add a deref_instr_has_indirect helper nir/types: Add a natural size and alignment helper nir/deref: Add helpers for getting offsets nir: Add a concept of constant data associated with a shader nir: Add a large constants optimization pass anv: Add support for shader constant data to the pipeline cache anv: Add state setup support for shader constants anv,intel: Enable nir_opt_large_constants for Vulkan src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h| 14 + src/compiler/nir/nir_clone.c | 6 + src/compiler/nir/nir_deref.c | 109 +++ src/compiler/nir/nir_deref.h | 6 + src/compiler/nir/nir_intrinsics.py| 2 + src/compiler/nir/nir_opt_large_constants.c| 301 ++ src/compiler/nir/nir_serialize.c | 12 + src/compiler/nir/nir_sweep.c | 2 + src/compiler/nir_types.cpp| 56 src/compiler/nir_types.h | 6 + src/intel/compiler/brw_compiler.h | 6 + src/intel/compiler/brw_nir.c | 7 + src/intel/vulkan/anv_blorp.c | 1 + src/intel/vulkan/anv_device.c | 1 + .../vulkan/anv_nir_apply_pipeline_layout.c| 47 +++ src/intel/vulkan/anv_pipeline.c | 16 + src/intel/vulkan/anv_pipeline_cache.c | 27 ++ src/intel/vulkan/anv_private.h| 7 + src/intel/vulkan/genX_cmd_buffer.c| 72 +++-- src/util/macros.h | 3 + src/util/ralloc.c | 2 - 23 files changed, 684 insertions(+), 21 deletions(-) create mode 100644 src/compiler/nir/nir_opt_large_constants.c -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 26/33] nvir/nir: implement variable indexing
We store those arrays in local memory and reserve some space for each of the arrays. With NIR we could store those arrays packed, but we don't do that yet as it causes MemoryOpt to generate unaligned memory accesses. v3: use fixed size vec4 arrays until we fix MemoryOpt v4: fix for 64 bit types v5: use loadFrom helper v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 58 +++ 1 file changed, 58 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a8e126a5730..d3188aa9872 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -78,6 +78,7 @@ public: private: typedef std::vector LValues; typedef std::tr1::unordered_map NirDefMap; + typedef std::tr1::unordered_map NirArrayLMemOffsets; typedef std::tr1::unordered_map NirBlockMap; TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); @@ -155,6 +156,7 @@ private: NirDefMap ssaDefs; NirDefMap regDefs; + NirArrayLMemOffsets regToLmemOffset; NirBlockMap blocks; unsigned int curLoopDepth; @@ -1267,6 +1269,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, bool Converter::parseNIR() { + info->bin.tlsSpace = 0; info->io.clipDistances = nir->info.clip_distance_array_size; info->io.cullDistances = nir->info.cull_distance_array_size; @@ -1358,6 +1361,16 @@ Converter::visit(nir_function *function) break; } + nir_foreach_register(reg, >impl->registers) { + if (reg->num_array_elems) { + /* TODO: packed variables would be nice, but MemoryOpt fails */ + /* replace 4 with reg->num_components */ + uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8); + regToLmemOffset[reg->index] = info->bin.tlsSpace; + info->bin.tlsSpace += size; + } + } + nir_index_ssa_defs(function->impl); foreach_list_typed(nir_cf_node, node, node, >impl->body) { if (!visit(node)) @@ 
-2088,6 +2101,51 @@ Converter::visit(nir_alu_instr *insn) * 2. they basically just merge multiple values into one data type */ CASE_OPFI(mov): + if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) { + nir_reg_dest& reg = insn->dest.dest.reg; + uint32_t goffset = regToLmemOffset[reg.reg->index]; + uint8_t comps = reg.reg->num_components; + uint8_t size = reg.reg->bit_size / 8; + uint8_t csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + uint32_t aoffset = csize * reg.base_offset; + Value *indirect = NULL; + + if (reg.indirect) +indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), + getSrc(reg.indirect, 0), mkImm(csize)); + + for (uint8_t i = 0u; i < comps; ++i) { +if (!((1u << i) & insn->dest.write_mask)) + continue; + +Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size); +mkStore(OP_STORE, dType, sym, indirect, getSrc(>src[0], i)); + } + break; + } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) { + LValues = convert(>dest); + nir_reg_src& reg = insn->src[0].src.reg; + uint32_t goffset = regToLmemOffset[reg.reg->index]; + /* uint8_t comps = reg.reg->num_components; */ + uint8_t size = reg.reg->bit_size / 8; + uint8_t csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + uint32_t aoffset = csize * reg.base_offset; + Value *indirect = NULL; + + if (reg.indirect) +indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize)); + + for (uint8_t i = 0u; i < newDefs.size(); ++i) +loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect); + + break; + } else { + LValues = convert(>dest); + for (LValues::size_type c = 0u; c < newDefs.size(); ++c) { +mkMov(newDefs[c], getSrc(>src[0], c), dType); + } + } + break; case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 30/33] nvir/nir: implement images
v3: fix compiler warnings v4: use loadFrom helper v5: fix signed min/max v6: set tex mask add support for indirect image access set cache mode v7: make compatible with 884d27bcf688d36c3bbe01bceca525595add3b33 rework the whole deref thing to prepare for bindless v8: port to deref instructions don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 390 +- 1 file changed, 370 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index f3f15d1d2ff..e354c32459b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -31,6 +31,7 @@ #include "codegen/nv50_ir_lowering_helper.h" #include "codegen/nv50_ir_util.h" +#include #include #include @@ -89,6 +90,8 @@ private: LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); + ImgFormat convertGLImgFormat(GLuint); + Value* getSrc(nir_alu_src *, uint8_t component = 0); Value* getSrc(nir_register *, uint8_t); Value* getSrc(nir_src *, uint8_t, bool indirect = false); @@ -118,6 +121,7 @@ private: DataType getDType(nir_alu_instr*); DataType getDType(nir_intrinsic_instr*); + DataType getDType(nir_intrinsic_instr*, bool isSigned); DataType getDType(nir_op, uint8_t); std::vector getSTypes(nir_alu_instr*); @@ -139,6 +143,7 @@ private: bool visit(nir_alu_instr *); bool visit(nir_block *); bool visit(nir_cf_node *); + bool visit(nir_deref_instr *); bool visit(nir_function *); bool visit(nir_if *); bool visit(nir_instr *); @@ -151,6 +156,11 @@ private: /* tex stuff */ Value* applyProjection(Value *src, Value *proj); + unsigned int getNIRArgCount(TexInstruction::Target&); + + /* image stuff */ + uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const nir_variable * &); + CacheMode getCacheModeFromVar(const nir_variable *); nir_shader *nir; @@ -243,11 +253,30 @@ 
Converter::getDType(nir_alu_instr *insn) DataType Converter::getDType(nir_intrinsic_instr *insn) +{ + bool isSigned; + switch (insn->intrinsic) { + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + isSigned = true; + break; + default: + isSigned = false; + break; + } + + return getDType(insn, isSigned); +} + +DataType +Converter::getDType(nir_intrinsic_instr *insn, bool isSigned) { if (insn->dest.is_ssa) - return typeOfSize(insn->dest.ssa.bit_size / 8, false, false); + return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned); else - return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false); + return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned); } DataType @@ -415,6 +444,22 @@ Converter::getOperation(nir_intrinsic_op op) return OP_EMIT; case nir_intrinsic_end_primitive: return OP_RESTART; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + return OP_SUREDP; + case nir_intrinsic_image_deref_load: + return OP_SULDP; + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_size: + return OP_SUQ; + case nir_intrinsic_image_deref_store: + return OP_SUSTP; default: ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); assert(false); @@ -444,28 +489,31 @@ Converter::getSubOp(nir_op op) } } +#define CASE_OP_INTR_ATOM(nir, nvir) \ + case nir_intrinsic_image_deref_atomic_ ## nir : \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir +#define CASE_OP_INTR_ATOM_S(nir, nvir) \ + case nir_intrinsic_shared_atomic_ ## nir : \ 
+ case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { - case nir_intrinsic_ssbo_atomic_add: - return NV50_IR_SUBOP_ATOM_ADD; - case nir_intrinsic_ssbo_atomic_and: - return NV50_IR_SUBOP_ATOM_AND; - case nir_intrinsic_ssbo_atomic_comp_swap: - return NV50_IR_SUBOP_ATOM_CAS; - case nir_intrinsic_ssbo_atomic_exchange: - return NV50_IR_SUBOP_ATOM_EXCH; - case nir_intrinsic_ssbo_atomic_or: - return NV50_IR_SUBOP_ATOM_OR; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - return NV50_IR_SUBOP_ATOM_MAX; - case
[Mesa-dev] [PATCH v8 33/33] nvir/nir: implement intrinsic shader_clock
Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index f4875113d00..ed2453136fd 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2344,6 +2344,14 @@ Converter::visit(nir_intrinsic_instr *insn) bar->subOp = getSubOp(op); break; } + case nir_intrinsic_shader_clock: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + + loadImm(newDefs[0], 0u); + mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0)); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 29/33] nvir/nir: implement ssbo intrinsics
v4: use loadFrom helper v5: support indirect buffer access v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 90 +++ 1 file changed, 90 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 994d8f3968a..f3f15d1d2ff 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -448,6 +448,24 @@ int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_ssbo_atomic_add: + return NV50_IR_SUBOP_ATOM_ADD; + case nir_intrinsic_ssbo_atomic_and: + return NV50_IR_SUBOP_ATOM_AND; + case nir_intrinsic_ssbo_atomic_comp_swap: + return NV50_IR_SUBOP_ATOM_CAS; + case nir_intrinsic_ssbo_atomic_exchange: + return NV50_IR_SUBOP_ATOM_EXCH; + case nir_intrinsic_ssbo_atomic_or: + return NV50_IR_SUBOP_ATOM_OR; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + return NV50_IR_SUBOP_ATOM_MAX; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + return NV50_IR_SUBOP_ATOM_MIN; + case nir_intrinsic_ssbo_atomic_xor: + return NV50_IR_SUBOP_ATOM_XOR; case nir_intrinsic_vote_all: return NV50_IR_SUBOP_VOTE_ALL; case nir_intrinsic_vote_any: @@ -1948,6 +1966,78 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_get_buffer_size: { + LValues = convert(>dest); + const DataType dType = getDType(insn); + Value *indirectBuffer; + uint32_t buffer = getIndirect(>src[0], 0, indirectBuffer); + + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0); + mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer); + break; + } + case nir_intrinsic_store_ssbo: { + DataType sType = getSType(insn->src[0], false, false); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[1], 0, indirectBuffer); + uint32_t offset = 
getIndirect(>src[2], 0, indirectOffset); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) +continue; + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, +offset + i * typeSizeof(sType)); + mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(>src[0], i)) +->setIndirect(0, 1, indirectBuffer); + } + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_load_ssbo: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[0], 0, indirectBuffer); + uint32_t offset = getIndirect(>src[1], 0, indirectOffset); + + for (uint8_t i = 0u; i < insn->num_components; ++i) + loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, + indirectOffset, indirectBuffer); + + info->io.globalAccess |= 0x1; + break; + } + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_xor: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectBuffer; + Value *indirectOffset; + uint32_t buffer = getIndirect(>src[0], 0, indirectBuffer); + uint32_t offset = getIndirect(>src[1], 0, indirectOffset); + + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset); + Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, +getSrc(>src[2], 0)); + if (op == nir_intrinsic_ssbo_atomic_comp_swap) + atom->setSrc(2, getSrc(>src[3], 0)); + atom->setIndirect(0, 0, indirectOffset); + atom->setIndirect(0, 1, indirectBuffer); + atom->subOp = getSubOp(op); + + info->io.globalAccess |= 0x2; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 23/33] nvir/nir: implement nir_instr_type_tex
a lot of those fields are not valid for a lot of tex ops. Not quite sure if it's worth the effort to check for those or just keep it like that. It seems to kind of work. v2: reworked offset handling add tex support with indirect R/S arguments handle GLSL_SAMPLER_DIM_EXTERNAL drop reference in convert(glsl_sampler_dim&, bool, bool) fix tg4 component selection v5: fill up coords args with scratch values if coords provided is less than TexTarget.getArgCount() v7: prepare for bindless_texture support v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 232 ++ 1 file changed, 232 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 637ec9d0664..9c9cc533006 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -80,6 +80,7 @@ private: typedef std::tr1::unordered_map NirDefMap; typedef std::tr1::unordered_map NirBlockMap; + TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); LValues& convert(nir_alu_dest *); BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); @@ -122,6 +123,7 @@ private: DataType getSType(nir_src&, bool isFloat, bool isSigned); operation getOperation(nir_op); + operation getOperation(nir_texop); operation preOperationNeeded(nir_op); int getSubOp(nir_op); @@ -142,6 +144,10 @@ private: bool visit(nir_load_const_instr*); bool visit(nir_loop *); bool visit(nir_ssa_undef_instr *); + bool visit(nir_tex_instr *); + + /* tex stuff */ + Value* applyProjection(Value *src, Value *proj); nir_shader *nir; @@ -367,6 +373,36 @@ Converter::getOperation(nir_op op) } } +operation +Converter::getOperation(nir_texop op) +{ + switch (op) { + case nir_texop_tex: + return OP_TEX; + case nir_texop_lod: + return OP_TXLQ; + case nir_texop_txb: + return OP_TXB; + case nir_texop_txd: + return OP_TXD; + case nir_texop_txf: + case 
nir_texop_txf_ms: + return OP_TXF; + case nir_texop_tg4: + return OP_TXG; + case nir_texop_txl: + return OP_TXL; + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_txs: + return OP_TXQ; + default: + ERROR("couldn't get operation for nir_texop %u\n", op); + assert(false); + return OP_NOP; + } +} + operation Converter::preOperationNeeded(nir_op op) { @@ -1459,6 +1495,8 @@ Converter::visit(nir_instr *insn) return visit(nir_instr_as_load_const(insn)); case nir_instr_type_ssa_undef: return visit(nir_instr_as_ssa_undef(insn)); + case nir_instr_type_tex: + return visit(nir_instr_as_tex(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -2180,6 +2218,200 @@ Converter::visit(nir_ssa_undef_instr *insn) return true; } +#define CASE_SAMPLER(ty) \ + case GLSL_SAMPLER_DIM_ ## ty : \ + if (isArray && !isShadow) \ + return TEX_TARGET_ ## ty ## _ARRAY; \ + else if (!isArray && isShadow) \ + return TEX_TARGET_## ty ## _SHADOW; \ + else if (isArray && isShadow) \ + return TEX_TARGET_## ty ## _ARRAY_SHADOW; \ + else \ + return TEX_TARGET_ ## ty + +TexTarget +Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow) +{ + switch (dim) { + CASE_SAMPLER(1D); + CASE_SAMPLER(2D); + CASE_SAMPLER(CUBE); + case GLSL_SAMPLER_DIM_3D: + return TEX_TARGET_3D; + case GLSL_SAMPLER_DIM_MS: + if (isArray) + return TEX_TARGET_2D_MS_ARRAY; + return TEX_TARGET_2D_MS; + case GLSL_SAMPLER_DIM_RECT: + if (isShadow) + return TEX_TARGET_RECT_SHADOW; + return TEX_TARGET_RECT; + case GLSL_SAMPLER_DIM_BUF: + return TEX_TARGET_BUFFER; + case GLSL_SAMPLER_DIM_EXTERNAL: + return TEX_TARGET_2D; + default: + ERROR("unknown glsl_sampler_dim %u\n", dim); + assert(false); + return TEX_TARGET_COUNT; + } +} +#undef CASE_SAMPLER + +Value* +Converter::applyProjection(Value *src, Value *proj) +{ + if (!proj) + return src; + return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj); +} + +bool +Converter::visit(nir_tex_instr *insn) +{ + switch 
(insn->op) { + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_tex: + case nir_texop_texture_samples: + case nir_texop_tg4: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txl: + case nir_texop_txs: { + LValues = convert(>dest); + std::vector srcs; + std::vector defs; + std::vector offsets; + uint8_t mask = 0; + bool lz = false; + Value *proj = NULL; + TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow); +
[Mesa-dev] [PATCH v8 20/33] nvir/nir: implement intrinsic_discard(_if)
Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a410468fd50..2047abdf371 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1651,6 +1651,20 @@ Converter::visit(nir_intrinsic_instr *insn) loadImm(newDefs[1], mode); break; } + case nir_intrinsic_discard: + mkOp(OP_DISCARD, TYPE_NONE, NULL); + break; + case nir_intrinsic_discard_if: { + Value *pred = new_LValue(func, FILE_PREDICATE); + if (insn->num_components > 1) { + ERROR("nir_intrinsic_discard_if only with 1 component supported!\n"); + assert(false); + return false; + } + mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(>src[0], 0), zero); + mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 31/33] nvir/nir: add memory barriers
v5: add more barrier intrinsics Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e354c32459b..ff9396ed715 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -506,6 +506,14 @@ Converter::getSubOp(nir_intrinsic_op op) CASE_OP_INTR_ATOM(and, AND); CASE_OP_INTR_ATOM(comp_swap, CAS); CASE_OP_INTR_ATOM(exchange, EXCH); + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + return NV50_IR_SUBOP_MEMBAR(M, GL); + case nir_intrinsic_memory_barrier_shared: + return NV50_IR_SUBOP_MEMBAR(M, CTA); CASE_OP_INTR_ATOM(or, OR); case nir_intrinsic_image_deref_atomic_max: CASE_OP_INTR_ATOM_S(imax, MAX); @@ -2302,6 +2310,17 @@ Converter::visit(nir_intrinsic_instr *insn) bar->subOp = NV50_IR_SUBOP_BAR_SYNC; break; } + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: { + Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL); + bar->fixed = 1; + bar->subOp = getSubOp(op); + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 22/33] nvir/nir: implement nir_ssa_undef_instr
v2: use mkOp v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 13 + 1 file changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 2e98e3fcce0..637ec9d0664 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -141,6 +141,7 @@ private: bool visit(nir_jump_instr *); bool visit(nir_load_const_instr*); bool visit(nir_loop *); + bool visit(nir_ssa_undef_instr *); nir_shader *nir; @@ -1456,6 +1457,8 @@ Converter::visit(nir_instr *insn) return visit(nir_instr_as_jump(insn)); case nir_instr_type_load_const: return visit(nir_instr_as_load_const(insn)); + case nir_instr_type_ssa_undef: + return visit(nir_instr_as_ssa_undef(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -2167,6 +2170,16 @@ Converter::visit(nir_alu_instr *insn) } #undef DEFAULT_CHECKS +bool +Converter::visit(nir_ssa_undef_instr *insn) +{ + LValues = convert(>def); + for (uint8_t i = 0u; i < insn->def.num_components; ++i) { + mkOp(OP_NOP, TYPE_NONE, newDefs[i]); + } + return true; +} + bool Converter::run() { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 11/33] nvir/nir: run assignSlots
v2: add support for geometry shaders set idx add some missing mappings fix for 64bit inputs/outputs fix up some FP color output index messup parse centroid flag v3: fix arrays in outputs as well fix input/ouput size calculation for tessellation shaders v4: add getSlotAddress helper fix for 64 bit typed inputs v5: change getSlotAddress interface for easier use fix sample inputs fix slot counting for mat v7: fix driver_location of images v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 626 ++ 1 file changed, 626 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 436c9f7b28b..c4fcc603861 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -67,6 +67,13 @@ private: uint32_t getIndirect(nir_src *, uint8_t, Value*&); uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot); + + void setInterpolate(nv50_ir_varying *, + uint8_t, + bool centroid, + unsigned semantics); + bool isFloatType(nir_alu_type); bool isSignedType(nir_alu_type); bool isResultFloat(nir_op); @@ -79,6 +86,8 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + bool assignSlots(); + nir_shader *nir; NirDefMap ssaDefs; @@ -301,6 +310,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *& return idx; } +static void +vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index) +{ + if (slot >= VERT_ATTRIB_GENERIC0) { + *name = TGSI_SEMANTIC_GENERIC; + *index = slot - VERT_ATTRIB_GENERIC0; + return; + } + + if (slot == VERT_ATTRIB_POINT_SIZE) { + ERROR("unknown vert attrib slot %u\n", slot); + assert(false); + return; + } + + if (slot >= VERT_ATTRIB_TEX0) { + *name = 
TGSI_SEMANTIC_TEXCOORD; + *index = slot - VERT_ATTRIB_TEX0; + return; + } + + switch (slot) { + case VERT_ATTRIB_COLOR0: + *name = TGSI_SEMANTIC_COLOR; + *index = 0; + break; + case VERT_ATTRIB_COLOR1: + *name = TGSI_SEMANTIC_COLOR; + *index = 1; + break; + case VERT_ATTRIB_EDGEFLAG: + *name = TGSI_SEMANTIC_EDGEFLAG; + *index = 0; + break; + case VERT_ATTRIB_FOG: + *name = TGSI_SEMANTIC_FOG; + *index = 0; + break; + case VERT_ATTRIB_NORMAL: + *name = TGSI_SEMANTIC_NORMAL; + *index = 0; + break; + case VERT_ATTRIB_POS: + *name = TGSI_SEMANTIC_POSITION; + *index = 0; + break; + default: + ERROR("unknown vert attrib slot %u\n", slot); + assert(false); + break; + } +} + +static void +varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index) +{ + if (slot >= VARYING_SLOT_PATCH0) { + *name = TGSI_SEMANTIC_PATCH; + *index = slot - VARYING_SLOT_PATCH0; + return; + } + + if (slot >= VARYING_SLOT_VAR0) { + *name = TGSI_SEMANTIC_GENERIC; + *index = slot - VARYING_SLOT_VAR0; + return; + } + + if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) { + *name = TGSI_SEMANTIC_TEXCOORD; + *index = slot - VARYING_SLOT_TEX0; + return; + } + + switch (slot) { + case VARYING_SLOT_BFC0: + *name = TGSI_SEMANTIC_BCOLOR; + *index = 0; + break; + case VARYING_SLOT_BFC1: + *name = TGSI_SEMANTIC_BCOLOR; + *index = 1; + break; + case VARYING_SLOT_CLIP_DIST0: + *name = TGSI_SEMANTIC_CLIPDIST; + *index = 0; + break; + case VARYING_SLOT_CLIP_DIST1: + *name = TGSI_SEMANTIC_CLIPDIST; + *index = 1; + break; + case VARYING_SLOT_CLIP_VERTEX: + *name = TGSI_SEMANTIC_CLIPVERTEX; + *index = 0; + break; + case VARYING_SLOT_COL0: + *name = TGSI_SEMANTIC_COLOR; + *index = 0; + break; + case VARYING_SLOT_COL1: + *name = TGSI_SEMANTIC_COLOR; + *index = 1; + break; + case VARYING_SLOT_EDGE: + *name = TGSI_SEMANTIC_EDGEFLAG; + *index = 0; + break; + case VARYING_SLOT_FACE: + *name = TGSI_SEMANTIC_FACE; + *index = 0; + break; + case VARYING_SLOT_FOGC: + *name = TGSI_SEMANTIC_FOG; + 
*index = 0; + break; + case VARYING_SLOT_LAYER: + *name = TGSI_SEMANTIC_LAYER; + *index = 0; + break; + case VARYING_SLOT_PNTC: + *name = TGSI_SEMANTIC_PCOORD; + *index = 0; + break; + case VARYING_SLOT_POS: + *name = TGSI_SEMANTIC_POSITION; + *index = 0; + break; + case VARYING_SLOT_PRIMITIVE_ID: + *name = TGSI_SEMANTIC_PRIMID; + *index = 0; +
[Mesa-dev] [PATCH v8 28/33] nvir/nir: implement nir_intrinsic_load_ubo
v4: use loadFrom helper v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e5b73c1919c..994d8f3968a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1934,6 +1934,20 @@ Converter::visit(nir_intrinsic_instr *insn) mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; break; } + case nir_intrinsic_load_ubo: { + const DataType dType = getDType(insn); + LValues = convert(>dest); + Value *indirectIndex; + Value *indirectOffset; + uint32_t index = getIndirect(>src[0], 0, indirectIndex) + 1; + uint32_t offset = getIndirect(>src[1], 0, indirectOffset); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, + indirectOffset, indirectIndex); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 21/33] nvir/nir: implement loading system values
v2: support more sys values fixed a bug where for multi component reads all values ended up in x v3: add load_patch_vertices_in v4: add subgroup stuff v5: add helper invocation v6: fix loading 64 bit system values v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 122 ++ 1 file changed, 122 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 2047abdf371..2e98e3fcce0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -83,6 +83,7 @@ private: LValues& convert(nir_alu_dest *); BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); + SVSemantic convert(nir_intrinsic_op); LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); @@ -1462,6 +1463,70 @@ Converter::visit(nir_instr *insn) return true; } +SVSemantic +Converter::convert(nir_intrinsic_op intr) +{ + switch (intr) { + case nir_intrinsic_load_base_vertex: + return SV_BASEVERTEX; + case nir_intrinsic_load_base_instance: + return SV_BASEINSTANCE; + case nir_intrinsic_load_draw_id: + return SV_DRAWID; + case nir_intrinsic_load_front_face: + return SV_FACE; + case nir_intrinsic_load_helper_invocation: + return SV_THREAD_KILL; + case nir_intrinsic_load_instance_id: + return SV_INSTANCE_ID; + case nir_intrinsic_load_invocation_id: + return SV_INVOCATION_ID; + case nir_intrinsic_load_local_group_size: + return SV_NTID; + case nir_intrinsic_load_local_invocation_id: + return SV_TID; + case nir_intrinsic_load_num_work_groups: + return SV_NCTAID; + case nir_intrinsic_load_patch_vertices_in: + return SV_VERTEX_COUNT; + case nir_intrinsic_load_primitive_id: + return SV_PRIMITIVE_ID; + case nir_intrinsic_load_sample_id: + return SV_SAMPLE_INDEX; + case nir_intrinsic_load_sample_mask_in: + return SV_SAMPLE_MASK; + case nir_intrinsic_load_sample_pos: + return SV_SAMPLE_POS; + 
case nir_intrinsic_load_subgroup_eq_mask: + return SV_LANEMASK_EQ; + case nir_intrinsic_load_subgroup_ge_mask: + return SV_LANEMASK_GE; + case nir_intrinsic_load_subgroup_gt_mask: + return SV_LANEMASK_GT; + case nir_intrinsic_load_subgroup_le_mask: + return SV_LANEMASK_LE; + case nir_intrinsic_load_subgroup_lt_mask: + return SV_LANEMASK_LT; + case nir_intrinsic_load_subgroup_invocation: + return SV_LANEID; + case nir_intrinsic_load_tess_coord: + return SV_TESS_COORD; + case nir_intrinsic_load_tess_level_inner: + return SV_TESS_INNER; + case nir_intrinsic_load_tess_level_outer: + return SV_TESS_OUTER; + case nir_intrinsic_load_vertex_id: + return SV_VERTEX_ID; + case nir_intrinsic_load_work_group_id: + return SV_CTAID; + default: + ERROR("unknown SVSemantic for nir_intrinsic_op %s\n", +nir_intrinsic_infos[intr].name); + assert(false); + return SV_LAST; + } +} + bool Converter::visit(nir_intrinsic_instr *insn) { @@ -1665,6 +1730,63 @@ Converter::visit(nir_intrinsic_instr *insn) mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred); break; } + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_base_instance: + case nir_intrinsic_load_draw_id: + case nir_intrinsic_load_front_face: + case nir_intrinsic_load_helper_invocation: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_invocation_id: + case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_primitive_id: + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_mask_in: + case nir_intrinsic_load_sample_pos: + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: + case nir_intrinsic_load_subgroup_invocation: + case nir_intrinsic_load_tess_coord: + case 
nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_work_group_id: { + const DataType dType = getDType(insn); + SVSemantic sv = convert(op); + LValues = convert(>dest); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + Value *def; + if (typeSizeof(dType) == 8) +def = getSSA(); + else +def = newDefs[i]; + + if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) { +loadImm(def, 0u); + } else { +Symbol *sym = mkSysVal(sv, i); +Instruction
[Mesa-dev] [PATCH v8 18/33] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output
v3: add workaround for RA issues indirects have to be multiplied by 0x10 fix indirect access v4: use smarter getIndirect helper use storeTo helper v5: don't use const_offset directly v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 46 +++ 1 file changed, 46 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index bcb2e8d28cc..32f33680bb4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1270,6 +1270,11 @@ Converter::visit(nir_function *function) setPosition(entry, true); + if (info->io.genUserClip > 0) { + for (int c = 0; c < 4; ++c) + clipVtx[c] = getScratch(); + } + switch (prog->getType()) { case Program::TYPE_TESSELLATION_CONTROL: outBase = mkOp2v( @@ -1296,6 +1301,9 @@ Converter::visit(nir_function *function) bb->cfg.attach(>cfg, Graph::Edge::TREE); setPosition(exit, true); + if (info->io.genUserClip > 0) + handleUserClipPlanes(); + /* TODO: for non main function this needs to be a OP_RETURN */ mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; return true; @@ -1470,6 +1478,44 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: { + Value *indirect; + DataType dType = getSType(insn->src[0], false, false); + uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 
1 : 2, 0, indirect); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) +continue; + + uint8_t offset = 0; + Value *src = getSrc(>src[0], i); + switch (prog->getType()) { + case Program::TYPE_FRAGMENT: { +if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) { + /* TGSI uses a different interface than NIR, TGSI stores that +* value in the z component, NIR in X +*/ + offset += 2; + src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src); +} +break; + } + case Program::TYPE_VERTEX: { +if (info->io.genUserClip > 0) { + mkMov(clipVtx[i], src); + src = clipVtx[i]; +} +break; + } + default: +break; + } + + storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 12/33] nvir/nir: add loadFrom and storeTo helper
v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 72 +++ 1 file changed, 72 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index c4fcc603861..aa484df8fea 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -74,6 +74,13 @@ private: bool centroid, unsigned semantics); + Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base, + uint8_t c, Value *indirect0 = NULL, + Value *indirect1 = NULL, bool patch = false); + void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType, +Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL, +Value *indirect1 = NULL); + bool isFloatType(nir_alu_type); bool isSignedType(nir_alu_type); bool isResultFloat(nir_op); @@ -922,6 +929,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot) return vary[idx].slot[slot] * 4; } +Instruction * +Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def, +uint32_t base, uint8_t c, Value *indirect0, +Value *indirect1, bool patch) +{ + unsigned int tySize = typeSizeof(ty); + + if (tySize == 8 && + (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) { + Value *lo = getSSA(); + Value *hi = getSSA(); + + Instruction *loi = + mkLoad(TYPE_U32, lo, +mkSymbol(file, i, TYPE_U32, base + c * tySize), +indirect0); + loi->setIndirect(0, 1, indirect1); + loi->perPatch = patch; + + Instruction *hii = + mkLoad(TYPE_U32, hi, +mkSymbol(file, i, TYPE_U32, base + c * tySize + 4), +indirect0); + hii->setIndirect(0, 1, indirect1); + hii->perPatch = patch; + + return mkOp2(OP_MERGE, ty, def, lo, hi); + } else { + Instruction *ld = + mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0); + ld->setIndirect(0, 1, indirect1); + ld->perPatch = patch; + return ld; + } +} + +void 
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, + DataType ty, Value *src, uint8_t idx, uint8_t c, + Value *indirect0, Value *indirect1) +{ + uint8_t size = typeSizeof(ty); + uint32_t address = getSlotAddress(insn, idx, c); + + if (size == 8 && indirect0) { + Value *split[2]; + mkSplit(split, 4, src); + + if (op == OP_EXPORT) { + split[0] = mkMov(getSSA(), split[0], ty)->getDef(0); + split[1] = mkMov(getSSA(), split[1], ty)->getDef(0); + } + + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0, + split[0])->perPatch = info->out[idx].patch; + mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0, + split[1])->perPatch = info->out[idx].patch; + } else { + if (op == OP_EXPORT) + src = mkMov(getSSA(size), src, ty)->getDef(0); + mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0, + src)->perPatch = info->out[idx].patch; + } +} + bool Converter::run() { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 24/33] nvir/nir: add getOperation for intrinsics
v7: don't assert in default case Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 22 +++ 1 file changed, 22 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 9c9cc533006..e79871c1ebe 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -122,10 +122,12 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + operation getOperation(nir_intrinsic_op); operation getOperation(nir_op); operation getOperation(nir_texop); operation preOperationNeeded(nir_op); + int getSubOp(nir_intrinsic_op); int getSubOp(nir_op); CondCode getCondCode(nir_op); @@ -403,6 +405,17 @@ Converter::getOperation(nir_texop op) } } +operation +Converter::getOperation(nir_intrinsic_op op) +{ + switch (op) { + default: + ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); + assert(false); + return OP_NOP; + } +} + operation Converter::preOperationNeeded(nir_op op) { @@ -425,6 +438,15 @@ Converter::getSubOp(nir_op op) } } +int +Converter::getSubOp(nir_intrinsic_op op) +{ + switch (op) { + default: + return 0; + } +} + CondCode Converter::getCondCode(nir_op op) { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 25/33] nvir/nir: implement vote and ballot
v2: add vote_eq support use the new subop intrinsic helper add ballot v3: add read_(first_)invocation v8: handle vectorized intrinsics don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 44 +++ 1 file changed, 44 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e79871c1ebe..a8e126a5730 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -442,6 +442,12 @@ int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_vote_all: + return NV50_IR_SUBOP_VOTE_ALL; + case nir_intrinsic_vote_any: + return NV50_IR_SUBOP_VOTE_ANY; + case nir_intrinsic_vote_ieq: + return NV50_IR_SUBOP_VOTE_UNI; default: return 0; } @@ -1850,6 +1856,44 @@ Converter::visit(nir_intrinsic_instr *insn) loadImm(newDefs[0], 32u); break; } + case nir_intrinsic_vote_all: + case nir_intrinsic_vote_any: + case nir_intrinsic_vote_ieq: { + LValues = convert(>dest); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), zero); + mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op); + mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred); + break; + } + case nir_intrinsic_ballot: { + LValues = convert(>dest); + Value *pred = new_LValue(func, FILE_PREDICATE); + mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), zero); + Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred); + ballot->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), loadImm(getSSA(), 0)); + break; + } + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_read_invocation: { + LValues = convert(>dest); + const DataType dType = getDType(insn); + Value *tmp = getScratch(); + + if (op == nir_intrinsic_read_first_invocation) { + mkOp1(OP_VOTE, TYPE_U32, 
tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; + mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } else + tmp = getSrc(>src[1], 0); + + for (uint8_t i = 0; i < insn->num_components; ++i) { + mkOp3(OP_SHFL, dType, newDefs[i], getSrc(>src[0], i), tmp, mkImm(0x1f)) +->subOp = NV50_IR_SUBOP_SHFL_IDX; + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 27/33] nvir/nir: implement geometry shader nir_intrinsics
v4: use smarter getIndirect helper use new getSlotAddress helper use loadFrom helper v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 27 +++ 1 file changed, 27 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index d3188aa9872..e5b73c1919c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -411,6 +411,10 @@ operation Converter::getOperation(nir_intrinsic_op op) { switch (op) { + case nir_intrinsic_emit_vertex: + return OP_EMIT; + case nir_intrinsic_end_primitive: + return OP_RESTART; default: ERROR("couldn't get operation for nir_intrinsic_op %u\n", op); assert(false); @@ -1907,6 +1911,29 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_per_vertex_input: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *indirectVertex; + Value *indirectOffset; + uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex); + uint32_t idx = getIndirect(insn, 1, 0, indirectOffset); + + Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), + mkImm(baseVertex), indirectVertex); + for (uint8_t i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); + } + break; + } + case nir_intrinsic_emit_vertex: + case nir_intrinsic_end_primitive: { + uint32_t idx = nir_intrinsic_stream_id(insn); + mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 32/33] nvir/nir: implement load_per_vertex_output
v4: use smarter getIndirect helper use new getSlotAddress helper v5: use loadFrom helper v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 23 +++ 1 file changed, 23 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index ff9396ed715..f4875113d00 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -2069,6 +2069,29 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_per_vertex_output: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *indirectVertex; + Value *indirectOffset; + uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex); + uint32_t idx = getIndirect(insn, 1, 0, indirectOffset); + Value *vtxBase = NULL; + + if (indirectVertex) + vtxBase = indirectVertex; + else + vtxBase = loadImm(NULL, baseVertex); + + vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); + } + break; + } case nir_intrinsic_emit_vertex: case nir_intrinsic_end_primitive: { uint32_t idx = nir_intrinsic_stream_id(insn); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 13/33] nvir/nir: parse NIR shader info
v2: parse a few more fields v3: add special handling for GL_ISOLINES v8: set info->prop.fp.readsSampleLocations don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 325 +- 1 file changed, 323 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index aa484df8fea..9ac98e04562 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -53,8 +53,10 @@ public: private: typedef std::vector LValues; typedef std::tr1::unordered_map NirDefMap; + typedef std::tr1::unordered_map NirBlockMap; LValues& convert(nir_alu_dest *); + BasicBlock* convert(nir_block *); LValues& convert(nir_dest *); LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); @@ -94,16 +96,48 @@ private: DataType getSType(nir_src&, bool isFloat, bool isSigned); bool assignSlots(); + bool parseNIR(); + + bool visit(nir_block *); + bool visit(nir_cf_node *); + bool visit(nir_function *); + bool visit(nir_if *); + bool visit(nir_instr *); + bool visit(nir_jump_instr *); + bool visit(nir_loop *); nir_shader *nir; NirDefMap ssaDefs; NirDefMap regDefs; + NirBlockMap blocks; + unsigned int curLoopDepth; + + BasicBlock *exit; + + union { + struct { + Value *position; + } fp; + }; }; Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), - nir(nir) {} + nir(nir), + curLoopDepth(0) {} + +BasicBlock * +Converter::convert(nir_block *block) +{ + NirBlockMap::iterator it = blocks.find(block->index); + if (it != blocks.end()) + return (*it).second; + + BasicBlock *bb = new BasicBlock(func); + blocks[block->index] = bb; + return bb; +} bool Converter::isFloatType(nir_alu_type type) @@ -994,6 +1028,283 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, } } +bool +Converter::parseNIR() +{ + 
info->io.clipDistances = nir->info.clip_distance_array_size; + info->io.cullDistances = nir->info.cull_distance_array_size; + + switch(prog->getType()) { + case Program::TYPE_COMPUTE: + info->prop.cp.numThreads[0] = nir->info.cs.local_size[0]; + info->prop.cp.numThreads[1] = nir->info.cs.local_size[1]; + info->prop.cp.numThreads[2] = nir->info.cs.local_size[2]; + info->bin.smemSize = nir->info.cs.shared_size; + break; + case Program::TYPE_FRAGMENT: + info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; + info->prop.fp.persampleInvocation = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); + info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info->prop.fp.readsSampleLocations = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); + info->prop.fp.usesDiscard = nir->info.fs.uses_discard; + info->prop.fp.usesSampleMaskIn = + !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); + break; + case Program::TYPE_GEOMETRY: + info->prop.gp.inputPrim = nir->info.gs.input_primitive; + info->prop.gp.instanceCount = nir->info.gs.invocations; + info->prop.gp.maxVertices = nir->info.gs.vertices_out; + info->prop.gp.outputPrim = nir->info.gs.output_primitive; + break; + case Program::TYPE_TESSELLATION_CONTROL: + case Program::TYPE_TESSELLATION_EVAL: + if (nir->info.tess.primitive_mode == GL_ISOLINES) + info->prop.tp.domain = GL_LINES; + else + info->prop.tp.domain = nir->info.tess.primitive_mode; + info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out; + info->prop.tp.outputPrim = + nir->info.tess.point_mode ? 
PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES; + info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3; + info->prop.tp.winding = !nir->info.tess.ccw; + break; + case Program::TYPE_VERTEX: + info->prop.vp.usesDrawParameters = + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) || + (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)); + break; + default: + break; + } + + return true; +} + +bool +Converter::visit(nir_function *function) +{ + /* we only support emiting the main function for now */ + assert(!strcmp(function->name, "main")); + assert(function->impl); + + /* usually the blocks will set everything up, but main is special */ + BasicBlock *entry = new BasicBlock(prog->main); + exit = new BasicBlock(prog->main); + blocks[nir_start_block(function->impl)->index] = entry; +
[Mesa-dev] [PATCH v8 17/33] nvir/nir: implement nir_intrinsic_load_uniform
v2: use new getIndirect helper fixes symbols for 64 bit types v4: use smarter getIndirect helper simplify address calculation use loadFrom helper v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 0ae7363b161..bcb2e8d28cc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1460,6 +1460,16 @@ Converter::visit(nir_intrinsic_instr *insn) nir_intrinsic_op op = insn->intrinsic; switch (op) { + case nir_intrinsic_load_uniform: { + LValues &newDefs = convert(&insn->dest); + const DataType dType = getDType(insn); + Value *indirect; + uint32_t coffset = getIndirect(insn, 0, 0, indirect); + for (uint8_t i = 0; i < insn->num_components; ++i) { + loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); + } + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 10/33] nvir/nir: add nir type helper functions
v4: treat imul as unsigned v5: remove pointless !! v7: inot is unsigned as well v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 115 ++ 1 file changed, 115 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b7fab016b19..436c9f7b28b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -67,6 +67,18 @@ private: uint32_t getIndirect(nir_src *, uint8_t, Value*&); uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + bool isFloatType(nir_alu_type); + bool isSignedType(nir_alu_type); + bool isResultFloat(nir_op); + bool isResultSigned(nir_op); + + DataType getDType(nir_alu_instr*); + DataType getDType(nir_intrinsic_instr*); + DataType getDType(nir_op, uint8_t); + + std::vector getSTypes(nir_alu_instr*); + DataType getSType(nir_src&, bool isFloat, bool isSigned); + nir_shader *nir; NirDefMap ssaDefs; @@ -77,6 +89,109 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir) {} +bool +Converter::isFloatType(nir_alu_type type) +{ + return nir_alu_type_get_base_type(type) == nir_type_float; +} + +bool +Converter::isSignedType(nir_alu_type type) +{ + return nir_alu_type_get_base_type(type) == nir_type_int; +} + +bool +Converter::isResultFloat(nir_op op) +{ + const nir_op_info = nir_op_infos[op]; + if (info.output_type != nir_type_invalid) + return isFloatType(info.output_type); + + ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name); + assert(false); + return true; +} + +bool +Converter::isResultSigned(nir_op op) +{ + switch (op) { + /* there is no umul and we get wrong results if we treat all muls as signed */ + case nir_op_imul: + case nir_op_inot: + return false; + default: + const nir_op_info = nir_op_infos[op]; + if 
(info.output_type != nir_type_invalid) + return isSignedType(info.output_type); + ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name); + assert(false); + return true; + } +} + +DataType +Converter::getDType(nir_alu_instr *insn) +{ + if (insn->dest.dest.is_ssa) + return getDType(insn->op, insn->dest.dest.ssa.bit_size); + else + return getDType(insn->op, insn->dest.dest.reg.reg->bit_size); +} + +DataType +Converter::getDType(nir_intrinsic_instr *insn) +{ + if (insn->dest.is_ssa) + return typeOfSize(insn->dest.ssa.bit_size / 8, false, false); + else + return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false); +} + +DataType +Converter::getDType(nir_op op, uint8_t bitSize) +{ + DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op)); + if (ty == TYPE_NONE) { + ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize); + assert(false); + } + return ty; +} + +std::vector +Converter::getSTypes(nir_alu_instr *insn) +{ + const nir_op_info = nir_op_infos[insn->op]; + std::vector res(info.num_inputs); + + for (uint8_t i = 0; i < info.num_inputs; ++i) { + if (info.input_types[i] != nir_type_invalid) { + res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i])); + } else { + ERROR("getSType not implemented for %s idx %u\n", info.name, i); + assert(false); + res[i] = TYPE_NONE; + break; + } + } + + return res; +} + +DataType +Converter::getSType(nir_src , bool isFloat, bool isSigned) +{ + uint8_t bitSize; + if (src.is_ssa) + bitSize = src.ssa->bit_size; + else + bitSize = src.reg.reg->bit_size; + + return typeOfSize(bitSize / 8, isFloat, isSigned); +} + Converter::LValues& Converter::convert(nir_dest *dest) { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 19/33] nvir/nir: implement load_(interpolated_)input/output
v3: and load_output v4: use smarter getIndirect helper use new getSlotAddress helper v5: don't use const_offset directly fix for indirects v6: add support for interpolateAt v7: fix compiler warnings add load_barycentric_sample handle load_output for fragment shaders v8: set info->prop.fp.readsSampleLocations for at_sample interpolation don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 135 ++ 1 file changed, 135 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 32f33680bb4..a410468fd50 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1516,6 +1516,141 @@ Converter::visit(nir_intrinsic_instr *insn) } break; } + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_output: { + LValues = convert(>dest); + + /* FBFetch */ + if (prog->getType() == Program::TYPE_FRAGMENT && + op == nir_intrinsic_load_output) { + std::vector defs, srcs; + uint8_t mask = 0; + + srcs.push_back(getSSA()); + srcs.push_back(getSSA()); + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0)); + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1)); + mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z; + mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z; + + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); + + for (uint8_t i = 0u; i < insn->num_components; ++i) { +defs.push_back(newDefs[i]); +mask |= 1 << i; + } + + TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs); + texi->tex.levelZero = 1; + texi->tex.mask = mask; + texi->tex.useOffsets = 0; + texi->tex.r = 0x; + texi->tex.s = 0x; + + info->prop.fp.readsFramebuffer 
= true; + break; + } + + const DataType dType = getDType(insn); + Value *indirect; + bool input = op != nir_intrinsic_load_output; + operation nvirOp; + uint32_t mode = 0; + + uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect); + nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx]; + + /* see load_barycentric_* handling */ + if (prog->getType() == Program::TYPE_FRAGMENT) { + mode = translateInterpMode(, nvirOp); + if (op == nir_intrinsic_load_interpolated_input) { +ImmediateValue immMode; +if (getSrc(>src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) + mode |= immMode.reg.data.u32; + } + } + + for (uint8_t i = 0u; i < insn->num_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); + if (prog->getType() == Program::TYPE_FRAGMENT) { +int s = 1; +if (typeSizeof(dType) == 8) { + Value *lo = getSSA(); + Value *hi = getSSA(); + Instruction *interp; + + interp = mkOp1(nvirOp, TYPE_U32, lo, sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + Symbol *sym1 = mkSymbol(input ? 
FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4); + interp = mkOp1(nvirOp, TYPE_U32, hi, sym1); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); + + mkOp2(OP_MERGE, dType, newDefs[i], lo, hi); +} else { + Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym); + if (nvirOp == OP_PINTERP) + interp->setSrc(s++, fp.position); + if (mode & NV50_IR_INTERP_OFFSET) + interp->setSrc(s++, getSrc(>src[0], 0)); + interp->setInterpolate(mode); + interp->setIndirect(0, 0, indirect); +} + } else { +mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch; + } + } + break; + }
[Mesa-dev] [PATCH v8 05/33] nouveau: add support for nir
not all those nir options are actually required, it just made the work a little easier. v2: fix asserts parse compute shaders don't lower bitfield_insert v3: fix memory leak v4: don't lower fmod32 v5: set lower_all_io_to_temps to false fix memory leak because we take over ownership of the nir shader merge: use the lowering helper v6: include TGSI debug header for proper assert call add nv50 support v7: fix Automake build v8: free shader only for the set shader type Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/Automake.inc | 3 + src/gallium/drivers/nouveau/Makefile.am | 5 ++ src/gallium/drivers/nouveau/Makefile.sources | 1 + .../drivers/nouveau/codegen/nv50_ir.cpp | 3 + src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../nouveau/codegen/nv50_ir_from_nir.cpp | 76 +++ src/gallium/drivers/nouveau/meson.build | 9 ++- .../drivers/nouveau/nv50/nv50_program.c | 19 - .../drivers/nouveau/nv50/nv50_screen.c| 40 ++ src/gallium/drivers/nouveau/nv50/nv50_state.c | 35 - .../drivers/nouveau/nvc0/nvc0_program.c | 18 - .../drivers/nouveau/nvc0/nvc0_screen.c| 42 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 31 +++- 13 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp diff --git a/src/gallium/drivers/nouveau/Automake.inc b/src/gallium/drivers/nouveau/Automake.inc index 1d383fcb7b1..657790494dc 100644 --- a/src/gallium/drivers/nouveau/Automake.inc +++ b/src/gallium/drivers/nouveau/Automake.inc @@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \ $(NOUVEAU_LIBS) \ $(LIBDRM_LIBS) +TARGET_COMPILER_LIB_DEPS = \ + $(top_builddir)/src/compiler/nir/libnir.la + endif diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am index f6126b54481..478dfcf437b 100644 --- a/src/gallium/drivers/nouveau/Makefile.am +++ b/src/gallium/drivers/nouveau/Makefile.am @@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CPPFLAGS = \ -I$(top_srcdir)/include/drm-uapi \ + 
-I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ $(GALLIUM_DRIVER_CFLAGS) \ $(LIBDRM_CFLAGS) \ $(NOUVEAU_CFLAGS) @@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \ nouveau_compiler_LDADD = \ libnouveau.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index ec344c63169..c6a1aff7110 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_emit_nv50.cpp \ codegen/nv50_ir_from_common.cpp \ codegen/nv50_ir_from_common.h \ + codegen/nv50_ir_from_nir.cpp \ codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ codegen/nv50_ir_graph.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 49425b98b91..042091c7c04 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1240,6 +1240,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info) prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { + case PIPE_SHADER_IR_NIR: + ret = prog->makeFromNIR(info) ? 0 : -2; + break; case PIPE_SHADER_IR_TGSI: ret = prog->makeFromTGSI(info) ? 
0 : -2; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f4f3c708886..e5b4592a61e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1255,6 +1255,7 @@ public: inline void del(Function *fn, int& id) { allFuncs.remove(id); } inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } + bool makeFromNIR(struct nv50_ir_prog_info *); bool makeFromTGSI(struct nv50_ir_prog_info *); bool convertToSSA(); bool optimizeSSA(int level); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp new file mode 100644 index 000..b22c62fd434 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including
[Mesa-dev] [PATCH v8 08/33] nvir/nir: run some passes to make the conversion easier
v2: add constant_folding v6: print non final NIR only for verbose debugging v8: add passes we will need for OpenCL compute shaders Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 41 +++ 1 file changed, 41 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index b22c62fd434..a04314afe19 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -31,6 +31,12 @@ #include "codegen/nv50_ir_lowering_helper.h" #include "codegen/nv50_ir_util.h" +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + namespace { using namespace nv50_ir; @@ -52,6 +58,41 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) bool Converter::run() { + bool progress; + + if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) + nir_print_shader(nir, stderr); + + NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, (nir_lower_io_options)0); + NIR_PASS_V(nir, nir_lower_regs_to_ssa); + NIR_PASS_V(nir, nir_lower_load_const_to_scalar); + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_alu_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + + do { + progress = false; + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_trivial_continues); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + } while (progress); + + NIR_PASS_V(nir, nir_lower_locals_to_regs); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(nir, nir_convert_from_ssa, true); + + /* Garbage collect dead instructions */ + nir_sweep(nir); + + if 
(prog->dbgFlags & NV50_IR_DEBUG_BASIC) + nir_print_shader(nir, stderr); + return false; } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 09/33] nvir/nir: track defs and provide easy access functions
v2: add helper function for indirects v4: add new getIndirect overload for easier use v5: use getSSA for ssa values we can just create the values for unassigned registers in getSrc v6: always create at least 32 bit values v8: don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 131 ++ 1 file changed, 131 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a04314afe19..b7fab016b19 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -31,6 +31,9 @@ #include "codegen/nv50_ir_lowering_helper.h" #include "codegen/nv50_ir_util.h" +#include +#include + static int type_size(const struct glsl_type *type) { @@ -48,13 +51,141 @@ public: bool run(); private: + typedef std::vector LValues; + typedef std::tr1::unordered_map NirDefMap; + + LValues& convert(nir_alu_dest *); + LValues& convert(nir_dest *); + LValues& convert(nir_register *); + LValues& convert(nir_ssa_def *); + + Value* getSrc(nir_alu_src *, uint8_t component = 0); + Value* getSrc(nir_register *, uint8_t); + Value* getSrc(nir_src *, uint8_t, bool indirect = false); + Value* getSrc(nir_ssa_def *, uint8_t); + + uint32_t getIndirect(nir_src *, uint8_t, Value*&); + uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&); + nir_shader *nir; + + NirDefMap ssaDefs; + NirDefMap regDefs; }; Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir) {} +Converter::LValues& +Converter::convert(nir_dest *dest) +{ + if (dest->is_ssa) + return convert(>ssa); + if (dest->reg.indirect) { + ERROR("no support for indirects."); + assert(false); + } + return convert(dest->reg.reg); +} + +Converter::LValues& +Converter::convert(nir_register *reg) +{ + NirDefMap::iterator it = regDefs.find(reg->index); + if (it != regDefs.end()) + 
return (*it).second; + + LValues newDef(reg->num_components); + for (uint8_t i = 0; i < reg->num_components; i++) + newDef[i] = getScratch(std::max(4, reg->bit_size / 8)); + return regDefs[reg->index] = newDef; +} + +Converter::LValues& +Converter::convert(nir_ssa_def *def) +{ + NirDefMap::iterator it = ssaDefs.find(def->index); + if (it != ssaDefs.end()) + return (*it).second; + + LValues newDef(def->num_components); + for (uint8_t i = 0; i < def->num_components; i++) + newDef[i] = getSSA(std::max(4, def->bit_size / 8)); + return ssaDefs[def->index] = newDef; +} + +Value* +Converter::getSrc(nir_alu_src *src, uint8_t component) +{ + if (src->abs || src->negate) { + ERROR("modifiers currently not supported on nir_alu_src\n"); + assert(false); + } + return getSrc(>src, src->swizzle[component]); +} + +Value* +Converter::getSrc(nir_register *reg, uint8_t idx) +{ + NirDefMap::iterator it = regDefs.find(reg->index); + if (it == regDefs.end()) + return convert(reg)[idx]; + return (*it).second[idx]; +} + +Value* +Converter::getSrc(nir_src *src, uint8_t idx, bool indirect) +{ + if (src->is_ssa) + return getSrc(src->ssa, idx); + + if (src->reg.indirect) { + if (indirect) + return getSrc(src->reg.indirect, idx); + ERROR("no support for indirects."); + assert(false); + return NULL; + } + + return getSrc(src->reg.reg, idx); +} + +Value* +Converter::getSrc(nir_ssa_def *src, uint8_t idx) +{ + NirDefMap::iterator it = ssaDefs.find(src->index); + if (it == ssaDefs.end()) { + ERROR("SSA value %u not found\n", src->index); + assert(false); + return NULL; + } + return (*it).second[idx]; +} + +uint32_t +Converter::getIndirect(nir_src *src, uint8_t idx, Value *) +{ + nir_const_value *offset = nir_src_as_const_value(*src); + + if (offset) { + indirect = NULL; + return offset->u32[0]; + } + + indirect = getSrc(src, idx, true); + return 0; +} + +uint32_t +Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *) +{ + int32_t idx = nir_intrinsic_base(insn) + 
getIndirect(&insn->src[s], c, indirect); + if (indirect) + indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4)); + return idx; +} + bool Converter::run() { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 16/33] nvir/nir: implement nir_alu_instr handling
v2: user bitfield_insert instead of bfi rework switch helper macros remove some lowering code (LoweringHelper is now used for this) v3: add pack_half_2x16_split add unpack_half_2x16_split_x/y v5: replace first argument with nullptr in loadImm calls prefer getSSA over getScratch v8: fix setting precise modifier for first instruction inside a block add guard in case no instruction gets inserted into an empty block don't require C++11 features Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 492 +- 1 file changed, 491 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a990acfc354..0ae7363b161 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -34,6 +34,31 @@ #include #include +#define CASE_OPFI(ni) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni +#define CASE_OPFIU(ni) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + case nir_op_u ## ni +#define CASE_OPIU(ni) \ + case nir_op_i ## ni : \ + case nir_op_u ## ni + +#define CASE_OPFI_RET(ni, val) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + return val +#define CASE_OPFIU_RET(ni, val) \ + case nir_op_f ## ni : \ + case nir_op_i ## ni : \ + case nir_op_u ## ni : \ + return val +#define CASE_OPIU_RET(ni, val) \ + case nir_op_i ## ni : \ + case nir_op_u ## ni : \ + return val + static int type_size(const struct glsl_type *type) { @@ -95,9 +120,17 @@ private: std::vector getSTypes(nir_alu_instr*); DataType getSType(nir_src&, bool isFloat, bool isSigned); + operation getOperation(nir_op); + operation preOperationNeeded(nir_op); + + int getSubOp(nir_op); + + CondCode getCondCode(nir_op); + bool assignSlots(); bool parseNIR(); + bool visit(nir_alu_instr *); bool visit(nir_block *); bool visit(nir_cf_node *); bool visit(nir_function *); @@ -116,6 +149,7 @@ private: unsigned int curLoopDepth; 
BasicBlock *exit; + Value *zero; union { struct { @@ -127,7 +161,10 @@ private: Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) : ConverterCommon(prog, info), nir(nir), - curLoopDepth(0) {} + curLoopDepth(0) +{ + zero = mkImm((uint32_t)0); +} BasicBlock * Converter::convert(nir_block *block) @@ -244,6 +281,137 @@ Converter::getSType(nir_src , bool isFloat, bool isSigned) return typeOfSize(bitSize / 8, isFloat, isSigned); } +operation +Converter::getOperation(nir_op op) +{ + switch (op) { + // basic ops with float and int variants + CASE_OPFI_RET(abs, OP_ABS); + CASE_OPFI_RET(add, OP_ADD); + CASE_OPFI_RET(and, OP_AND); + CASE_OPFIU_RET(div, OP_DIV); + CASE_OPIU_RET(find_msb, OP_BFIND); + CASE_OPFIU_RET(max, OP_MAX); + CASE_OPFIU_RET(min, OP_MIN); + CASE_OPFIU_RET(mod, OP_MOD); + CASE_OPFI_RET(rem, OP_MOD); + CASE_OPFI_RET(mul, OP_MUL); + CASE_OPIU_RET(mul_high, OP_MUL); + CASE_OPFI_RET(neg, OP_NEG); + CASE_OPFI_RET(not, OP_NOT); + CASE_OPFI_RET(or, OP_OR); + CASE_OPFI_RET(eq, OP_SET); + CASE_OPFIU_RET(ge, OP_SET); + CASE_OPFIU_RET(lt, OP_SET); + CASE_OPFI_RET(ne, OP_SET); + CASE_OPIU_RET(shr, OP_SHR); + CASE_OPFI_RET(sub, OP_SUB); + CASE_OPFI_RET(xor, OP_XOR); + case nir_op_fceil: + return OP_CEIL; + case nir_op_fcos: + return OP_COS; + case nir_op_f2f32: + case nir_op_f2f64: + case nir_op_f2i32: + case nir_op_f2i64: + case nir_op_f2u32: + case nir_op_f2u64: + case nir_op_i2f32: + case nir_op_i2f64: + case nir_op_i2i32: + case nir_op_i2i64: + case nir_op_u2f32: + case nir_op_u2f64: + case nir_op_u2u32: + case nir_op_u2u64: + return OP_CVT; + case nir_op_fddx: + case nir_op_fddx_coarse: + case nir_op_fddx_fine: + return OP_DFDX; + case nir_op_fddy: + case nir_op_fddy_coarse: + case nir_op_fddy_fine: + return OP_DFDY; + case nir_op_fexp2: + return OP_EX2; + case nir_op_ffloor: + return OP_FLOOR; + case nir_op_ffma: + return OP_FMA; + case nir_op_flog2: + return OP_LG2; + case nir_op_pack_64_2x32_split: + return OP_MERGE; + case 
nir_op_frcp: + return OP_RCP; + case nir_op_frsq: + return OP_RSQ; + case nir_op_fsat: + return OP_SAT; + case nir_op_ishl: + return OP_SHL; + case nir_op_fsin: + return OP_SIN; + case nir_op_fsqrt: + return OP_SQRT; + case nir_op_ftrunc: + return OP_TRUNC; + default: + ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name); + assert(false); + return OP_NOP; + } +} + +operation +Converter::preOperationNeeded(nir_op op) +{ + switch (op) { + case nir_op_fcos: + case nir_op_fsin: + return OP_PRESIN; + default: + return OP_NOP; + } +} + +int
[Mesa-dev] [PATCH v8 15/33] nvir/nir: add skeleton for nir_intrinsic_instr
Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp| 17 + 1 file changed, 17 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 3084f32abb4..a990acfc354 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -103,6 +103,7 @@ private: bool visit(nir_function *); bool visit(nir_if *); bool visit(nir_instr *); + bool visit(nir_intrinsic_instr *); bool visit(nir_jump_instr *); bool visit(nir_load_const_instr*); bool visit(nir_loop *); @@ -1270,6 +1271,8 @@ bool Converter::visit(nir_instr *insn) { switch (insn->type) { + case nir_instr_type_intrinsic: + return visit(nir_instr_as_intrinsic(insn)); case nir_instr_type_jump: return visit(nir_instr_as_jump(insn)); case nir_instr_type_load_const: @@ -1281,6 +1284,20 @@ Converter::visit(nir_instr *insn) return true; } +bool +Converter::visit(nir_intrinsic_instr *insn) +{ + nir_intrinsic_op op = insn->intrinsic; + + switch (op) { + default: + ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); + return false; + } + + return true; +} + bool Converter::visit(nir_jump_instr *insn) { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 14/33] nvir/nir: implement nir_load_const_instr
v8: fix loading 8/16 bit constants Signed-off-by: Karol Herbst --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 28 +++ 1 file changed, 28 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 9ac98e04562..3084f32abb4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -104,6 +104,7 @@ private: bool visit(nir_if *); bool visit(nir_instr *); bool visit(nir_jump_instr *); + bool visit(nir_load_const_instr*); bool visit(nir_loop *); nir_shader *nir; @@ -1271,6 +1272,8 @@ Converter::visit(nir_instr *insn) switch (insn->type) { case nir_instr_type_jump: return visit(nir_instr_as_jump(insn)); + case nir_instr_type_load_const: + return visit(nir_instr_as_load_const(insn)); default: ERROR("unknown nir_instr type %u\n", insn->type); return false; @@ -1305,6 +1308,31 @@ Converter::visit(nir_jump_instr *insn) return true; } +bool +Converter::visit(nir_load_const_instr *insn) +{ + assert(insn->def.bit_size <= 64); + + LValues &newDefs = convert(&insn->def); + for (int i = 0; i < insn->def.num_components; i++) { + switch (insn->def.bit_size) { + case 64: + loadImm(newDefs[i], insn->value.u64[i]); + break; + case 32: + loadImm(newDefs[i], insn->value.u32[i]); + break; + case 16: + loadImm(newDefs[i], insn->value.u16[i]); + break; + case 8: + loadImm(newDefs[i], insn->value.u8[i]); + break; + } + } + return true; +} + bool Converter::run() { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 02/33] nvc0: print the shader type when dumping headers
this makes debugging the shader header a little easier Acked-by: Pierre Moreau Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 57d98753f45..d87adc998a2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -554,6 +554,7 @@ nvc0_program_dump(struct nvc0_program *prog) unsigned pos; if (prog->type != PIPE_SHADER_COMPUTE) { + debug_printf("dumping HDR for type %i\n", prog->type); for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos) debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n", pos * sizeof(prog->hdr[0]), prog->hdr[pos]); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 07/33] nouveau: fix nir and TGSI shader cache collision
Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/nouveau_screen.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 03c1c74160f..7acf2050f10 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -156,9 +156,13 @@ nouveau_disk_cache_create(struct nouveau_screen *screen) &mesa_timestamp)) { res = asprintf(&timestamp_str, "%u", mesa_timestamp); if (res != -1) { + uint64_t shader_debug_flags = 0; + if (screen->prefer_nir) +shader_debug_flags |= 1 << 0; + screen->disk_shader_cache = disk_cache_create(nouveau_screen_get_name(&screen->base), - timestamp_str, 0); + timestamp_str, shader_debug_flags); free(timestamp_str); } } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 00/33] Nouveau NIR support
I think this is finally getting complete. There are a handful of piglit and CTS regressions, but those are caused by issues in common nir and gallium code. Some features remain disabled like bindless textures, because there is more ground work needed to be able to properly do it with nir. Changes since v7: * port images to derefs * remove use of C++11 features * add more passes the code implicitly depends on * add changes for reading out sample locations * fix loading 8 and 16 bit constants * minor cleanups and bugfixes Connor Abbott (1): nv50/ir/ra: Fix copying compound for moves Karol Herbst (32): nvc0: print the shader type when dumping headers nvir: move common converter code in base class nvir: add lowering helper nouveau: add support for nir nouveau: add env var to make nir default nouveau: fix nir and TGSI shader cache collision nvir/nir: run some passes to make the conversion easier nvir/nir: track defs and provide easy access functions nvir/nir: add nir type helper functions nvir/nir: run assignSlots nvir/nir: add loadFrom and storeTo helpler nvir/nir: parse NIR shader info nvir/nir: implement nir_load_const_instr nvir/nir: add skeleton for nir_intrinsic_instr nvir/nir: implement nir_alu_instr handling nvir/nir: implement nir_intrinsic_load_uniform nvir/nir: implement nir_intrinsic_store_(per_vertex_)output nvir/nir: implement load_(interpolated_)input/output nvir/nir: implement intrinsic_discard(_if) nvir/nir: implement loading system values nvir/nir: implement nir_ssa_undef_instr nvir/nir: implement nir_instr_type_tex nvir/nir: add getOperation for intrinsics nvir/nir: implement vote and ballot nvir/nir: implement variable indexing nvir/nir: implement geometry shader nir_intrinsics nvir/nir: implement nir_intrinsic_load_ubo nvir/nir: implement ssbo intrinsics nvir/nir: implement images nvir/nir: add memory barriers nvir/nir: implement load_per_vertex_output nvir/nir: implement intrinsic shader_clock src/gallium/drivers/nouveau/Automake.inc |3 + 
src/gallium/drivers/nouveau/Makefile.am |5 + src/gallium/drivers/nouveau/Makefile.sources |5 + .../drivers/nouveau/codegen/nv50_ir.cpp |3 + src/gallium/drivers/nouveau/codegen/nv50_ir.h |1 + .../nouveau/codegen/nv50_ir_from_common.cpp | 107 + .../nouveau/codegen/nv50_ir_from_common.h | 58 + .../nouveau/codegen/nv50_ir_from_nir.cpp | 3144 + .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 106 +- .../codegen/nv50_ir_lowering_helper.cpp | 275 ++ .../nouveau/codegen/nv50_ir_lowering_helper.h | 53 + .../drivers/nouveau/codegen/nv50_ir_ra.cpp| 60 +- src/gallium/drivers/nouveau/meson.build | 13 +- src/gallium/drivers/nouveau/nouveau_screen.c | 11 +- src/gallium/drivers/nouveau/nouveau_screen.h |2 + .../drivers/nouveau/nv50/nv50_program.c | 19 +- .../drivers/nouveau/nv50/nv50_screen.c| 44 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 35 +- .../drivers/nouveau/nvc0/nvc0_program.c | 19 +- .../drivers/nouveau/nvc0/nvc0_screen.c| 61 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 31 +- 21 files changed, 3907 insertions(+), 148 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v8 03/33] nvir: move common converter code in base class
v2: remove TGSI related bits Signed-off-by: Karol Herbst Reviewed-by: Pierre Moreau --- src/gallium/drivers/nouveau/Makefile.sources | 2 + .../nouveau/codegen/nv50_ir_from_common.cpp | 107 ++ .../nouveau/codegen/nv50_ir_from_common.h | 58 ++ .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 106 + src/gallium/drivers/nouveau/meson.build | 2 + 5 files changed, 172 insertions(+), 103 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 65f08c7d8d8..fee5e59522e 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_build_util.h \ codegen/nv50_ir_driver.h \ codegen/nv50_ir_emit_nv50.cpp \ + codegen/nv50_ir_from_common.cpp \ + codegen/nv50_ir_from_common.h \ codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ codegen/nv50_ir_graph.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp new file mode 100644 index 000..0ad6087e588 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp @@ -0,0 +1,107 @@ +/* + * Copyright 2011 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "codegen/nv50_ir_from_common.h" + +namespace nv50_ir { + +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info) + : BuildUtil(prog), + info(info) {} + +ConverterCommon::Subroutine * +ConverterCommon::getSubroutine(unsigned ip) +{ + std::map::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair( + ip, Subroutine(new Function(prog, "SUB", ip.first; + + return >second; +} + +ConverterCommon::Subroutine * +ConverterCommon::getSubroutine(Function *f) +{ + unsigned ip = f->getLabel(); + std::map::iterator it = sub.map.find(ip); + + if (it == sub.map.end()) + it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; + + return >second; +} + +uint8_t +ConverterCommon::translateInterpMode(const struct nv50_ir_varying *var, operation& op) +{ + uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; + + if (var->flat) + mode = NV50_IR_INTERP_FLAT; + else + if (var->linear) + mode = NV50_IR_INTERP_LINEAR; + else + if (var->sc) + mode = NV50_IR_INTERP_SC; + + op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) + ? 
OP_PINTERP : OP_LINTERP; + + if (var->centroid) + mode |= NV50_IR_INTERP_CENTROID; + + return mode; +} + +void +ConverterCommon::handleUserClipPlanes() +{ + Value *res[8]; + int n, i, c; + + for (c = 0; c < 4; ++c) { + for (i = 0; i < info->io.genUserClip; ++i) { + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot, +TYPE_F32, info->io.ucpBase + i * 16 + c * 4); + Value *ucp = mkLoadv(TYPE_F32, sym, NULL); + if (c == 0) +res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); + else +mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); + } + } + + const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; + + for (i = 0; i < info->io.genUserClip; ++i) { + n = i / 4 + first; + c = i % 4; + Symbol *sym = + mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); + mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); + } +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
[Mesa-dev] [PATCH v8 04/33] nvir: add lowering helper
this is mostly useful for lazy IR converters not wanting to deal with 64 bit lowering and other illegal stuff v5: also handle SAT v6: rename type variables fixed lowering of NEG add lowering of NOT v8: don't require C++11 features Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/Makefile.sources | 2 + .../codegen/nv50_ir_lowering_helper.cpp | 275 ++ .../nouveau/codegen/nv50_ir_lowering_helper.h | 53 src/gallium/drivers/nouveau/meson.build | 2 + 4 files changed, 332 insertions(+) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index fee5e59522e..ec344c63169 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -122,6 +122,8 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_graph.h \ codegen/nv50_ir.h \ codegen/nv50_ir_inlines.h \ + codegen/nv50_ir_lowering_helper.cpp \ + codegen/nv50_ir_lowering_helper.h \ codegen/nv50_ir_lowering_nv50.cpp \ codegen/nv50_ir_peephole.cpp \ codegen/nv50_ir_print.cpp \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp new file mode 100644 index 000..02380f12b9f --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp @@ -0,0 +1,275 @@ +/* + * Copyright 2018 Red Hat Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Karol Herbst + */ + +#include "codegen/nv50_ir_lowering_helper.h" + +namespace nv50_ir { + +bool +LoweringHelper::visit(Instruction *insn) +{ + switch (insn->op) { + case OP_ABS: + return handleABS(insn); + case OP_CVT: + return handleCVT(insn); + case OP_MAX: + case OP_MIN: + return handleMAXMIN(insn); + case OP_MOV: + return handleMOV(insn); + case OP_NEG: + return handleNEG(insn); + case OP_SAT: + return handleSAT(insn); + case OP_SLCT: + return handleSLCT(insn->asCmp()); + case OP_AND: + case OP_NOT: + case OP_OR: + case OP_XOR: + return handleLogOp(insn); + default: + return true; + } +} + +bool +LoweringHelper::handleABS(Instruction *insn) +{ + DataType dTy = insn->dType; + if (!(dTy == TYPE_U64 || dTy == TYPE_S64)) + return true; + + bld.setPosition(insn, false); + + Value *neg = bld.getSSA(8); + Value *negComp[2], *srcComp[2]; + Value *lo = bld.getSSA(), *hi = bld.getSSA(); + bld.mkOp2(OP_SUB, dTy, neg, bld.mkImm((uint64_t)0), insn->getSrc(0)); + bld.mkSplit(negComp, 4, neg); + bld.mkSplit(srcComp, 4, insn->getSrc(0)); + bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, lo, TYPE_S32, negComp[0], srcComp[0], srcComp[1]); + bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, hi, TYPE_S32, negComp[1], srcComp[1], srcComp[1]); + insn->op = OP_MERGE; + insn->setSrc(0, lo); + insn->setSrc(1, hi); + + return true; +} + +bool +LoweringHelper::handleCVT(Instruction *insn) +{ + DataType dTy = insn->dType; + DataType sTy = insn->sType; + + if (typeSizeof(dTy) <= 4 && typeSizeof(sTy) <= 4) + return true; + + bld.setPosition(insn, false); + + if ((dTy == TYPE_S32 && sTy == TYPE_S64) || + (dTy == TYPE_U32 && sTy == TYPE_U64)) { + Value *src[2]; + bld.mkSplit(src, 4, insn->getSrc(0)); + insn->op = OP_MOV; + insn->setSrc(0, src[0]); + } else if (dTy == TYPE_S64 && sTy == TYPE_S32) { + Value *tmp = bld.getSSA(); + bld.mkOp2(OP_SHR, TYPE_S32, tmp, insn->getSrc(0), bld.loadImm(bld.getSSA(), 31)); + insn->op = OP_MERGE; + insn->setSrc(1, tmp); + } else if (dTy == TYPE_U64 && sTy == 
TYPE_U32) { + insn->op = OP_MERGE; + insn->setSrc(1,
[Mesa-dev] [PATCH v8 06/33] nouveau: add env var to make nir default
v2: allow for non debug builds as well v3: move reading out env var more global disable tg4 with multiple offsets with nir disable caps for 64 bit types v6: nv50 support disable MS images disable bindless textures Signed-off-by: Karol Herbst --- src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 2 ++ .../drivers/nouveau/nv50/nv50_screen.c| 4 +++- .../drivers/nouveau/nvc0/nvc0_screen.c| 19 +-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index c97b707955c..03c1c74160f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) union nouveau_bo_config mm_config; char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); + char *use_nir = getenv("NV50_PROG_USE_NIR"); + if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); + if (use_nir) + screen->prefer_nir = strtol(use_nir, NULL, 0) == 1; + /* These must be set before any failure is possible, as the cleanup * paths assume they're responsible for deleting them. 
*/ diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index e4fbae99ca4..1229b66b26f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -62,6 +62,8 @@ struct nouveau_screen { struct disk_cache *disk_shader_cache; + bool prefer_nir; + #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { uint64_t v[29]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 24324d7a11e..826c01c238e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -317,6 +317,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { + const struct nouveau_screen *screen = nouveau_screen(pscreen); + switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: @@ -370,7 +372,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index b32a129c7e2..70c8a6ffbf4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -87,9 +87,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, if (bindings & PIPE_BIND_SHADER_IMAGE) { if (sample_count > 0 && - nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) { + (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS || + nouveau_screen(pscreen)->prefer_nir)) { /* MS images are currently unsupported on Maxwell because they have to * be handled explicitly. 
*/ + /* MS images are currently unsupported with NIR */ return false; } @@ -109,7 +111,8 @@ static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nouveau_device *dev = screen->device; switch (param) { /* non-boolean caps */ @@ -217,7 +220,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: @@ -258,6 +260,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: return 1; + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + /* TODO: nir doesn't support tg4 with multiple offsets */ + return screen->prefer_nir ? 0 : 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; case PIPE_CAP_TGSI_FS_FBFETCH: @@ -275,8 +280,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles
https://bugs.freedesktop.org/show_bug.cgi?id=77449 Bug 77449 depends on bug 99813, which changed state. Bug 99813 Summary: F1 2015 rendering broken at medium quality https://bugs.freedesktop.org/show_bug.cgi?id=99813 What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED -- You are receiving this mail because: You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup
Am Freitag, den 29.06.2018, 17:42 +0200 schrieb Roland Scheidegger: [...] Thanks for the review > > So for gather4_O (where you'd need to do it differently) it must not > be > done, but for gather4 it has to be done? > Still doesn't make all that much sense to me. The hw may be weird but > I don't think it would be that weird? Yes, this made me think too. First I was wondering whether all gather4 operations ignore the offset (as in hardware bug), but no, this is not the case. Then I looked at the created shaders. The first thing I don't really understand is why the TGSI has TG4 TEMP[1], TEMP[1], IMM[1]., SAMP[0], 2D_ARRAY, TEMP[2].xyy i.e. the offset Y coordinate is also passed into Z, but somehow this seems to be irrelevant - I wonder whether this might pose a problem somewhere else but here the piglit still passes. However, the real fun comes from sb: The code created without adding the offset to the texture coordinate: 0032 SET_TEXTURE_OFFSETS R0., R0.zww0, RID:0, SID:0 CT: 0036 GATHER4_O R0.yzxw, R0.xy1_, RID:18, SID:0 CT:NNUN and this is what sb does when I add the 0.5 offset to the z coordinate: 0032 SET_TEXTURE_OFFSETS R0., R0., RID:0, SID:0 CT: 0036 GATHER4_O R0.yzxw, R0.xyz_, RID:18, SID:0 CT:NNUN Consequently, when disabling sb, the piglit also passes when I add the 0.5 offset. I've now also tested what happens when I set this offset 0.5 via SET_TEXTURE_OFFSETS, and it seems to work and sb doesn't do funny things. Best, Gert ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [ANNOUNCE] mesa 18.1.3
On Fri, Jun 29, 2018 at 2:00 PM, Dylan Baker wrote: > Hi List, > > Mesa 18.1.3 is now available. This is a bug fix point release in the 18.1 > series. > > In this release we have: > - numerous fixes for radv > - some fixes for common radeon code > - several meson fixes > - fixes for autotools with non-gnu grep > - fixes for both the nir and glsl compilers > - An i965 fix for some gpu hangs on SNB A git shortlog has traditionally been included in such announcements (as well as the pre-announcements). I'd strongly recommend continuing this practice. Cheers, -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 106842] Error de prueba
https://bugs.freedesktop.org/show_bug.cgi?id=106842 Andre Klapper changed: What|Removed |Added Component|Demos |Two QA Contact|mesa-dev@lists.freedesktop. | |org | Product|Mesa|Spam Assignee|mesa-dev@lists.freedesktop. |dan...@fooishbar.org |org | -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug.___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 4/4] gallium/llvmpipe: Enable support bptc format.
For the series: Reviewed-by: Marek Olšák Marek On Tue, Jun 26, 2018 at 4:30 PM, Denis Pauk wrote: > v2: none > v3: none > > Signed-off-by: Denis Pauk > CC: Marek Olšák > CC: Rhys Perry > CC: Matt Turner > --- > src/gallium/drivers/llvmpipe/lp_screen.c | 3 +-- > src/gallium/drivers/llvmpipe/lp_test_format.c | 3 +-- > 2 files changed, 2 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c > b/src/gallium/drivers/llvmpipe/lp_screen.c > index 28dbd9908f..9921a941d5 100644 > --- a/src/gallium/drivers/llvmpipe/lp_screen.c > +++ b/src/gallium/drivers/llvmpipe/lp_screen.c > @@ -534,8 +534,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, >} > } > > - if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC || > - format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { > + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { >/* Software decoding is not hooked up. */ >return FALSE; > } > diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c > b/src/gallium/drivers/llvmpipe/lp_test_format.c > index e9a6e01fdc..a8aa33d8ae 100644 > --- a/src/gallium/drivers/llvmpipe/lp_test_format.c > +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c > @@ -388,8 +388,7 @@ test_all(unsigned verbose, FILE *fp) >} > >/* missing fetch funcs */ > - if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC || > - format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { > + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { > continue; >} > > -- > 2.18.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 3/3] st/mesa: Also check for PIPE_FORMAT_A8R8G8B8_SRGB for texture_sRGB
For the series: Reviewed-by: Marek Olšák Marek On Fri, Jun 22, 2018 at 9:59 AM, Tomeu Vizoso wrote: > and PIPE_FORMAT_R8G8B8A8_SRGB, as well. > > The reason for this is that when Virgl runs with GLES on the host, it > cannot directly upload textures in BGRA. > > So to avoid a conversion step, consider the RGB sRGB formats as well for > this extension. > > Signed-off-by: Tomeu Vizoso > --- > src/mesa/state_tracker/st_extensions.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/state_tracker/st_extensions.c > b/src/mesa/state_tracker/st_extensions.c > index 274df7bd6a6a..ba0f671c363d 100644 > --- a/src/mesa/state_tracker/st_extensions.c > +++ b/src/mesa/state_tracker/st_extensions.c > @@ -811,7 +811,9 @@ void st_init_extensions(struct pipe_screen *screen, >{ { o(EXT_texture_sRGB), >o(EXT_texture_sRGB_decode) }, > { PIPE_FORMAT_A8B8G8R8_SRGB, > - PIPE_FORMAT_B8G8R8A8_SRGB }, > + PIPE_FORMAT_B8G8R8A8_SRGB, > + PIPE_FORMAT_A8R8G8B8_SRGB, > + PIPE_FORMAT_R8G8B8A8_SRGB}, > GL_TRUE }, /* at least one format must be supported */ > >{ { o(EXT_texture_type_2_10_10_10_REV) }, > -- > 2.17.0 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/3] st/mesa: Fall back to R8G8B8A8_SRGB for ETC2
On Fri, Jun 22, 2018 at 9:59 AM, Tomeu Vizoso wrote: > If the driver doesn't support PIPE_FORMAT_B8G8R8A8_SRGB, fall back to > PIPE_FORMAT_R8G8B8A8_SRGB. > > Drivers such as Virgl will have a hard time supporting > PIPE_FORMAT_B8G8R8A8_SRGB when the host runs GLES, as GL_ABGR isn't as > well supported there. B8G8R8A8 is GL_BGRA. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: fix typo (wrong gen number) in comment
On Friday, June 29, 2018 11:39:34 AM PDT Caio Marcelo de Oliveira Filho wrote: > --- > src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c > b/src/mesa/drivers/dri/i965/genX_state_upload.c > index b279f01e1a1..921b420ab81 100644 > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c > @@ -1935,7 +1935,7 @@ genX(upload_wm)(struct brw_context *brw) > brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, > 2)); >} > #elif GEN_GEN == 6 > - /* On gen5, we have multiple shader kernels and we no longer specify a > + /* On gen6, we have multiple shader kernels and we no longer specify a > * register count for each one. > */ >wm.KernelStartPointer0 = stage_state->prog_offset + > Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] anv, intel: Enable nir_opt_large_constants for Vulkan
According to RenderDoc, this shaves 99.6% of the run time off of the ambient occlusion pass in Skyrim Special Edition when running under DXVK and shaves 92% off the runtime for a reasonably representative frame. When running the actual game, Skyrim goes from being a slide-show to a very stable and playable framerate on my SKL GT4e machine. --- src/intel/compiler/brw_compiler.h | 6 ++ src/intel/compiler/brw_nir.c | 7 +++ src/intel/vulkan/anv_device.c | 1 + 3 files changed, 14 insertions(+) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 2f745d92745..9dfcfcc0115 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -112,6 +112,12 @@ struct brw_compiler { * will attempt to push everything. */ bool supports_pull_constants; + + /** +* Whether or not the driver supports NIR shader constants. This controls +* whether nir_opt_large_constants will be run. +*/ + bool supports_shader_constants; }; /** diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index dfeea73b06a..2d04133fbae 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -664,6 +664,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) nir = brw_nir_optimize(nir, compiler, is_scalar); + /* This needs to be run after the first optimization pass but before we +* lower indirect derefs away +*/ + if (compiler->supports_shader_constants) { + OPT(nir_opt_large_constants, type_size_scalar_bytes, 32); + } + nir_lower_bit_size(nir, lower_bit_size_callback, NULL); if (is_scalar) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index b3d30675b1e..077f5c16e46 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -434,6 +434,7 @@ anv_physical_device_init(struct anv_physical_device *device, device->compiler->supports_pull_constants = false; device->compiler->constant_buffer_0_is_relative = device->info.gen < 8 || 
!device->has_context_isolation; + device->compiler->supports_shader_constants = true; isl_device_init(>isl_dev, >info, swizzled); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] anv: Add state setup support for shader constants
--- .../vulkan/anv_nir_apply_pipeline_layout.c| 47 src/intel/vulkan/anv_private.h| 1 + src/intel/vulkan/genX_cmd_buffer.c| 72 ++- 3 files changed, 101 insertions(+), 19 deletions(-) diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 37a54b2efff..c287a005bd6 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -32,6 +32,8 @@ struct apply_pipeline_layout_state { struct anv_pipeline_layout *layout; bool add_bounds_checks; + bool uses_constants; + uint8_t constants_offset; struct { BITSET_WORD *used; uint8_t *surface_offsets; @@ -100,6 +102,10 @@ get_used_bindings_block(nir_block *block, add_deref_src_binding(state, intrin->src[0]); break; + case nir_intrinsic_load_constant: +state->uses_constants = true; +break; + default: break; } @@ -172,6 +178,33 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, nir_instr_remove(>instr); } +static void +lower_load_constant(nir_intrinsic_instr *intrin, +struct apply_pipeline_layout_state *state) +{ + nir_builder *b = >builder; + + b->cursor = nir_before_instr(>instr); + + nir_ssa_def *index = nir_imm_int(b, state->constants_offset); + nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1), + nir_imm_int(b, nir_intrinsic_base(intrin))); + + nir_intrinsic_instr *load_ubo = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); + load_ubo->num_components = intrin->num_components; + load_ubo->src[0] = nir_src_for_ssa(index); + load_ubo->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(_ubo->instr, _ubo->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(b, _ubo->instr); + + nir_ssa_def_rewrite_uses(>dest.ssa, +nir_src_for_ssa(_ubo->dest.ssa)); + nir_instr_remove(>instr); +} + static void lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, unsigned *base_index, @@ -285,6 +318,9 @@ 
apply_pipeline_layout_block(nir_block *block, case nir_intrinsic_vulkan_resource_reindex: lower_res_reindex_intrinsic(intrin, state); break; + case nir_intrinsic_load_constant: +lower_load_constant(intrin, state); +break; default: break; } @@ -343,6 +379,9 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, get_used_bindings_block(block, ); } + if (state.uses_constants) + map->surface_count++; + for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ -365,6 +404,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, unsigned surface = 0; unsigned sampler = 0; unsigned image = 0; + + if (state.uses_constants) { + state.constants_offset = surface; + map->surface_to_descriptor[surface].set = + ANV_DESCRIPTOR_SET_SHADER_CONSTANTS; + surface++; + } + for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f82b88df7a0..139c48b7e46 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1570,6 +1570,7 @@ anv_descriptor_set_destroy(struct anv_device *device, struct anv_descriptor_pool *pool, struct anv_descriptor_set *set); +#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1) #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX struct anv_pipeline_binding { diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 97b321ccaeb..34fbd83d148 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2026,6 +2026,26 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[bias + s] = surface_state.offset + state_offset; continue; + } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) { + struct anv_state surface_state = +anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + struct anv_address constant_data = 
{ +.bo = >device->dynamic_state_pool.block_pool.bo, +.offset = pipeline->shaders[stage]->constant_data.offset, + }; + unsigned constant_data_size = +pipeline->shaders[stage]->constant_data_size; + + const enum isl_format format = +
[Mesa-dev] [PATCH 3/6] nir: Add a large constants optimization pass
This pass searches for reasonably large local variables which can be statically proven to be constant and moves them into shader constant data. This is especially useful when large tables are baked into the shader source code because they can be moved into a UBO by the driver to reduce register pressure and make indirect access cheaper. --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 4 + src/compiler/nir/nir_opt_large_constants.c | 338 + 4 files changed, 344 insertions(+) create mode 100644 src/compiler/nir/nir_opt_large_constants.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 0fcbc5c5c5b..9e3fbdc2612 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -276,6 +276,7 @@ NIR_FILES = \ nir/nir_opt_if.c \ nir/nir_opt_intrinsics.c \ nir/nir_opt_loop_unroll.c \ + nir/nir_opt_large_constants.c \ nir/nir_opt_move_comparisons.c \ nir/nir_opt_move_load_ubo.c \ nir/nir_opt_peephole_select.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index eb7fb7b121e..28aa8de7014 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -160,6 +160,7 @@ files_libnir = files( 'nir_opt_global_to_local.c', 'nir_opt_if.c', 'nir_opt_intrinsics.c', + 'nir_opt_large_constants.c', 'nir_opt_loop_unroll.c', 'nir_opt_move_comparisons.c', 'nir_opt_move_load_ubo.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6c49bce9aaa..106bc538427 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2904,6 +2904,10 @@ bool nir_opt_if(nir_shader *shader); bool nir_opt_intrinsics(nir_shader *shader); +bool nir_opt_large_constants(nir_shader *shader, + int (*type_size)(const struct glsl_type *), + unsigned threshold); + bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); bool nir_opt_move_comparisons(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_large_constants.c 
b/src/compiler/nir/nir_opt_large_constants.c new file mode 100644 index 000..9dfbf9a83f8 --- /dev/null +++ b/src/compiler/nir/nir_opt_large_constants.c @@ -0,0 +1,338 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "nir_builder.h" + +struct var_info { + bool is_constant; + bool found_read; +}; + +static unsigned +struct_type_get_field_offset(const struct glsl_type *struct_type, + int (*type_size)(const struct glsl_type *), + unsigned field_idx) +{ + assert(glsl_type_is_struct(struct_type)); + unsigned offset = 0; + for (unsigned i = 0; i < field_idx; i++) + offset += type_size(glsl_get_struct_field(struct_type, i)); + return offset; +} + +static nir_ssa_def * +build_constant_load(nir_builder *b, +int (*type_size)(const struct glsl_type *), +nir_deref_instr *deref) +{ + const unsigned bit_size = glsl_get_bit_size(deref->type); + const unsigned num_components = glsl_get_vector_elements(deref->type); + + nir_ssa_def *offset = nir_imm_int(b, 0); + while (deref->deref_type != nir_deref_type_var) { + nir_deref_instr *parent = nir_deref_instr_parent(deref); + + if (deref->deref_type == nir_deref_type_array) { + nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); + nir_ssa_def *stride = nir_imm_int(b, type_size(deref->type)); + offset = nir_iadd(b, offset, nir_imul(b, index, stride)); + } else { + assert(deref->deref_type == nir_deref_type_struct); + unsigned field_offset = +
[Mesa-dev] [PATCH 4/6] anv: Add support for shader constant data to the pipeline cache
--- src/intel/vulkan/anv_blorp.c | 1 + src/intel/vulkan/anv_pipeline.c | 12 src/intel/vulkan/anv_pipeline_cache.c | 26 ++ src/intel/vulkan/anv_private.h| 6 ++ 4 files changed, 45 insertions(+) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 5373faaa680..4dbfb7a83fd 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -70,6 +70,7 @@ upload_blorp_shader(struct blorp_context *blorp, struct anv_shader_bin *bin = anv_pipeline_cache_upload_kernel(>blorp_shader_cache, key, key_size, kernel, kernel_size, + NULL, 0, prog_data, prog_data_size, _map); if (!bin) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 67ede46f2ae..8b630f7a85a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -485,6 +485,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, const void *key_data, uint32_t key_size, const void *kernel_data, uint32_t kernel_size, + const void *constant_data, + uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const struct anv_pipeline_bind_map *bind_map) @@ -492,11 +494,13 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, if (cache) { return anv_pipeline_cache_upload_kernel(cache, key_data, key_size, kernel_data, kernel_size, + constant_data, constant_data_size, prog_data, prog_data_size, bind_map); } else { return anv_shader_bin_create(pipeline->device, key_data, key_size, kernel_data, kernel_size, + constant_data, constant_data_size, prog_data, prog_data_size, prog_data->param, bind_map); } @@ -575,6 +579,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, + nir->constant_data, nir->num_constants, _data.base.base, sizeof(prog_data), ); if (!bin) { @@ -742,6 +747,8 @@ 
anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, tcs_bin = anv_pipeline_upload_kernel(pipeline, cache, tcs_sha1, sizeof(tcs_sha1), shader_code, code_size, + tcs_nir->constant_data, + tcs_nir->num_constants, _prog_data.base.base, sizeof(tcs_prog_data), _map); @@ -763,6 +770,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, tes_bin = anv_pipeline_upload_kernel(pipeline, cache, tes_sha1, sizeof(tes_sha1), shader_code, code_size, + tes_nir->constant_data, + tes_nir->num_constants, _prog_data.base.base, sizeof(tes_prog_data), _map); @@ -845,6 +854,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, const unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, + nir->constant_data, nir->num_constants, _data.base.base, sizeof(prog_data), ); if (!bin) { @@ -995,6 +1005,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned code_size = prog_data.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, + nir->constant_data, nir->num_constants,
[Mesa-dev] [PATCH 1/6] nir: Add a deref_instr_has_indirect helper
--- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_deref.c | 18 ++ 2 files changed, 20 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c16ce547642..e35bef612df 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -987,6 +987,8 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr) return instr->var; } +bool nir_deref_instr_has_indirect(nir_deref_instr *instr); + bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); typedef struct { diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index 1a00157c2fc..22ecde4ecca 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -102,6 +102,24 @@ nir_deref_instr_remove_if_unused(nir_deref_instr *instr) return progress; } +bool +nir_deref_instr_has_indirect(nir_deref_instr *instr) +{ + while (instr->deref_type != nir_deref_type_var) { + /* Consider casts to be indirects */ + if (instr->deref_type == nir_deref_type_cast) + return true; + + if (instr->deref_type == nir_deref_type_array && + !nir_src_as_const_value(instr->arr.index)) + return true; + + instr = nir_deref_instr_parent(instr); + } + + return false; +} + bool nir_remove_dead_derefs_impl(nir_function_impl *impl) { -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] nir: Add a concept of constant data associated with a shader
This commit adds a concept to NIR of having a blob of constant data associated with a shader. Instead of being a UBO or uniform that can be manipulated by the client, this constant data considered part of the shader and remains constant across all invocations of the given shader until the end of time. To access this constant data from the shader, we add a new load_constant intrinsic. The intention is that drivers will eventually lower load_constant intrinsics to load_ubo, load_uniform, or something similar. Constant data will be used by the optimization pass in the next commit but this concept may also be useful for OpenCL. --- src/compiler/nir/nir.h | 9 - src/compiler/nir/nir_clone.c | 6 ++ src/compiler/nir/nir_intrinsics.py | 2 ++ src/compiler/nir/nir_serialize.c | 10 ++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index e35bef612df..6c49bce9aaa 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2038,6 +2038,13 @@ typedef struct nir_shader { /** list of shared compute variables (nir_variable) */ struct exec_list shared; + /** Constant data associated with this shader. +* +* Constant data is loaded through load_constant intrinsics. See also +* nir_opt_large_constants. +*/ + void *constant_data; + /** Set of driver-specific options for the shader. * * The memory for the options is expected to be kept in a single static @@ -2066,7 +2073,7 @@ typedef struct nir_shader { * the highest index a load_input_*, load_uniform_*, etc. 
intrinsic can * access plus one */ - unsigned num_inputs, num_uniforms, num_outputs, num_shared; + unsigned num_inputs, num_uniforms, num_outputs, num_shared, num_constants; } nir_shader; static inline nir_function_impl * diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 23bb17eeba3..1adfaccc54d 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -733,6 +733,12 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_uniforms = s->num_uniforms; ns->num_outputs = s->num_outputs; ns->num_shared = s->num_shared; + ns->num_constants = s->num_constants; + + if (s->num_constants > 0) { + ns->constant_data = ralloc_size(ns, s->num_constants); + memcpy(ns->constant_data, s->constant_data, s->num_constants); + } free_clone_state(); diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d9d0bbdfccf..44a5b76beb6 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -532,6 +532,8 @@ load("per_vertex_output", 2, [BASE, COMPONENT], [CAN_ELIMINATE]) load("shared", 1, [BASE], [CAN_ELIMINATE]) # src[] = { offset }. const_index[] = { base, range } load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. 
const_index[] = { base, range } +load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) # Stores work the same way as loads, except now the first source is the value # to store and the second (and possibly third) source specify where to store diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index cc4bf23aa0f..2f2ecc9b270 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1106,6 +1106,10 @@ nir_serialize(struct blob *blob, const nir_shader *nir) blob_write_uint32(blob, nir->num_uniforms); blob_write_uint32(blob, nir->num_outputs); blob_write_uint32(blob, nir->num_shared); + blob_write_uint32(blob, nir->num_constants); + + if (nir->num_constants > 0) + blob_write_bytes(blob, nir->constant_data, nir->num_constants); blob_write_uint32(blob, exec_list_length(>functions)); nir_foreach_function(fxn, nir) { @@ -1161,6 +1165,12 @@ nir_deserialize(void *mem_ctx, ctx.nir->num_uniforms = blob_read_uint32(blob); ctx.nir->num_outputs = blob_read_uint32(blob); ctx.nir->num_shared = blob_read_uint32(blob); + ctx.nir->num_constants = blob_read_uint32(blob); + + if (ctx.nir->num_constants > 0) { + ctx.nir->constant_data = ralloc_size(ctx.nir, ctx.nir->num_constants); + blob_copy_bytes(blob, ctx.nir->constant_data, ctx.nir->num_constants); + } unsigned num_functions = blob_read_uint32(blob); for (unsigned i = 0; i < num_functions; i++) -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/6] anv,nir: Move large constants to a UBO
This little series adds an optimization pass to NIR that moves large constant variables to a UBO and wires it up in anv. This fixes a fairly common case in some filter or ambient occlusion shaders where they put some sort of look-up table in the shader itself. This series takes Skyrim Special Edition running under DXVK from a slide show to a smooth and very playable framerate on my SKL desktop. The first part of the series adds a concept of constant data that can be associated with a NIR shader and adds an optimization pass to move large constant variables into this constant data section. It's left up to the driver to figure out how to get this constant data into the shader. The last three patches wire things up in ANV to put this data into an implicit UBO and enable the optimization. Jason Ekstrand (6): nir: Add a deref_instr_has_indirect helper nir: Add a concept of constant data associated with a shader nir: Add a large constants optimization pass anv: Add support for shader constant data to the pipeline cache anv: Add state setup support for shader constants anv,intel: Enable nir_opt_large_constants for Vulkan src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h| 15 +- src/compiler/nir/nir_clone.c | 6 + src/compiler/nir/nir_deref.c | 18 + src/compiler/nir/nir_intrinsics.py| 2 + src/compiler/nir/nir_opt_large_constants.c| 338 ++ src/compiler/nir/nir_serialize.c | 10 + src/intel/compiler/brw_compiler.h | 6 + src/intel/compiler/brw_nir.c | 7 + src/intel/vulkan/anv_blorp.c | 1 + src/intel/vulkan/anv_device.c | 1 + .../vulkan/anv_nir_apply_pipeline_layout.c| 47 +++ src/intel/vulkan/anv_pipeline.c | 12 + src/intel/vulkan/anv_pipeline_cache.c | 26 ++ src/intel/vulkan/anv_private.h| 7 + src/intel/vulkan/genX_cmd_buffer.c| 72 +++- 17 files changed, 550 insertions(+), 20 deletions(-) create mode 100644 src/compiler/nir/nir_opt_large_constants.c -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/u_vbuf: drop min/max-scanning for empty indirect draws
Reviewed-by: Marek Olšák Marek On Thu, Jun 28, 2018 at 9:22 AM, Erik Faye-Lund wrote: > When building with asserts enabled, we'll end up triggering an assert > in pipe_buffer_map_range down this code-path, due to trying to map > an empty range. Even if we avoid that, we'll trigger another assert > a bit later, because u_vbuf_get_minmax_index returns a min-index of > -1 here, which gets promoted to an unsigned value, and gives us an > out-of-bounds buffer-mapping offset. > > Since we can't really have a well-defined min/max range here when > the range is empty anyway, we should just drop this dance in the > first place. After all, no rendering is going to be produced. > > This fixes a crash in dEQP-GLES31.functional.draw_indirect.random.0 > on VirGL for me. > > Signed-off-by: Erik Faye-Lund > --- > I noticed this while debugging something else, so I thought I'd send > a patch upstream, as the problem doesn't seem unique to my usecase. > > src/gallium/auxiliary/util/u_vbuf.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/src/gallium/auxiliary/util/u_vbuf.c > b/src/gallium/auxiliary/util/u_vbuf.c > index 42f37c7574..76a1d143d9 100644 > --- a/src/gallium/auxiliary/util/u_vbuf.c > +++ b/src/gallium/auxiliary/util/u_vbuf.c > @@ -1183,6 +1183,9 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct > pipe_draw_info *info) >new_info.start = data[2]; >pipe_buffer_unmap(pipe, transfer); >new_info.indirect = NULL; > + > + if (!new_info.count) > + return; > } > > if (new_info.index_size) { > -- > 2.18.0.rc2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 14/18] mesa: add compat profile support for ARB_multi_draw_indirect
Reviewed-by: Marek Olšák Marek On Fri, Jun 29, 2018 at 5:23 AM, Timothy Arceri wrote: > v2: add missing ARB_base_instance support > --- > src/mesa/main/extensions_table.h | 2 +- > src/mesa/vbo/vbo_exec_array.c| 77 +++- > 2 files changed, 76 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/main/extensions_table.h > b/src/mesa/main/extensions_table.h > index 1446a4bd421..12b796777df 100644 > --- a/src/mesa/main/extensions_table.h > +++ b/src/mesa/main/extensions_table.h > @@ -88,7 +88,7 @@ EXT(ARB_invalidate_subdata , dummy_true > EXT(ARB_map_buffer_alignment, dummy_true > , GLL, GLC, x , x , 2011) > EXT(ARB_map_buffer_range, ARB_map_buffer_range > , GLL, GLC, x , x , 2008) > EXT(ARB_multi_bind , dummy_true > , GLL, GLC, x , x , 2013) > -EXT(ARB_multi_draw_indirect , ARB_draw_indirect > , x , GLC, x , x , 2012) > +EXT(ARB_multi_draw_indirect , ARB_draw_indirect > , GLL, GLC, x , x , 2012) > EXT(ARB_multisample , dummy_true > , GLL, x , x , x , 1994) > EXT(ARB_multitexture, dummy_true > , GLL, x , x , x , 1998) > EXT(ARB_occlusion_query , ARB_occlusion_query > , GLL, x , x , x , 2001) > diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c > index dbb2590f0c9..58bba208db1 100644 > --- a/src/mesa/vbo/vbo_exec_array.c > +++ b/src/mesa/vbo/vbo_exec_array.c > @@ -1749,7 +1749,38 @@ vbo_exec_MultiDrawArraysIndirect(GLenum mode, const > GLvoid *indirect, > > /* If is zero, the array elements are treated as tightly packed. > */ > if (stride == 0) > - stride = 4 * sizeof(GLuint); /* sizeof(DrawArraysIndirectCommand) > */ > + stride = sizeof(DrawArraysIndirectCommand); > + > + /* From the ARB_draw_indirect spec: > +* > +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the > +*compatibility profile, this indicates that DrawArraysIndirect and > +*DrawElementsIndirect are to source their arguments directly from the > +*pointer passed as their parameters." 
> +*/ > + if (ctx->API == API_OPENGL_COMPAT && > + !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) { > + > + if (!_mesa_valid_draw_indirect_multi(ctx, primcount, stride, > + "glMultiDrawArraysIndirect")) > + return; > + > + const ubyte *ptr = (const ubyte *) indirect; > + for (unsigned i = 0; i < primcount; i++) { > + DrawArraysIndirectCommand *cmd = (DrawArraysIndirectCommand *) ptr; > + vbo_exec_DrawArraysInstancedBaseInstance(mode, cmd->first, > + cmd->count, cmd->primCount, > + cmd->baseInstance); > + > + if (stride == 0) { > +ptr += sizeof(DrawArraysIndirectCommand); > + } else { > +ptr += stride; > + } > + } > + > + return; > + } > > FLUSH_FOR_DRAW(ctx); > > @@ -1788,7 +1819,49 @@ vbo_exec_MultiDrawElementsIndirect(GLenum mode, GLenum > type, > > /* If is zero, the array elements are treated as tightly packed. > */ > if (stride == 0) > - stride = 5 * sizeof(GLuint); /* > sizeof(DrawElementsIndirectCommand) */ > + stride = sizeof(DrawElementsIndirectCommand); > + > + > + /* From the ARB_draw_indirect spec: > +* > +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the > +*compatibility profile, this indicates that DrawArraysIndirect and > +*DrawElementsIndirect are to source their arguments directly from the > +*pointer passed as their parameters." > +*/ > + if (ctx->API == API_OPENGL_COMPAT && > + !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) { > + /* > + * Unlike regular DrawElementsInstancedBaseVertex commands, the indices > + * may not come from a client array and must come from an index buffer. > + * If no element array buffer is bound, an INVALID_OPERATION error is > + * generated. 
> + */ > + if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) { > + _mesa_error(ctx, GL_INVALID_OPERATION, > + "glMultiDrawElementsIndirect(no buffer bound " > + "to GL_ELEMENT_ARRAY_BUFFER)"); > + > + return; > + } > + > + if (!_mesa_valid_draw_indirect_multi(ctx, primcount, stride, > + "glMultiDrawArraysIndirect")) > + return; > + > + const ubyte *ptr = (const ubyte *) indirect; > + for (unsigned i = 0; i < primcount; i++) { > +
Re: [Mesa-dev] [PATCH v2 12/18] mesa: add ARB_draw_indirect support to compat profile
Reviewed-by: Marek Olšák Marek On Fri, Jun 29, 2018 at 5:22 AM, Timothy Arceri wrote: > v2: add missing ARB_base_instance support > --- > src/mesa/main/bufferobj.c| 3 +- > src/mesa/main/extensions_table.h | 2 +- > src/mesa/vbo/vbo_exec_array.c| 71 +++- > 3 files changed, 72 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c > index 67f9cd0a902..1d1e51bc015 100644 > --- a/src/mesa/main/bufferobj.c > +++ b/src/mesa/main/bufferobj.c > @@ -129,8 +129,7 @@ get_buffer_target(struct gl_context *ctx, GLenum target) > return >QueryBuffer; >break; > case GL_DRAW_INDIRECT_BUFFER: > - if ((ctx->API == API_OPENGL_CORE && > - ctx->Extensions.ARB_draw_indirect) || > + if ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_draw_indirect) || > _mesa_is_gles31(ctx)) { > return >DrawIndirectBuffer; >} > diff --git a/src/mesa/main/extensions_table.h > b/src/mesa/main/extensions_table.h > index f79a52cee8c..1446a4bd421 100644 > --- a/src/mesa/main/extensions_table.h > +++ b/src/mesa/main/extensions_table.h > @@ -58,7 +58,7 @@ EXT(ARB_direct_state_access , dummy_true > EXT(ARB_draw_buffers, dummy_true > , GLL, GLC, x , x , 2002) > EXT(ARB_draw_buffers_blend , ARB_draw_buffers_blend > , GLL, GLC, x , x , 2009) > EXT(ARB_draw_elements_base_vertex , ARB_draw_elements_base_vertex > , GLL, GLC, x , x , 2009) > -EXT(ARB_draw_indirect , ARB_draw_indirect > , x , GLC, x , x , 2010) > +EXT(ARB_draw_indirect , ARB_draw_indirect > , GLL, GLC, x , x , 2010) > EXT(ARB_draw_instanced , ARB_draw_instanced > , GLL, GLC, x , x , 2008) > EXT(ARB_enhanced_layouts, ARB_enhanced_layouts > , GLL, GLC, x , x , 2013) > EXT(ARB_explicit_attrib_location, ARB_explicit_attrib_location > , GLL, GLC, x , x , 2009) > diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c > index 792907ac044..dbb2590f0c9 100644 > --- a/src/mesa/vbo/vbo_exec_array.c > +++ b/src/mesa/vbo/vbo_exec_array.c > @@ -39,6 +39,21 @@ > #include "main/macros.h" > #include 
"main/transformfeedback.h" > > +typedef struct { > + GLuint count; > + GLuint primCount; > + GLuint first; > + GLuint baseInstance; > +} DrawArraysIndirectCommand; > + > +typedef struct { > + GLuint count; > + GLuint primCount; > + GLuint firstIndex; > + GLint baseVertex; > + GLuint baseInstance; > +} DrawElementsIndirectCommand; > + > > /** > * Check that element 'j' of the array has reasonable data. > @@ -1616,6 +1631,23 @@ vbo_exec_DrawArraysIndirect(GLenum mode, const GLvoid > *indirect) >_mesa_debug(ctx, "glDrawArraysIndirect(%s, %p)\n", >_mesa_enum_to_string(mode), indirect); > > + /* From the ARB_draw_indirect spec: > +* > +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the > +*compatibility profile, this indicates that DrawArraysIndirect and > +*DrawElementsIndirect are to source their arguments directly from the > +*pointer passed as their parameters." > +*/ > + if (ctx->API == API_OPENGL_COMPAT && > + !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) { > + DrawArraysIndirectCommand *cmd = (DrawArraysIndirectCommand *) > indirect; > + > + vbo_exec_DrawArraysInstancedBaseInstance(mode, cmd->first, cmd->count, > + cmd->primCount, > + cmd->baseInstance); > + return; > + } > + > FLUSH_FOR_DRAW(ctx); > > if (_mesa_is_no_error_enabled(ctx)) { > @@ -1647,6 +1679,43 @@ vbo_exec_DrawElementsIndirect(GLenum mode, GLenum > type, const GLvoid *indirect) >_mesa_enum_to_string(mode), >_mesa_enum_to_string(type), indirect); > > + /* From the ARB_draw_indirect spec: > +* > +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the > +*compatibility profile, this indicates that DrawArraysIndirect and > +*DrawElementsIndirect are to source their arguments directly from the > +*pointer passed as their parameters." > +*/ > + if (ctx->API == API_OPENGL_COMPAT && > + !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) { > + /* > + * Unlike regular DrawElementsInstancedBaseVertex commands, the indices > + * may not come from a client array and must come from an index buffer. 
> + * If no element array buffer is bound, an INVALID_OPERATION error is > + * generated. > + */ > + if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) { > + _mesa_error(ctx,
[Mesa-dev] [PATCH] i965: fix typo (wrong gen number) in comment
--- src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index b279f01e1a1..921b420ab81 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -1935,7 +1935,7 @@ genX(upload_wm)(struct brw_context *brw) brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2)); } #elif GEN_GEN == 6 - /* On gen5, we have multiple shader kernels and we no longer specify a + /* On gen6, we have multiple shader kernels and we no longer specify a * register count for each one. */ wm.KernelStartPointer0 = stage_state->prog_offset + -- 2.18.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [ANNOUNCE] mesa 18.1.3
Hi List, Mesa 18.1.3 is now available. This is a bug fix point release in the 18.1 series. In this release we have: - numerous fixes for radv - some fixes for common radeon code - several meson fixes - fixes for autotools with non-gnu grep - fixes for both the nir and glsl compilers - An i965 fix for some gpu hangs on SNB git tag: mesa-18.1.3 https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.gz MD5: 7c283b1c847829d443c65b30c787b5cd mesa-18.1.3.tar.gz SHA1: 611d4e7912779eb0ce11f4df59f8e633ecb451bf mesa-18.1.3.tar.gz SHA256: 2a1e36280d01ad18ba6d5b3fbd653ceaa109eaa031b78eb5dfaa4df452742b66 mesa-18.1.3.tar.gz SHA512: 59f9eee8c9045d1cf4b0a97cd542a3f100571e193f87d795e9d435bae14a3bee9e43bdff08f5a9db83b92274710d95bef2d33db3bbcb7650812035cb7338 mesa-18.1.3.tar.gz PGP: https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.gz.sig https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.xz MD5: b34273403a605f6f98ead00f0bdf8e0b mesa-18.1.3.tar.xz SHA1: 0a5fb3096108a95f913411267b8a374f9ff547b2 mesa-18.1.3.tar.xz SHA256: 54f08deeda0cd2f818e8d40140040ed013de7852573002453b7f50da9ea738ce mesa-18.1.3.tar.xz SHA512: f6e5b81a80a309a36a04759d18364d3c71c48d1cb88f87b2f5432ef003092a22046e88ce2082031d5d52b60ba36f585d8df52e06ecc7a5158079936236f36887 mesa-18.1.3.tar.xz PGP: https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.xz.sig signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v3]
Adds an extension to glFramebufferParameteri that will specify if the framebuffer is vertically flipped. Historically system framebuffers are vertically flipped and user framebuffers are not. Checking to see the state was done by looking at the name field. This adds an explicit field. v2: * updated spec language [for chadv] * correctly specifying ES 3.1 [for chadv] * refactor access to rb->Name [for jason] * handle GetFramebufferParameteriv [for chadv] v3: * correct _mesa_GetMultisamplefv [for kusmabite] --- docs/specs/MESA_framebuffer_flip_y.spec| 84 ++ include/GLES2/gl2ext.h | 5 ++ src/mapi/glapi/registry/gl.xml | 6 ++ src/mesa/drivers/dri/i915/intel_fbo.c | 7 +- src/mesa/drivers/dri/i965/intel_fbo.c | 7 +- src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 7 +- src/mesa/drivers/dri/radeon/radeon_fbo.c | 7 +- src/mesa/drivers/dri/radeon/radeon_span.c | 9 ++- src/mesa/drivers/dri/swrast/swrast.c | 7 +- src/mesa/drivers/osmesa/osmesa.c | 5 +- src/mesa/drivers/x11/xm_buffer.c | 3 +- src/mesa/drivers/x11/xmesaP.h | 3 +- src/mesa/main/accum.c | 17 +++-- src/mesa/main/dd.h | 3 +- src/mesa/main/extensions_table.h | 1 + src/mesa/main/fbobject.c | 18 - src/mesa/main/framebuffer.c| 1 + src/mesa/main/glheader.h | 3 + src/mesa/main/mtypes.h | 3 + src/mesa/main/readpix.c| 20 +++--- src/mesa/state_tracker/st_cb_fbo.c | 7 +- src/mesa/swrast/s_blit.c | 17 +++-- src/mesa/swrast/s_clear.c | 3 +- src/mesa/swrast/s_copypix.c| 11 +-- src/mesa/swrast/s_depth.c | 6 +- src/mesa/swrast/s_drawpix.c| 26 --- src/mesa/swrast/s_renderbuffer.c | 6 +- src/mesa/swrast/s_renderbuffer.h | 3 +- src/mesa/swrast/s_stencil.c| 3 +- 29 files changed, 241 insertions(+), 57 deletions(-) create mode 100644 docs/specs/MESA_framebuffer_flip_y.spec diff --git a/docs/specs/MESA_framebuffer_flip_y.spec b/docs/specs/MESA_framebuffer_flip_y.spec new file mode 100644 index 00..dca77a9541 --- /dev/null +++ b/docs/specs/MESA_framebuffer_flip_y.spec @@ -0,0 +1,84 @@ +Name + +MESA_framebuffer_flip_y + +Name Strings + 
+GL_MESA_framebuffer_flip_y + +Contact + +Fritz Koenig + +Contributors + +Fritz Koenig, Google +Kristian Høgsberg, Google +Chad Versace, Google + +Status + +Proposal + +Version + +Version 1, June 7, 2018 + +Number + +TBD + +Dependencies + +OpenGL ES 3.1 is required, for FramebufferParameteri. + +Overview + +Rendered buffers are normally returned right side up, as accessed +top to bottom. This extension allows those buffers to be upside down +when accessed top to bottom. + +This extension defines a new framebuffer parameter, +GL_FRAMEBUFFER_FLIP_Y_MESA, that changes the behavior of the reads and +writes to the framebuffer attachment points. When GL_FRAMEBUFFER_FLIP_Y_MESA +is GL_TRUE, render commands and pixel transfer operations access the +backing store of each attachment point with a y-inverted coordinate +system. This y-inversion is relative to the coordinate system set when +GL_FRAMEBUFFER_FLIP_Y_MESA is GL_FALSE. + +Access through TexSubImage2D and similar calls will notice the effect of +the flip when they are not attached to framebuffer objects because +GL_FRAMEBUFFER_FLIP_Y_MESA is associated with the framebuffer object and +not the attachment points. + +IP Status + +None + +Issues + +None + +New Procedures and Functions + +None + +New Types + +None + +New Tokens + +Accepted by the <pname> argument of FramebufferParameteri and +GetFramebufferParameteriv: + +GL_FRAMEBUFFER_FLIP_Y_MESA 0x8BBB + +Errors +GL_INVALID_OPERATION is returned from GetFramebufferParameteriv if this +is called on a winsys framebuffer. 
+ +Revision History + +Version 1, June, 2018 +Initial draft (Fritz Koenig) diff --git a/include/GLES2/gl2ext.h b/include/GLES2/gl2ext.h index a7d19a1fc8..0a93bfb865 100644 --- a/include/GLES2/gl2ext.h +++ b/include/GLES2/gl2ext.h @@ -2334,6 +2334,11 @@ GL_APICALL void GL_APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint quer #endif #endif /* GL_INTEL_performance_query */ +#ifndef GL_MESA_framebuffer_flip_y +#define GL_MESA_framebuffer_flip_y 1 +#define GL_FRAMEBUFFER_FLIP_Y_MESA0x8BBB +#endif /* GL_MESA_framebuffer_flip_y */ + #ifndef GL_MESA_program_binary_formats #define GL_MESA_program_binary_formats 1 #define GL_PROGRAM_BINARY_FORMAT_MESA 0x875F diff --git a/src/mapi/glapi/registry/gl.xml b/src/mapi/glapi/registry/gl.xml index 833478aa51..13882eff7b 100644 --- a/src/mapi/glapi/registry/gl.xml +++
Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2
Quoting Marek Olšák (2018-06-29 09:48:08) > On Fri, Jun 29, 2018 at 11:40 AM, Dylan Baker wrote: > > Quoting Marek Olšák (2018-06-18 16:33:09) > >> From: Marek Olšák > >> > >> Cc: 18.1 > >> --- > >> src/gallium/drivers/radeonsi/si_blit.c | 30 +++--- > >> 1 file changed, 27 insertions(+), 3 deletions(-) > >> > >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c > >> b/src/gallium/drivers/radeonsi/si_blit.c > >> index fe059b36577..93cf7fe9001 100644 > >> --- a/src/gallium/drivers/radeonsi/si_blit.c > >> +++ b/src/gallium/drivers/radeonsi/si_blit.c > >> @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context > >> *ctx, > >> if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty) > >> return; > >> > >> if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { > >> si_blit_decompress_color(sctx, rtex, 0, res->last_level, > >> 0, util_max_layer(res, 0), > >> rtex->dcc_separate_buffer != > >> NULL); > >> } > >> > >> /* Always do the analysis even if DCC is disabled at the moment. */ > >> - if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { > >> - rtex->separate_dcc_dirty = false; > >> - vi_separate_dcc_process_and_reset_stats(ctx, rtex); > >> + if (rtex->dcc_gather_statistics) { > >> + bool separate_dcc_dirty = rtex->separate_dcc_dirty; > >> + > >> + /* If the color buffer hasn't been unbound and fast clear > >> hasn't > >> +* been used, separate_dcc_dirty is false, but there may > >> have been > >> +* new rendering. Check if the color buffer is bound and > >> assume > >> +* it's dirty. > >> +* > >> +* Note that DRI2 never unbinds window colorbuffers, which > >> means > >> +* the DCC pipeline statistics query would never be re-set > >> and would > >> +* keep adding new results until all free memory is > >> exhausted if we > >> +* didn't do this. 
> >> +*/ > >> + if (!separate_dcc_dirty) { > >> + for (unsigned i = 0; i < > >> sctx->framebuffer.state.nr_cbufs; i++) { > >> + if (sctx->framebuffer.state.cbufs[i] && > >> + > >> sctx->framebuffer.state.cbufs[i]->texture == res) { > >> + separate_dcc_dirty = true; > >> + break; > >> + } > >> + } > >> + } > >> + > >> + if (separate_dcc_dirty) { > >> + rtex->separate_dcc_dirty = false; > >> + vi_separate_dcc_process_and_reset_stats(ctx, rtex); > >> + } > >> } > >> } > >> > >> void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex) > >> { > >> if (!rtex->dcc_offset) > >> return; > >> > >> si_blit_decompress_color(sctx, rtex, 0, > >> rtex->buffer.b.b.last_level, > >> 0, util_max_layer(>buffer.b.b, 0), > >> -- > >> 2.17.1 > >> > >> ___ > >> mesa-dev mailing list > >> mesa-dev@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > Hi Marek, > > > > This didn't apply cleanly to 18.1 because of > > 1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture -> > > si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by > > changing "tex" to "rtex", please take a look at the commit in staging/18.1 > > in > > the main tree or 18.1-proposed in my tree and let me know if it looks good > > to > > you. > > Yes, it looks good. Thanks. > > Marek Thank you. signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2
On Fri, Jun 29, 2018 at 11:40 AM, Dylan Baker wrote: > Quoting Marek Olšák (2018-06-18 16:33:09) >> From: Marek Olšák >> >> Cc: 18.1 >> --- >> src/gallium/drivers/radeonsi/si_blit.c | 30 +++--- >> 1 file changed, 27 insertions(+), 3 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c >> b/src/gallium/drivers/radeonsi/si_blit.c >> index fe059b36577..93cf7fe9001 100644 >> --- a/src/gallium/drivers/radeonsi/si_blit.c >> +++ b/src/gallium/drivers/radeonsi/si_blit.c >> @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context >> *ctx, >> if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty) >> return; >> >> if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { >> si_blit_decompress_color(sctx, rtex, 0, res->last_level, >> 0, util_max_layer(res, 0), >> rtex->dcc_separate_buffer != NULL); >> } >> >> /* Always do the analysis even if DCC is disabled at the moment. */ >> - if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { >> - rtex->separate_dcc_dirty = false; >> - vi_separate_dcc_process_and_reset_stats(ctx, rtex); >> + if (rtex->dcc_gather_statistics) { >> + bool separate_dcc_dirty = rtex->separate_dcc_dirty; >> + >> + /* If the color buffer hasn't been unbound and fast clear >> hasn't >> +* been used, separate_dcc_dirty is false, but there may >> have been >> +* new rendering. Check if the color buffer is bound and >> assume >> +* it's dirty. >> +* >> +* Note that DRI2 never unbinds window colorbuffers, which >> means >> +* the DCC pipeline statistics query would never be re-set >> and would >> +* keep adding new results until all free memory is >> exhausted if we >> +* didn't do this. 
>> +*/ >> + if (!separate_dcc_dirty) { >> + for (unsigned i = 0; i < >> sctx->framebuffer.state.nr_cbufs; i++) { >> + if (sctx->framebuffer.state.cbufs[i] && >> + >> sctx->framebuffer.state.cbufs[i]->texture == res) { >> + separate_dcc_dirty = true; >> + break; >> + } >> + } >> + } >> + >> + if (separate_dcc_dirty) { >> + rtex->separate_dcc_dirty = false; >> + vi_separate_dcc_process_and_reset_stats(ctx, rtex); >> + } >> } >> } >> >> void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex) >> { >> if (!rtex->dcc_offset) >> return; >> >> si_blit_decompress_color(sctx, rtex, 0, rtex->buffer.b.b.last_level, >> 0, util_max_layer(>buffer.b.b, 0), >> -- >> 2.17.1 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > Hi Marek, > > This didn't apply cleanly to 18.1 because of > 1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture -> > si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by > changing "tex" to "rtex", please take a look at the commit in staging/18.1 in > the main tree or 18.1-proposed in my tree and let me know if it looks good to > you. Yes, it looks good. Thanks. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v2]
On Fri, Jun 29, 2018 at 1:50 AM Erik Faye-Lund wrote: > > On Thu, Jun 28, 2018 at 11:12 PM Fritz Koenig wrote: > > > > Adds an extension to glFramebufferParameteri > > that will specify if the framebuffer is vertically > > flipped. Historically system framebuffers are > > vertically flipped and user framebuffers are not. > > Checking to see the state was done by looking at > > the name field. This adds an explicit field. > > > > v2: > > * updated spec language [for chadv] > > * correctly specifying ES 3.1 [for chadv] > > * refactor access to rb->Name [for jason] > > * handle GetFramebufferParameteriv [for chadv] > > --- > > docs/specs/MESA_framebuffer_flip_y.spec| 84 ++ > > include/GLES2/gl2ext.h | 5 ++ > > src/mapi/glapi/registry/gl.xml | 6 ++ > > src/mesa/drivers/dri/i915/intel_fbo.c | 7 +- > > src/mesa/drivers/dri/i965/intel_fbo.c | 7 +- > > src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 7 +- > > src/mesa/drivers/dri/radeon/radeon_fbo.c | 7 +- > > src/mesa/drivers/dri/radeon/radeon_span.c | 9 ++- > > src/mesa/drivers/dri/swrast/swrast.c | 7 +- > > src/mesa/drivers/osmesa/osmesa.c | 5 +- > > src/mesa/drivers/x11/xm_buffer.c | 3 +- > > src/mesa/drivers/x11/xmesaP.h | 3 +- > > src/mesa/main/accum.c | 17 +++-- > > src/mesa/main/dd.h | 3 +- > > src/mesa/main/extensions_table.h | 1 + > > src/mesa/main/fbobject.c | 18 - > > src/mesa/main/framebuffer.c| 1 + > > src/mesa/main/glheader.h | 3 + > > src/mesa/main/mtypes.h | 3 + > > src/mesa/main/readpix.c| 20 +++--- > > src/mesa/state_tracker/st_cb_fbo.c | 7 +- > > src/mesa/swrast/s_blit.c | 17 +++-- > > src/mesa/swrast/s_clear.c | 3 +- > > src/mesa/swrast/s_copypix.c| 11 +-- > > src/mesa/swrast/s_depth.c | 6 +- > > src/mesa/swrast/s_drawpix.c| 26 --- > > src/mesa/swrast/s_renderbuffer.c | 6 +- > > src/mesa/swrast/s_renderbuffer.h | 3 +- > > src/mesa/swrast/s_stencil.c| 3 +- > > 29 files changed, 241 insertions(+), 57 deletions(-) > > create mode 100644 docs/specs/MESA_framebuffer_flip_y.spec > > > > I think this needs to 
update the _mesa_is_winsys_fbo-check in > _mesa_GetMultisamplefv in src/mesa/main/multisample.c to flip the > sample-positions as well... Thanks for pointing that one out, will add it. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/2] r600: set rounding mode for texture array layer selection
Am 29.06.2018 um 16:35 schrieb Gert Wollny: > The evaluation of the array layer index is "floor(z+0.5)", and the default > rounding mode doesn't correctly evaluate this. Therefore, set the rounding > mode to "trunc" and z-filter mode to "point". > For other textures make sure the default rounding mode and z-filter are > used. > > Fixes single-sample tests out of: > dEQP-GLES3.functional.texture.shadow.2d_array.* > dEQP-GLES3.functional.texture.vertex.2d_array.* > dEQP-GLES3.functional.texture.filtering.2d_array.* > (With the single sample tests the rounding accuracy is tested too) > > v2: * reword comments and commit message > * clear S_03C008_TRUNC_COORD for all non-array types > > Signed-off-by: Gert Wollny > --- > src/gallium/drivers/r600/evergreen_state.c | 22 ++ > 1 file changed, 22 insertions(+) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index a484f0078a..b8ed4fea33 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -2413,6 +2413,28 @@ static void evergreen_emit_sampler_states(struct > r600_context *rctx, > rstate = texinfo->states.states[i]; > assert(rstate); > > + /* For texture arrays the formula select the layer is (floor(z > + 0.5)) and to select > + * apparently tha hardware doesn't trigger this when the > texture is in ARRAY mode > + * Neither does the default z-rounding mode provide the > required 0.5 shift > + * nor does it round with sufficinet accuracy. Consequently set > the coordinate sufficient. Albeit I'd still argue the rounding is done with quite sufficient accuracy to be spec compliant. > + * interpolation and truncate mode here to get "floor" for > positive coordinates. > + * Adding the 0.5 offset is done in the shader. > + * Also make sure that for other texture types the default is > used. You could also mention this alters all coordinates. 
So, I'm still really not fond of the idea, since there's no proof it causes any issues rather than venture into unknown territory, but whatever. Acked-by: Roland Scheidegger > + */ > + struct r600_pipe_sampler_view *rview = > texinfo->views.views[i]; > + if (rview) { > + rstate->tex_sampler_words[0] &= C_03C000_Z_FILTER; > + enum pipe_texture_target target = > rview->base.texture->target; > + if (target == PIPE_TEXTURE_2D_ARRAY || > + target == PIPE_TEXTURE_CUBE_ARRAY || > + target == PIPE_TEXTURE_1D_ARRAY) { > + rstate->tex_sampler_words[0] |= > S_03C000_Z_FILTER(V_03C000_SQ_TEX_Z_FILTER_POINT); > + rstate->tex_sampler_words[2] |= > S_03C008_TRUNCATE_COORD(1); > + } else { > + rstate->tex_sampler_words[2] &= > C_03C008_TRUNCATE_COORD; > + } > + } > + > radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags); > radeon_emit(cs, (resource_id_base + i) * 3); > radeon_emit_array(cs, rstate->tex_sampler_words, 3); > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup
Am 29.06.2018 um 16:35 schrieb Gert Wollny: > For texture array lookup the slice index is evaluated according to > idx = floor(z + 0.5) > > This patch implements the first part by adding 0.5 to the according > texture coordinate when appropriate. > > Fixes multi-sample tests out of: > dEQP-GLES3.functional.texture.shadow.2d_array.* > dEQP-GLES3.functional.texture.vertex.2d_array.* > dEQP-GLES3.functional.texture.filtering.2d_array.* > (In the multi-sample case the rounding accuracy is not tested.) > > v2: - Don't apply texture offset correction for GATHER*O (corrects piglit > failures reported by Dave Airlie) > - unconditionally set the texture offset to 1 (=0.5) because the shader > can't set an offset for the array index (Roland Scheidegger) > - Add Fixes comment to commit message > > Signed-off-by: Gert Wollny > --- > src/gallium/drivers/r600/r600_shader.c | 18 +- > 1 file changed, 17 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index c466a48262..4d17b3d875 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -7456,6 +7456,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > int8_t offset_x = 0, offset_y = 0, offset_z = 0; > boolean has_txq_cube_array_z = false; > unsigned sampler_index_mode; > + int *array_index_offset = NULL; > > if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && > ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || > @@ -8411,18 +8412,33 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > opcode == FETCH_OP_SAMPLE_C_LB) { > /* the array index is read from Y */ > tex.coord_type_y = 0; > + array_index_offset = _y; > } else { > /* the array index is read from Z */ > tex.coord_type_z = 0; > tex.src_sel_z = tex.src_sel_y; > + array_index_offset = _z; > + > } > } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || > inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || > ((inst->Texture.Texture == 
TGSI_TEXTURE_CUBE_ARRAY || > inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && > - (ctx->bc->chip_class >= EVERGREEN))) > + (ctx->bc->chip_class >= EVERGREEN))) { > /* the array index is read from Z */ > tex.coord_type_z = 0; > + array_index_offset = _z; > + } > + > + /* We have array access, the coordinates are not int and we use the > + * offset registers -> add 0.5 to the array index to adjust it according > + * to floor(z + 0.5). The floor opretaion is set as TRUNC in the texture operation So for gather4_O (where you'd need to do it differently) it must not be done, but for gather4 it has to be done? Still doesn't make all that much sense to me. The hw may be weird but I don't think it would be that weird? But anyway, as long as it doesn't regress anything, Acked-by: Roland Scheidegger > + * state. > + */ > + if (array_index_offset && opcode != FETCH_OP_LD && > + opcode != FETCH_OP_GATHER4_C_O && opcode != FETCH_OP_GATHER4_O) { > + *array_index_offset = 1; > + } > > /* mask unused source components */ > if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2
Quoting Marek Olšák (2018-06-18 16:33:09) > From: Marek Olšák > > Cc: 18.1 > --- > src/gallium/drivers/radeonsi/si_blit.c | 30 +++--- > 1 file changed, 27 insertions(+), 3 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c > b/src/gallium/drivers/radeonsi/si_blit.c > index fe059b36577..93cf7fe9001 100644 > --- a/src/gallium/drivers/radeonsi/si_blit.c > +++ b/src/gallium/drivers/radeonsi/si_blit.c > @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context > *ctx, > if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty) > return; > > if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { > si_blit_decompress_color(sctx, rtex, 0, res->last_level, > 0, util_max_layer(res, 0), > rtex->dcc_separate_buffer != NULL); > } > > /* Always do the analysis even if DCC is disabled at the moment. */ > - if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) { > - rtex->separate_dcc_dirty = false; > - vi_separate_dcc_process_and_reset_stats(ctx, rtex); > + if (rtex->dcc_gather_statistics) { > + bool separate_dcc_dirty = rtex->separate_dcc_dirty; > + > + /* If the color buffer hasn't been unbound and fast clear > hasn't > +* been used, separate_dcc_dirty is false, but there may have > been > +* new rendering. Check if the color buffer is bound and > assume > +* it's dirty. > +* > +* Note that DRI2 never unbinds window colorbuffers, which > means > +* the DCC pipeline statistics query would never be re-set > and would > +* keep adding new results until all free memory is exhausted > if we > +* didn't do this. 
> +*/ > + if (!separate_dcc_dirty) { > + for (unsigned i = 0; i < > sctx->framebuffer.state.nr_cbufs; i++) { > + if (sctx->framebuffer.state.cbufs[i] && > + sctx->framebuffer.state.cbufs[i]->texture > == res) { > + separate_dcc_dirty = true; > + break; > + } > + } > + } > + > + if (separate_dcc_dirty) { > + rtex->separate_dcc_dirty = false; > + vi_separate_dcc_process_and_reset_stats(ctx, rtex); > + } > } > } > > void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex) > { > if (!rtex->dcc_offset) > return; > > si_blit_decompress_color(sctx, rtex, 0, rtex->buffer.b.b.last_level, > 0, util_max_layer(>buffer.b.b, 0), > -- > 2.17.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev Hi Marek, This didn't apply cleanly to 18.1 because of 1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture -> si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by changing "tex" to "rtex", please take a look at the commit in staging/18.1 in the main tree or 18.1-proposed in my tree and let me know if it looks good to you. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 32/53] intel/fs: Mark LINTERP opcode as writing accumulator implicitly on pre-Gen7.
Quoting Jason Ekstrand (2018-05-24 14:56:14) > From: Francisco Jerez > > --- > src/intel/compiler/brw_shader.cpp | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/intel/compiler/brw_shader.cpp > b/src/intel/compiler/brw_shader.cpp > index 141b64e..61211ef 100644 > --- a/src/intel/compiler/brw_shader.cpp > +++ b/src/intel/compiler/brw_shader.cpp > @@ -984,7 +984,8 @@ backend_instruction::writes_accumulator_implicitly(const > struct gen_device_info > return writes_accumulator || >(devinfo->gen < 6 && > ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || > -(opcode >= FS_OPCODE_DDX_COARSE && opcode <= > FS_OPCODE_LINTERP))); > +(opcode >= FS_OPCODE_DDX_COARSE && opcode <= > FS_OPCODE_LINTERP))) || > + (devinfo->gen < 7 && opcode == FS_OPCODE_LINTERP); > } > > bool > -- > 2.5.0.400.gff86faf > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev Hi Jason, This doesn't apply to the 18.1 branch. At the very least d3cd6b7215c11054b587fb0fd621c53c6d62c64b from your series is also needed, although there are still conflicts after that which are taking a bit more time to track down, I can look into it later, but I'd like to get the 18.1.3 release done first. If you'd like to drop this instead let me know. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/2] r600: set rounding mode for texture array layer selection
The evaluation of the array layer index is "floor(z+0.5)", and the default rounding mode doesn't correctly evaluate this. Therefore, set the rounding mode to "trunc" and z-filter mode to "point". For other textures make sure that the default rounding mode and z-filter are used. Fixes single-sample tests out of: dEQP-GLES3.functional.texture.shadow.2d_array.* dEQP-GLES3.functional.texture.vertex.2d_array.* dEQP-GLES3.functional.texture.filtering.2d_array.* (With the single-sample tests the rounding accuracy is tested too) v2: * reword comments and commit message * clear S_03C008_TRUNC_COORD for all non-array types Signed-off-by: Gert Wollny --- src/gallium/drivers/r600/evergreen_state.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index a484f0078a..b8ed4fea33 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2413,6 +2413,28 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, rstate = texinfo->states.states[i]; assert(rstate); + /* For texture arrays the formula select the layer is (floor(z + 0.5)) and +* apparently tha hardware doesn't trigger this when the texture is in ARRAY mode +* Neither does the default z-rounding mode provide the required 0.5 shift +* nor does it round with sufficinet accuracy. Consequently set the coordinate +* interpolation and truncate mode here to get "floor" for positive coordinates. +* Adding the 0.5 offset is done in the shader. +* Also make sure that for other texture types the default is used. 
+*/ + struct r600_pipe_sampler_view *rview = texinfo->views.views[i]; + if (rview) { + rstate->tex_sampler_words[0] &= C_03C000_Z_FILTER; + enum pipe_texture_target target = rview->base.texture->target; + if (target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_CUBE_ARRAY || + target == PIPE_TEXTURE_1D_ARRAY) { + rstate->tex_sampler_words[0] |= S_03C000_Z_FILTER(V_03C000_SQ_TEX_Z_FILTER_POINT); + rstate->tex_sampler_words[2] |= S_03C008_TRUNCATE_COORD(1); + } else { + rstate->tex_sampler_words[2] &= C_03C008_TRUNCATE_COORD; + } + } + radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags); radeon_emit(cs, (resource_id_base + i) * 3); radeon_emit_array(cs, rstate->tex_sampler_words, 3); -- 2.16.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup
For texture array lookup the slice index is evaluated according to idx = floor(z + 0.5) This patch implements the first part by adding 0.5 to the according texture coordinate when appropriate. Fixes multi-sample tests out of: dEQP-GLES3.functional.texture.shadow.2d_array.* dEQP-GLES3.functional.texture.vertex.2d_array.* dEQP-GLES3.functional.texture.filtering.2d_array.* (In the multi-sample case the rounding accuracy is not tested.) v2: - Don't apply texture offset correction for GATHER*O (corrects piglit failures reported by Dave Airlie) - unconditionally set the texture offset to 1 (=0.5) because the shader can't set an offset for the array index (Roland Scheidegger) - Add Fixes comment to commit message Signed-off-by: Gert Wollny --- src/gallium/drivers/r600/r600_shader.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c466a48262..4d17b3d875 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -7456,6 +7456,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) int8_t offset_x = 0, offset_y = 0, offset_z = 0; boolean has_txq_cube_array_z = false; unsigned sampler_index_mode; + int *array_index_offset = NULL; if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || @@ -8411,18 +8412,33 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) opcode == FETCH_OP_SAMPLE_C_LB) { /* the array index is read from Y */ tex.coord_type_y = 0; + array_index_offset = _y; } else { /* the array index is read from Z */ tex.coord_type_z = 0; tex.src_sel_z = tex.src_sel_y; + array_index_offset = _z; + } } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && - (ctx->bc->chip_class >= EVERGREEN))) + 
(ctx->bc->chip_class >= EVERGREEN))) { /* the array index is read from Z */ tex.coord_type_z = 0; + array_index_offset = _z; + } + + /* We have array access, the coordinates are not int and we use the +* offset registers -> add 0.5 to the array index to adjust it according +* to floor(z + 0.5). The floor opretaion is set as TRUNC in the texture +* state. +*/ + if (array_index_offset && opcode != FETCH_OP_LD && + opcode != FETCH_OP_GATHER4_C_O && opcode != FETCH_OP_GATHER4_O) { + *array_index_offset = 1; + } /* mask unused source components */ if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { -- 2.16.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 0/2] r600: Fix array texture slice index evaluation
I reworded parts of the comments and simplified the patch against the shader to correct the piglit failures, taking into account that a shader cannot set an offset for the array index at all. Apart from the dEQP tests I've run "./piglit run gpu -t texture" with no changes. (I always got hangs with "./piglit run gpu -x atomicity", with or without these patches; some image_store_load tests seem to behave badly.) I'm still thinking about how relevant the TRUNC versus default rounding mode is and what is actually the more correct approach for point sampling. That's also why I think that it is better to keep the patches apart, to be better able to bisect possible issues. best, Gert Gert Wollny (2): r600: correct texture offset for array index lookup r600: set rounding mode for texture array layer selection src/gallium/drivers/r600/evergreen_state.c | 22 ++ src/gallium/drivers/r600/r600_shader.c | 18 +- 2 files changed, 39 insertions(+), 1 deletion(-) -- 2.16.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] nv50/ir: improve maintainability of Target*::initOpInfo()
Reviewed-by: Ilia Mirkin On Fri, Jun 29, 2018 at 9:51 AM, Rhys Perry wrote: > This is mainly useful for when one needs to add new opcodes in a painless > and reliable way. > > Signed-off-by: Rhys Perry > --- > Changes in v2: > - use operation instead of uint32_t > - use ARRAY_SIZE() > - add "set below" comments > > I don't have push access. > > .../nouveau/codegen/nv50_ir_target_nv50.cpp| 27 > -- > .../nouveau/codegen/nv50_ir_target_nvc0.cpp| 24 ++- > 2 files changed, 28 insertions(+), 23 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 83b4102b0a..ad76d2dcb4 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -111,16 +111,15 @@ void TargetNV50::initOpInfo() > { > unsigned int i, j; > > - static const uint32_t commutative[(OP_LAST + 31) / 32] = > + static const operation commutativeList[] = > { > - // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, > SET_XOR, > - // SET, SELP, SLCT > - 0x0ce0ca00, 0x007e, 0x, 0x > + OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN, > + OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT > }; > - static const uint32_t shortForm[(OP_LAST + 31) / 32] = > + static const operation shortFormList[] = > { > - // MOV, ADD, SUB, MUL, MAD, SAD, RCP, L/PINTERP, TEX, TXF > - 0x00014e40, 0x0080, 0x1260, 0x > + OP_MOV, OP_ADD, OP_SUB, OP_MUL, OP_MAD, OP_SAD, OP_RCP, OP_LINTERP, > + OP_PINTERP, OP_TEX, OP_TXF > }; > static const operation noDestList[] = > { > @@ -157,18 +156,22 @@ void TargetNV50::initOpInfo() > >opInfo[i].hasDest = 1; >opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); > - opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; > + opInfo[i].commutative = false; /* set below */ >opInfo[i].pseudo = (i < OP_MOV); >opInfo[i].predicate = !opInfo[i].pseudo; >opInfo[i].flow = (i 
>= OP_BRA && i <= OP_JOIN); > - opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8; > + opInfo[i].minEncSize = 8; /* set below */ > } > - for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i) > + for (i = 0; i < ARRAY_SIZE(commutativeList); ++i) > + opInfo[commutativeList[i]].commutative = true; > + for (i = 0; i < ARRAY_SIZE(shortFormList); ++i) > + opInfo[shortFormList[i]].minEncSize = 4; > + for (i = 0; i < ARRAY_SIZE(noDestList); ++i) >opInfo[noDestList[i]].hasDest = 0; > - for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i) > + for (i = 0; i < ARRAY_SIZE(noPredList); ++i) >opInfo[noPredList[i]].predicate = 0; > > - for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { > + for (i = 0; i < ARRAY_SIZE(_initProps); ++i) { >const struct opProperties *prop = &_initProps[i]; > >for (int s = 0; s < 3; ++s) { > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index 8938d19f6c..1a62dce951 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -191,17 +191,15 @@ void TargetNVC0::initOpInfo() > { > unsigned int i, j; > > - static const uint32_t commutative[(OP_LAST + 31) / 32] = > + static const operation commutative[] = > { > - // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, > SET_XOR, > - // SET, SELP, SLCT > - 0x0ce0ca00, 0x007e, 0x, 0x > + OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN, > + OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT > }; > > - static const uint32_t shortForm[(OP_LAST + 31) / 32] = > + static const operation shortForm[] = > { > - // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN > - 0x0ce0ca00, 0x, 0x, 0x > + OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN > }; > > static const operation noDest[] = > @@ -240,15 +238,19 @@ void 
TargetNVC0::initOpInfo() > >opInfo[i].hasDest = 1; >opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); > - opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; > + opInfo[i].commutative = false; /* set below */ >opInfo[i].pseudo = (i < OP_MOV); >opInfo[i].predicate = !opInfo[i].pseudo; >opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); > - opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8; > + opInfo[i].minEncSize = 8; /* set below */ > } > - for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
[Mesa-dev] [PATCH 03/18] nir/linker: use empty block info to assign uniform locations
For the cases of uniforms that don't have an explicit location. Under ARB_gl_spirv those are exceptions, like uniform atomic counters. --- src/compiler/glsl/gl_nir_link_uniforms.c | 31 +-- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c b/src/compiler/glsl/gl_nir_link_uniforms.c index 388c1ab63fc..77d3eaa5f2b 100644 --- a/src/compiler/glsl/gl_nir_link_uniforms.c +++ b/src/compiler/glsl/gl_nir_link_uniforms.c @@ -79,6 +79,8 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx, } /* Reserve locations for rest of the uniforms. */ + link_util_update_empty_uniform_locations(prog); + for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { struct gl_uniform_storage *uniform = >data->UniformStorage[i]; @@ -93,22 +95,23 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx, if (uniform->remap_location != UNMAPPED_UNIFORM_LOC) continue; - /* How many new entries for this uniform? */ + /* How many entries for this uniform? */ const unsigned entries = MAX2(1, uniform->array_elements); - /* @FIXME: By now, we add un-assigned uniform locations to the end of - * the uniform file. We need to keep track of empty locations and use - * them. 
- */ - unsigned chosen_location = prog->NumUniformRemapTable; - - /* resize remap table to fit new entries */ - prog->UniformRemapTable = - reralloc(prog, - prog->UniformRemapTable, - struct gl_uniform_storage *, - prog->NumUniformRemapTable + entries); - prog->NumUniformRemapTable += entries; + unsigned chosen_location = + link_util_find_empty_block(prog, >data->UniformStorage[i]); + + if (chosen_location == -1) { + chosen_location = prog->NumUniformRemapTable; + + /* resize remap table to fit new entries */ + prog->UniformRemapTable = +reralloc(prog, + prog->UniformRemapTable, + struct gl_uniform_storage *, + prog->NumUniformRemapTable + entries); + prog->NumUniformRemapTable += entries; + } /* set the base location in remap table for the uniform */ uniform->remap_location = chosen_location; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/18] nir/linker: handle uniforms without explicit location
ARB_gl_spirv states that uniforms in general need an explicit location. But there are still some cases of uniforms without a location, for example uniform atomic counters. Those don't have a location from the OpenGL point of view (they are identified with a binding), but Mesa internally assigns them a location. Signed-off-by: Eduardo Lima Signed-off-by: Alejandro Piñeiro Signed-off-by: Neil Roberts --- The @FIXME included in the patch below is solved by the follow-up patch "nir/linker: use empty block info to assign uniform locations", so perhaps it makes sense to just squash both patches. I don't have a strong opinion on that, but I think that it would be easier to review as split patches. src/compiler/glsl/gl_nir_link_uniforms.c | 61 ++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c b/src/compiler/glsl/gl_nir_link_uniforms.c index c6961fbb6ca..388c1ab63fc 100644 --- a/src/compiler/glsl/gl_nir_link_uniforms.c +++ b/src/compiler/glsl/gl_nir_link_uniforms.c @@ -36,6 +36,8 @@ * normal uniforms as mandatory, and so on). */ +#define UNMAPPED_UNIFORM_LOC ~0u + static void nir_setup_uniform_remap_tables(struct gl_context *ctx, struct gl_shader_program *prog) @@ -58,8 +60,59 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx, for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { struct gl_uniform_storage *uniform = >data->UniformStorage[i]; + if (prog->data->UniformStorage[i].remap_location == UNMAPPED_UNIFORM_LOC) + continue; + + /* How many new entries for this uniform? */ + const unsigned entries = MAX2(1, uniform->array_elements); + unsigned num_slots = glsl_get_component_slots(uniform->type); + + uniform->storage = [data_pos]; + + /* Set remap table entries point to correct gl_uniform_storage. 
*/ + for (unsigned j = 0; j < entries; j++) { + unsigned element_loc = uniform->remap_location + j; + prog->UniformRemapTable[element_loc] = uniform; + + data_pos += num_slots; + } + } + + /* Reserve locations for rest of the uniforms. */ + for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { + struct gl_uniform_storage *uniform = >data->UniformStorage[i]; + + if (uniform->is_shader_storage) + continue; + + /* Built-in uniforms should not get any location. */ + if (uniform->builtin) + continue; + + /* Explicit ones have been set already. */ + if (uniform->remap_location != UNMAPPED_UNIFORM_LOC) + continue; + /* How many new entries for this uniform? */ const unsigned entries = MAX2(1, uniform->array_elements); + + /* @FIXME: By now, we add un-assigned uniform locations to the end of + * the uniform file. We need to keep track of empty locations and use + * them. + */ + unsigned chosen_location = prog->NumUniformRemapTable; + + /* resize remap table to fit new entries */ + prog->UniformRemapTable = + reralloc(prog, + prog->UniformRemapTable, + struct gl_uniform_storage *, + prog->NumUniformRemapTable + entries); + prog->NumUniformRemapTable += entries; + + /* set the base location in remap table for the uniform */ + uniform->remap_location = chosen_location; + unsigned num_slots = glsl_get_component_slots(uniform->type); uniform->storage = [data_pos]; @@ -302,8 +355,12 @@ nir_link_uniform(struct gl_context *ctx, } uniform->active_shader_mask |= 1 << stage; - /* Uniform has an explicit location */ - uniform->remap_location = location; + if (location >= 0) { + /* Uniform has an explicit location */ + uniform->remap_location = location; + } else { + uniform->remap_location = UNMAPPED_UNIFORM_LOC; + } /* @FIXME: the initialization of the following will be done as we * implement support for their specific features, like SSBO, atomics, -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/18] spirv/nir: tweak nir type when storage class is SpvStorageClassAtomicCounter
GLSL types differentiate uint from atomic uint. In SPIR-V the type is uint, and the variable has a specific storage class. So we need to tweak the type based on the storage class. Ideally we would like to get the proper type at vtn_handle_type, but we don't have the storage class at that moment. We tweak only the nir type, as it is the one that really requires it. --- Again, this commit could probably be squashed with the follow-up patch "nir/spirv: Fix atomic counter (multidimensional-)arrays". Sending as two different patches to make the review easier. src/compiler/spirv/vtn_variables.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index c56d74d683b..a40c30c8a75 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1643,7 +1643,17 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val, /* For these, we create the variable normally */ var->var = rzalloc(b->shader, nir_variable); var->var->name = ralloc_strdup(var->var, val->name); - var->var->type = var->type->type; + + /* Need to tweak the nir type here as at vtn_handle_type we don't have + * the access to storage_class, that is the one that points us that is + * an atomic uint. + */ + if (glsl_get_base_type(var->type->type) == GLSL_TYPE_UINT && + storage_class == SpvStorageClassAtomicCounter) { + var->var->type = glsl_atomic_uint_type(); + } else { + var->var->type = var->type->type; + } var->var->data.mode = nir_mode; var->var->data.location = -1; var->var->interface_type = NULL; -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev