Re: [Mesa-dev] [PATCH 6/6] anv: Add support for the on-disk shader cache

2018-06-29 Thread Timothy Arceri

Series:

Reviewed-by: Timothy Arceri 

On 30/06/18 13:44, Jason Ekstrand wrote:

---
  src/intel/vulkan/anv_device.c | 36 ++
  src/intel/vulkan/anv_pipeline_cache.c | 98 ---
  src/intel/vulkan/anv_private.h|  3 +
  3 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a864c702c3f..ca6e1c0cace 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -35,6 +35,7 @@
  #include "util/strtod.h"
  #include "util/debug.h"
  #include "util/build_id.h"
+#include "util/disk_cache.h"
  #include "util/mesa-sha1.h"
  #include "vk_util.h"
  #include "common/gen_defines.h"
@@ -233,6 +234,8 @@ anv_physical_device_init_uuids(struct anv_physical_device 
*device)
 "build-id too short.  It needs to be a SHA");
 }
  
+   memcpy(device->driver_build_sha1, build_id_data(note), 20);

+
 struct mesa_sha1 sha1_ctx;
 uint8_t sha1[20];
 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
@@ -271,6 +274,35 @@ anv_physical_device_init_uuids(struct anv_physical_device 
*device)
 return VK_SUCCESS;
  }
  
+static void

+anv_physical_device_init_disk_cache(struct anv_physical_device *device)
+{
+#ifdef ENABLE_SHADER_CACHE
+   char renderer[9];
+   MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
+   device->chipset_id);
+   assert(len == sizeof(renderer) - 1);
+
+   char timestamp[41];
+   _mesa_sha1_format(timestamp, device->driver_build_sha1);
+
+   device->disk_cache = disk_cache_create(renderer, timestamp, 0);
+#else
+   device->disk_cache = NULL;
+#endif
+}
+
+static void
+anv_physical_device_free_disk_cache(struct anv_physical_device *device)
+{
+#ifdef ENABLE_SHADER_CACHE
+   if (device->disk_cache)
+  disk_cache_destroy(device->disk_cache);
+#else
+   assert(device->disk_cache == NULL);
+#endif
+}
+
  static VkResult
  anv_physical_device_init(struct anv_physical_device *device,
   struct anv_instance *instance,
@@ -442,6 +474,8 @@ anv_physical_device_init(struct anv_physical_device *device,
 if (result != VK_SUCCESS)
goto fail;
  
+   anv_physical_device_init_disk_cache(device);

+
 if (instance->enabled_extensions.KHR_display) {
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
if (master_fd >= 0) {
@@ -459,6 +493,7 @@ anv_physical_device_init(struct anv_physical_device *device,
 result = anv_init_wsi(device);
 if (result != VK_SUCCESS) {
ralloc_free(device->compiler);
+  anv_physical_device_free_disk_cache(device);
goto fail;
 }
  
@@ -481,6 +516,7 @@ static void

  anv_physical_device_finish(struct anv_physical_device *device)
  {
 anv_finish_wsi(device);
+   anv_physical_device_free_disk_cache(device);
 ralloc_free(device->compiler);
 close(device->local_fd);
 if (device->master_fd >= 0)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index e57cd1c75c6..d4c7262dc05 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -24,6 +24,8 @@
  #include "compiler/blob.h"
  #include "util/hash_table.h"
  #include "util/debug.h"
+#include "util/disk_cache.h"
+#include "util/mesa-sha1.h"
  #include "anv_private.h"
  
  struct anv_shader_bin *

@@ -280,6 +282,25 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
 return shader;
  }
  
+static void

+anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache,
+  struct anv_shader_bin *bin)
+{
+   if (!cache->cache)
+  return;
+
+   pthread_mutex_lock(&cache->mutex);
+
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key);
+   if (entry == NULL) {
+  /* Take a reference for the cache */
+  anv_shader_bin_ref(bin);
+  _mesa_hash_table_insert(cache->cache, bin->key, bin);
+   }
+
+   pthread_mutex_unlock(&cache->mutex);
+}
+
  static struct anv_shader_bin *
  anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
   const void *key_data, uint32_t key_size,
@@ -540,7 +561,38 @@ anv_device_search_for_kernel(struct anv_device *device,
   struct anv_pipeline_cache *cache,
   const void *key_data, uint32_t key_size)
  {
-   return cache ? anv_pipeline_cache_search(cache, key_data, key_size) : NULL;
+   struct anv_shader_bin *bin;
+
+   if (cache) {
+  bin = anv_pipeline_cache_search(cache, key_data, key_size);
+  if (bin)
+ return bin;
+   }
+
+#ifdef ENABLE_SHADER_CACHE
+   struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache;
+   if (disk_cache) {
+  cache_key cache_key;
+  disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
+
+  size_t buffer_size;
+  uint8_t *buffer = disk_cache_get(disk_cache, 

Re: [Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass

2018-06-29 Thread Jason Ekstrand
On Fri, Jun 29, 2018 at 9:56 PM, Timothy Arceri 
wrote:

> On 30/06/18 10:13, Jason Ekstrand wrote:
>
>> This pass searches for reasonably large local variables which can be
>> statically proven to be constant and moves them into shader constant
>> data.  This is especially useful when large tables are baked into the
>> shader source code because they can be moved into a UBO by the driver to
>> reduce register pressure and make indirect access cheaper.
>>
>> v2 (Jason Ekstrand):
>>   - Use a size/align function to ensure we get the right alignments
>>   - Use the newly added deref offset helpers
>> ---
>>   src/compiler/Makefile.sources  |   1 +
>>   src/compiler/nir/meson.build   |   1 +
>>   src/compiler/nir/nir.h |   4 +
>>   src/compiler/nir/nir_opt_large_constants.c | 301 +
>>   4 files changed, 307 insertions(+)
>>   create mode 100644 src/compiler/nir/nir_opt_large_constants.c
>>
>> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.source
>> s
>> index 0fcbc5c5c5b..9e3fbdc2612 100644
>> --- a/src/compiler/Makefile.sources
>> +++ b/src/compiler/Makefile.sources
>> @@ -276,6 +276,7 @@ NIR_FILES = \
>> nir/nir_opt_if.c \
>> nir/nir_opt_intrinsics.c \
>> nir/nir_opt_loop_unroll.c \
>> +   nir/nir_opt_large_constants.c \
>> nir/nir_opt_move_comparisons.c \
>> nir/nir_opt_move_load_ubo.c \
>> nir/nir_opt_peephole_select.c \
>> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
>> index eb7fb7b121e..28aa8de7014 100644
>> --- a/src/compiler/nir/meson.build
>> +++ b/src/compiler/nir/meson.build
>> @@ -160,6 +160,7 @@ files_libnir = files(
>> 'nir_opt_global_to_local.c',
>> 'nir_opt_if.c',
>> 'nir_opt_intrinsics.c',
>> +  'nir_opt_large_constants.c',
>> 'nir_opt_loop_unroll.c',
>> 'nir_opt_move_comparisons.c',
>> 'nir_opt_move_load_ubo.c',
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index cc5f88d6f54..92ab3a699cc 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader *shader);
>> bool nir_opt_intrinsics(nir_shader *shader);
>>   +bool nir_opt_large_constants(nir_shader *shader,
>> + glsl_type_size_align_func size_align,
>> + unsigned threshold);
>> +
>>   bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode
>> indirect_mask);
>> bool nir_opt_move_comparisons(nir_shader *shader);
>> diff --git a/src/compiler/nir/nir_opt_large_constants.c
>> b/src/compiler/nir/nir_opt_large_constants.c
>> new file mode 100644
>> index 000..027c6e8e5b5
>> --- /dev/null
>> +++ b/src/compiler/nir/nir_opt_large_constants.c
>> @@ -0,0 +1,301 @@
>> +/*
>> + * Copyright © 2018 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining
>> a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> next
>> + * paragraph) shall be included in all copies or substantial portions of
>> the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "nir.h"
>> +#include "nir_builder.h"
>> +#include "nir_deref.h"
>> +
>> +struct var_info {
>> +   bool is_constant;
>> +   bool found_read;
>> +};
>> +
>> +static nir_ssa_def *
>> +build_constant_load(nir_builder *b, nir_deref_instr *deref,
>> +glsl_type_size_align_func size_align)
>> +{
>> +   nir_variable *var = nir_deref_instr_get_variable(deref);
>> +
>> +   const unsigned bit_size = glsl_get_bit_size(deref->type);
>> +   const unsigned num_components = glsl_get_vector_elements(deref
>> ->type);
>> +
>> +   UNUSED unsigned var_size, var_align;
>> +   size_align(var->type, &var_size, &var_align);
>> +   assert(var->data.location % var_align == 0);
>> +
>> +   nir_intrinsic_instr *load =
>> +  nir_intrinsic_instr_create(b->shader,
>> nir_intrinsic_load_constant);
>> +   load->num_components = num_components;
>> +   nir_intrinsic_set_base(load, 

Re: [Mesa-dev] [PATCH] glsl/cache: save and restore ExternalSamplersUsed

2018-06-29 Thread Timothy Arceri

Reviewed-by: Timothy Arceri 

On 30/06/18 14:59, Marek Olšák wrote:

From: Marek Olšák 

Shaders that need special code for external samplers were broken if
they were loaded from the cache.

Cc: 18.1 
---
  src/compiler/glsl/serialize.cpp | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp
index 9c21453f91e..889038fb5e2 100644
--- a/src/compiler/glsl/serialize.cpp
+++ b/src/compiler/glsl/serialize.cpp
@@ -1037,20 +1037,21 @@ write_shader_metadata(struct blob *metadata, 
gl_linked_shader *shader)
  
 blob_write_bytes(metadata, glprog->TexturesUsed,

  sizeof(glprog->TexturesUsed));
 blob_write_uint64(metadata, glprog->SamplersUsed);
  
 blob_write_bytes(metadata, glprog->SamplerUnits,

  sizeof(glprog->SamplerUnits));
 blob_write_bytes(metadata, glprog->sh.SamplerTargets,
  sizeof(glprog->sh.SamplerTargets));
 blob_write_uint32(metadata, glprog->ShadowSamplers);
+   blob_write_uint32(metadata, glprog->ExternalSamplersUsed);
  
 blob_write_bytes(metadata, glprog->sh.ImageAccess,

  sizeof(glprog->sh.ImageAccess));
 blob_write_bytes(metadata, glprog->sh.ImageUnits,
  sizeof(glprog->sh.ImageUnits));
  
 size_t ptr_size = sizeof(GLvoid *);
  
 blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);

 blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
@@ -1089,20 +1090,21 @@ read_shader_metadata(struct blob_reader *metadata,
  
 blob_copy_bytes(metadata, (uint8_t *) glprog->TexturesUsed,

 sizeof(glprog->TexturesUsed));
 glprog->SamplersUsed = blob_read_uint64(metadata);
  
 blob_copy_bytes(metadata, (uint8_t *) glprog->SamplerUnits,

 sizeof(glprog->SamplerUnits));
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets,
 sizeof(glprog->sh.SamplerTargets));
 glprog->ShadowSamplers = blob_read_uint32(metadata);
+   glprog->ExternalSamplersUsed = blob_read_uint32(metadata);
  
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess,

 sizeof(glprog->sh.ImageAccess));
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
 sizeof(glprog->sh.ImageUnits));
  
 size_t ptr_size = sizeof(GLvoid *);
  
 glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);

 glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl/cache: save and restore ExternalSamplersUsed

2018-06-29 Thread Marek Olšák
From: Marek Olšák 

Shaders that need special code for external samplers were broken if
they were loaded from the cache.

Cc: 18.1 
---
 src/compiler/glsl/serialize.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp
index 9c21453f91e..889038fb5e2 100644
--- a/src/compiler/glsl/serialize.cpp
+++ b/src/compiler/glsl/serialize.cpp
@@ -1037,20 +1037,21 @@ write_shader_metadata(struct blob *metadata, 
gl_linked_shader *shader)
 
blob_write_bytes(metadata, glprog->TexturesUsed,
 sizeof(glprog->TexturesUsed));
blob_write_uint64(metadata, glprog->SamplersUsed);
 
blob_write_bytes(metadata, glprog->SamplerUnits,
 sizeof(glprog->SamplerUnits));
blob_write_bytes(metadata, glprog->sh.SamplerTargets,
 sizeof(glprog->sh.SamplerTargets));
blob_write_uint32(metadata, glprog->ShadowSamplers);
+   blob_write_uint32(metadata, glprog->ExternalSamplersUsed);
 
blob_write_bytes(metadata, glprog->sh.ImageAccess,
 sizeof(glprog->sh.ImageAccess));
blob_write_bytes(metadata, glprog->sh.ImageUnits,
 sizeof(glprog->sh.ImageUnits));
 
size_t ptr_size = sizeof(GLvoid *);
 
blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);
blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
@@ -1089,20 +1090,21 @@ read_shader_metadata(struct blob_reader *metadata,
 
blob_copy_bytes(metadata, (uint8_t *) glprog->TexturesUsed,
sizeof(glprog->TexturesUsed));
glprog->SamplersUsed = blob_read_uint64(metadata);
 
blob_copy_bytes(metadata, (uint8_t *) glprog->SamplerUnits,
sizeof(glprog->SamplerUnits));
blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets,
sizeof(glprog->sh.SamplerTargets));
glprog->ShadowSamplers = blob_read_uint32(metadata);
+   glprog->ExternalSamplersUsed = blob_read_uint32(metadata);
 
blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess,
sizeof(glprog->sh.ImageAccess));
blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
sizeof(glprog->sh.ImageUnits));
 
size_t ptr_size = sizeof(GLvoid *);
 
glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);
glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass

2018-06-29 Thread Timothy Arceri

On 30/06/18 10:13, Jason Ekstrand wrote:

This pass searches for reasonably large local variables which can be
statically proven to be constant and moves them into shader constant
data.  This is especially useful when large tables are baked into the
shader source code because they can be moved into a UBO by the driver to
reduce register pressure and make indirect access cheaper.

v2 (Jason Ekstrand):
  - Use a size/align function to ensure we get the right alignments
  - Use the newly added deref offset helpers
---
  src/compiler/Makefile.sources  |   1 +
  src/compiler/nir/meson.build   |   1 +
  src/compiler/nir/nir.h |   4 +
  src/compiler/nir/nir_opt_large_constants.c | 301 +
  4 files changed, 307 insertions(+)
  create mode 100644 src/compiler/nir/nir_opt_large_constants.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 0fcbc5c5c5b..9e3fbdc2612 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -276,6 +276,7 @@ NIR_FILES = \
nir/nir_opt_if.c \
nir/nir_opt_intrinsics.c \
nir/nir_opt_loop_unroll.c \
+   nir/nir_opt_large_constants.c \
nir/nir_opt_move_comparisons.c \
nir/nir_opt_move_load_ubo.c \
nir/nir_opt_peephole_select.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index eb7fb7b121e..28aa8de7014 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -160,6 +160,7 @@ files_libnir = files(
'nir_opt_global_to_local.c',
'nir_opt_if.c',
'nir_opt_intrinsics.c',
+  'nir_opt_large_constants.c',
'nir_opt_loop_unroll.c',
'nir_opt_move_comparisons.c',
'nir_opt_move_load_ubo.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index cc5f88d6f54..92ab3a699cc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader *shader);
  
  bool nir_opt_intrinsics(nir_shader *shader);
  
+bool nir_opt_large_constants(nir_shader *shader,

+ glsl_type_size_align_func size_align,
+ unsigned threshold);
+
  bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
  
  bool nir_opt_move_comparisons(nir_shader *shader);

diff --git a/src/compiler/nir/nir_opt_large_constants.c 
b/src/compiler/nir/nir_opt_large_constants.c
new file mode 100644
index 000..027c6e8e5b5
--- /dev/null
+++ b/src/compiler/nir/nir_opt_large_constants.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+struct var_info {
+   bool is_constant;
+   bool found_read;
+};
+
+static nir_ssa_def *
+build_constant_load(nir_builder *b, nir_deref_instr *deref,
+glsl_type_size_align_func size_align)
+{
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   const unsigned bit_size = glsl_get_bit_size(deref->type);
+   const unsigned num_components = glsl_get_vector_elements(deref->type);
+
+   UNUSED unsigned var_size, var_align;
+   size_align(var->type, &var_size, &var_align);
+   assert(var->data.location % var_align == 0);
+
+   nir_intrinsic_instr *load =
+  nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant);
+   load->num_components = num_components;
+   nir_intrinsic_set_base(load, var->data.location);
+   nir_intrinsic_set_range(load, var_size);
+   load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, 
size_align));
+   nir_ssa_dest_init(&load->instr, &load->dest,
+ num_components, bit_size, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static void
+handle_constant_store(nir_builder *b, nir_intrinsic_instr *store,
+  glsl_type_size_align_func 

[Mesa-dev] [PATCH] ac: move all LLVM module initialization into ac_create_module

2018-06-29 Thread Marek Olšák
From: Marek Olšák 

This removes some ugly code around module initialization.
---
Dave, please rebase your code on top of this, and we don't need triple
and data_layout in ac_llvm_compiler_info.

 src/amd/common/ac_llvm_helper.cpp  | 10 ++
 src/amd/common/ac_llvm_util.h  |  1 +
 src/amd/vulkan/radv_nir_to_llvm.c  | 12 ++--
 src/gallium/drivers/radeonsi/si_pipe.c | 14 +++---
 src/gallium/drivers/radeonsi/si_shader.h   |  2 --
 .../drivers/radeonsi/si_shader_tgsi_setup.c|  5 +
 6 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index 1a2aee3bc9a..495bd98da9c 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -54,20 +54,30 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
 {
return LLVMGetCalledValue(call);
 }
 
 bool ac_llvm_is_function(LLVMValueRef v)
 {
return LLVMGetValueKind(v) == LLVMFunctionValueKind;
 }
 
+LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
+{
+   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+   LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", 
ctx);
+
+   llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
+   llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
+   return module;
+}
+
 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 enum ac_float_mode float_mode)
 {
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
 
llvm::FastMathFlags flags;
 
switch (float_mode) {
case AC_FLOAT_MODE_DEFAULT:
break;
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 0aa803c5bc1..6e6d15bb56c 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -76,20 +76,21 @@ LLVMTargetRef ac_get_llvm_target(const char *triple);
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
 void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
   int attr_idx, enum ac_func_attr attr);
 void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);
 
 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
 bool ac_llvm_is_function(LLVMValueRef v);
+LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
 
 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 enum ac_float_mode float_mode);
 
 void
 ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
 const char *name, unsigned value);
 
 static inline unsigned
 ac_get_load_intr_attribs(bool can_speculate)
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index cd8d86603bc..ce6d5e1547d 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3158,28 +3158,21 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
const struct radv_nir_compiler_options 
*options)
 {
struct radv_shader_context ctx = {0};
unsigned i;
ctx.options = options;
ctx.shader_info = shader_info;
ctx.context = LLVMContextCreate();
 
   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
 options->family);
-   ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", 
ctx.context);
-   LLVMSetTarget(ctx.ac.module, options->supports_spill ? 
"amdgcn-mesa-mesa3d" : "amdgcn--");
-
-   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
-   LLVMSetDataLayout(ctx.ac.module, data_layout_str);
-   LLVMDisposeTargetData(data_layout);
-   LLVMDisposeMessage(data_layout_str);
+   ctx.ac.module = ac_create_module(tm, ctx.context);
 
enum ac_float_mode float_mode =
options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
   AC_FLOAT_MODE_DEFAULT;
 
ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
 
memset(shader_info, 0, sizeof(*shader_info));
 
for(int i = 0; i < shader_count; ++i)
@@ -3606,24 +3599,23 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm,
struct radv_shader_variant_info *shader_info,
const struct radv_nir_compiler_options *options)
 {
struct radv_shader_context ctx = {0};
ctx.context = LLVMContextCreate();
ctx.options = options;
ctx.shader_info = shader_info;
 
ac_llvm_context_init(, 

Re: [Mesa-dev] [PATCH 11/11] ac/radv: using tls to store llvm related info and speed up compiles (v3)

2018-06-29 Thread Marek Olšák
I wonder if we can somehow make the TLS magic apply to RADV only.
Radeonsi can do it without TLS. Then, the RADV-specific TLS code can be
moved to RADV, and other code (if any) can be shared.

It doesn't make much sense to do the TLS initialization in
ac_llvm_compiler_init. It could be done in compile_to_memory_buffer
and the fail path there could be removed.

The call to LLVMTargetMachineEmitToMemoryBuffer can be removed.
There is no use for it. Things like if(radv) and if(radeonsi) don't
look good in the common code even if you don't write it exactly like
that.

Marek


On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> I'd like to encourage people to test this to see if it helps (like
> does it make app startup better or less hitching in dxvk).
>
> The basic idea is to store a bunch of LLVM related data structs
> in thread local storage so we can avoid reiniting them every time
> we compile a shader. Since we know llvm objects aren't thread safe
> it has to be stored using TLS to avoid any collisions.
>
> This should remove all the fixed overheads setup costs of creating
> the pass manager each time.
>
> This takes a demo app time to compile the radv meta shaders on nocache
> and exit from 1.7s to 1s.
>
> TODO: this doesn't work for radeonsi yet, but I'm not sure how TLS
> works if you have radeonsi and radv loaded at the same time, if
> they'll magically try and use the same tls stuff, in which case
> this might explode all over the place.
>
> v2: fix llvm6 build, inline emit function, handle multiple targets
> in one thread
> v3: rebase and port onto new structure
> ---
>  src/amd/common/ac_llvm_helper.cpp | 120 --
>  src/amd/common/ac_llvm_util.c |  10 +--
>  src/amd/common/ac_llvm_util.h |   9 +++
>  src/amd/vulkan/radv_debug.h   |   1 +
>  src/amd/vulkan/radv_device.c  |   1 +
>  src/amd/vulkan/radv_shader.c  |   2 +
>  6 files changed, 132 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_helper.cpp 
> b/src/amd/common/ac_llvm_helper.cpp
> index 27403dbe085..f1f1399b3fb 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -31,12 +31,21 @@
>
>  #include "ac_llvm_util.h"
>  #include 
> -#include 
> -#include 
> -#include 
> -#include 
> +#include 
>  #include 
>  #include 
> +#include 
> +
> +#include 
> +#include 
> +#if HAVE_LLVM >= 0x0700
> +#include 
> +#endif
> +
> +#if HAVE_LLVM < 0x0700
> +#include "llvm/Support/raw_ostream.h"
> +#endif
> +#include 
>
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>  {
> @@ -101,11 +110,110 @@ 
> ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
> delete reinterpret_cast(library_info);
>  }
>
> +class ac_llvm_per_thread_info {
> +public:
> +   ac_llvm_per_thread_info(enum radeon_family arg_family,
> +   enum ac_target_machine_options arg_tm_options)
> +   : family(arg_family), tm_options(arg_tm_options),
> + OStream(CodeString) {}
> +   ~ac_llvm_per_thread_info() {
> +   ac_llvm_compiler_dispose_internal(&llvm_info);
> +   }
> +
> +   struct ac_llvm_compiler_info llvm_info;
> +   enum radeon_family family;
> +   enum ac_target_machine_options tm_options;
> +   llvm::SmallString<0> CodeString;
> +   llvm::raw_svector_ostream OStream;
> +   llvm::legacy::PassManager pass;
> +};
> +
> +/* we have to store a linked list per thread due to the possibility of 
> multiple gpus being required */
> +static thread_local std::list<ac_llvm_per_thread_info> 
> ac_llvm_per_thread_list;
> +
>  bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info,
>  LLVMModuleRef M,
>  char **ErrorMessage,
>  LLVMMemoryBufferRef *OutMemBuf)
>  {
> -   return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, 
> LLVMObjectFile,
> -  ErrorMessage, OutMemBuf);
> +   ac_llvm_per_thread_info *thread_info = nullptr;
> +   if (info->thread_stored) {
> +   for (auto &I : ac_llvm_per_thread_list) {
> +   if (I.llvm_info.tm == info->tm) {
> +   thread_info = &I;
> +   break;
> +   }
> +   }
> +
> +   if (!thread_info) {
> +   assert(0);
> +   return false;
> +   }
> +   } else {
> +   return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, 
> LLVMObjectFile,
> +  ErrorMessage, 
> OutMemBuf);
> +   }
> +
> +   llvm::TargetMachine *TM = 
> reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm);
> +   llvm::Module *Mod = llvm::unwrap(M);
> +   llvm::StringRef Data;
> +
> +   Mod->setDataLayout(TM->createDataLayout());
> +
> 

Re: [Mesa-dev] [PATCH 09/11] radeonsi: port to shared ac llvm per-thread structs.

2018-06-29 Thread Marek Olšák
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> The new structs are pretty much based on the radeonsi code,
> so it just ports over the uses to them.
> ---
>  src/gallium/drivers/radeonsi/si_compute.c |  2 +-
>  src/gallium/drivers/radeonsi/si_pipe.c| 50 ---
>  src/gallium/drivers/radeonsi/si_pipe.h|  6 +--
>  src/gallium/drivers/radeonsi/si_shader.c  | 24 -
>  src/gallium/drivers/radeonsi/si_shader.h  | 18 ++-
>  .../drivers/radeonsi/si_shader_internal.h |  6 +--
>  .../drivers/radeonsi/si_shader_tgsi_setup.c   |  9 ++--
>  .../drivers/radeonsi/si_state_shaders.c   |  4 +-
>  8 files changed, 40 insertions(+), 79 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index cb320323db3..e8ad6ce0e16 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -86,7 +86,7 @@ static void si_create_compute_state_async(void *job, int 
> thread_index)
> struct si_compute *program = (struct si_compute *)job;
> struct si_shader *shader = &program->shader;
> struct si_shader_selector sel;
> -   struct si_compiler *compiler;
> +   struct ac_llvm_compiler_info *compiler;
> struct pipe_debug_callback *debug = 
> >compiler_ctx_state.debug;
>
> assert(!debug->debug_message || debug->async);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index aca61670765..e4906a479c9 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -105,51 +105,21 @@ static const struct debug_named_value debug_options[] = 
> {
>  };
>
>  static void si_init_compiler(struct si_screen *sscreen,
> -struct si_compiler *compiler)
> +struct ac_llvm_compiler_info *compiler)
>  {
> enum ac_target_machine_options tm_options =
> (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
> (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK 
> : 0) |
> (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK 
> : 0) |
> -   (!sscreen->llvm_has_working_vgpr_indexing ? 
> AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
> +   (!sscreen->llvm_has_working_vgpr_indexing ? 
> AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
> +   (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0);
>
> ac_init_llvm_once();
> -   compiler->tm = ac_create_target_machine(sscreen->info.family,
> -   tm_options, 
> >triple);
> -   if (!compiler->tm)
> -   return;
> -
> -   compiler->target_library_info =
> -   gallivm_create_target_library_info(compiler->triple);
> -   if (!compiler->target_library_info)
> -   return;
> -
> -   compiler->passmgr = ac_init_passmgr(compiler->target_library_info,
> -   (sscreen->debug_flags & 
> DBG(CHECK_IR)));
> -   if (!compiler->passmgr)
> -   return;
>
> -   /* Get the data layout. */
> -   LLVMTargetDataRef data_layout = 
> LLVMCreateTargetDataLayout(compiler->tm);
> -   if (!data_layout)
> -   return;
> -   compiler->data_layout = LLVMCopyStringRepOfTargetData(data_layout);
> -   LLVMDisposeTargetData(data_layout);
> -}
> -
> -static void si_destroy_compiler(struct si_compiler *compiler)
> -{
> -   if (compiler->data_layout)
> -   LLVMDisposeMessage((char*)compiler->data_layout);
> -   if (compiler->passmgr)
> -   LLVMDisposePassManager(compiler->passmgr);
> -#if HAVE_LLVM >= 0x0700
> -   /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it 
> there. */
> -   if (compiler->target_library_info)
> -   
> gallivm_dispose_target_library_info(compiler->target_library_info);
> -#endif
> -   if (compiler->tm)
> -   LLVMDisposeTargetMachine(compiler->tm);
> +   ac_llvm_compiler_init(compiler,
> + true,
> + sscreen->info.family,
> + tm_options);
>  }
>
>  /*
> @@ -250,7 +220,7 @@ static void si_destroy_context(struct pipe_context 
> *context)
> sctx->ws->fence_reference(>last_sdma_fence, NULL);
> r600_resource_reference(>eop_bug_scratch, NULL);
>
> -   si_destroy_compiler(>compiler);
> +   ac_llvm_compiler_dispose(>compiler);
>
> si_saved_cs_reference(>current_saved_cs, NULL);
>
> @@ -659,10 +629,10 @@ static void si_destroy_screen(struct pipe_screen* 
> pscreen)
> util_queue_destroy(>shader_compiler_queue_low_priority);
>
> for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
> -   si_destroy_compiler(>compiler[i]);
> +   

[Mesa-dev] [PATCH 0/6] anv: Add support for an on-disk transparent pipeline

2018-06-29 Thread Jason Ekstrand
I've been resisting this for a long time and I'm still a bit grumpy about
it but I think Pierre-Loup has convinced me that it really is needed for
Steam shader caching.  This series gives apps that don't use a pipeline
cache a default in-memory cache as well as backing them with the disk cache
in case they don't save their pipeline cache off.  Fortunately, it's pretty
straightforward and non-invasive to add disk cache support.

Goodbye explicit pipeline caching, you will be missed...

Jason Ekstrand (6):
  anv: Be more careful about hashing pipeline layouts
  anv: Use a default pipeline cache if none is specified
  anv/pipeline: Stop optimizing for not having a cache
  anv: Add device-level helpers for searching for and uploading kernels
  anv/pipeline_cache: Add a _locked suffix to a function
  anv: Add support for the on-disk shader cache

 src/intel/vulkan/anv_blorp.c  |  12 +-
 src/intel/vulkan/anv_descriptor_set.c |  41 +-
 src/intel/vulkan/anv_device.c |  43 ++
 src/intel/vulkan/anv_pipeline.c   | 184 +++---
 src/intel/vulkan/anv_pipeline_cache.c | 151 +
 src/intel/vulkan/anv_private.h|  23 +++-
 src/intel/vulkan/genX_pipeline.c  |   8 ++
 7 files changed, 318 insertions(+), 144 deletions(-)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] anv: Add support for the on-disk shader cache

2018-06-29 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_device.c | 36 ++
 src/intel/vulkan/anv_pipeline_cache.c | 98 ---
 src/intel/vulkan/anv_private.h|  3 +
 3 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a864c702c3f..ca6e1c0cace 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -35,6 +35,7 @@
 #include "util/strtod.h"
 #include "util/debug.h"
 #include "util/build_id.h"
+#include "util/disk_cache.h"
 #include "util/mesa-sha1.h"
 #include "vk_util.h"
 #include "common/gen_defines.h"
@@ -233,6 +234,8 @@ anv_physical_device_init_uuids(struct anv_physical_device 
*device)
"build-id too short.  It needs to be a SHA");
}
 
+   memcpy(device->driver_build_sha1, build_id_data(note), 20);
+
struct mesa_sha1 sha1_ctx;
uint8_t sha1[20];
STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
@@ -271,6 +274,35 @@ anv_physical_device_init_uuids(struct anv_physical_device 
*device)
return VK_SUCCESS;
 }
 
+static void
+anv_physical_device_init_disk_cache(struct anv_physical_device *device)
+{
+#ifdef ENABLE_SHADER_CACHE
+   char renderer[9];
+   MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
+   device->chipset_id);
+   assert(len == sizeof(renderer) - 1);
+
+   char timestamp[41];
+   _mesa_sha1_format(timestamp, device->driver_build_sha1);
+
+   device->disk_cache = disk_cache_create(renderer, timestamp, 0);
+#else
+   device->disk_cache = NULL;
+#endif
+}
+
+static void
+anv_physical_device_free_disk_cache(struct anv_physical_device *device)
+{
+#ifdef ENABLE_SHADER_CACHE
+   if (device->disk_cache)
+  disk_cache_destroy(device->disk_cache);
+#else
+   assert(device->disk_cache == NULL);
+#endif
+}
+
 static VkResult
 anv_physical_device_init(struct anv_physical_device *device,
  struct anv_instance *instance,
@@ -442,6 +474,8 @@ anv_physical_device_init(struct anv_physical_device *device,
if (result != VK_SUCCESS)
   goto fail;
 
+   anv_physical_device_init_disk_cache(device);
+
if (instance->enabled_extensions.KHR_display) {
   master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
   if (master_fd >= 0) {
@@ -459,6 +493,7 @@ anv_physical_device_init(struct anv_physical_device *device,
result = anv_init_wsi(device);
if (result != VK_SUCCESS) {
   ralloc_free(device->compiler);
+  anv_physical_device_free_disk_cache(device);
   goto fail;
}
 
@@ -481,6 +516,7 @@ static void
 anv_physical_device_finish(struct anv_physical_device *device)
 {
anv_finish_wsi(device);
+   anv_physical_device_free_disk_cache(device);
ralloc_free(device->compiler);
close(device->local_fd);
if (device->master_fd >= 0)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index e57cd1c75c6..d4c7262dc05 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -24,6 +24,8 @@
 #include "compiler/blob.h"
 #include "util/hash_table.h"
 #include "util/debug.h"
+#include "util/disk_cache.h"
+#include "util/mesa-sha1.h"
 #include "anv_private.h"
 
 struct anv_shader_bin *
@@ -280,6 +282,25 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
return shader;
 }
 
+static void
+anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache,
+  struct anv_shader_bin *bin)
+{
+   if (!cache->cache)
+  return;
+
+   pthread_mutex_lock(&cache->mutex);
+
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key);
+   if (entry == NULL) {
+  /* Take a reference for the cache */
+  anv_shader_bin_ref(bin);
+  _mesa_hash_table_insert(cache->cache, bin->key, bin);
+   }
+
+   pthread_mutex_unlock(&cache->mutex);
+}
+
 static struct anv_shader_bin *
 anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
  const void *key_data, uint32_t key_size,
@@ -540,7 +561,38 @@ anv_device_search_for_kernel(struct anv_device *device,
  struct anv_pipeline_cache *cache,
  const void *key_data, uint32_t key_size)
 {
-   return cache ? anv_pipeline_cache_search(cache, key_data, key_size) : NULL;
+   struct anv_shader_bin *bin;
+
+   if (cache) {
+  bin = anv_pipeline_cache_search(cache, key_data, key_size);
+  if (bin)
+ return bin;
+   }
+
+#ifdef ENABLE_SHADER_CACHE
+   struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache;
+   if (disk_cache) {
+  cache_key cache_key;
+  disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
+
+  size_t buffer_size;
+  uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
+  if (buffer) {
+ struct blob_reader blob;
+ blob_reader_init(&blob, buffer, buffer_size);
+ bin = 

[Mesa-dev] [PATCH 2/6] anv: Use a default pipeline cache if none is specified

2018-06-29 Thread Jason Ekstrand
If a client is dumb enough to not specify a pipeline cache, give it a
default.  We have to create one anyway for blorp so we may as well let
the client cache shaders in it.
---
 src/intel/vulkan/anv_blorp.c  | 12 +---
 src/intel/vulkan/anv_device.c |  7 +++
 src/intel/vulkan/anv_pipeline_cache.c | 12 ++--
 src/intel/vulkan/anv_private.h|  4 +++-
 src/intel/vulkan/genX_pipeline.c  |  8 
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 4dbfb7a83fd..8e6d7db6e40 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -30,11 +30,11 @@ lookup_blorp_shader(struct blorp_context *blorp,
 {
struct anv_device *device = blorp->driver_ctx;
 
-   /* The blorp cache must be a real cache */
-   assert(device->blorp_shader_cache.cache);
+   /* The default cache must be a real cache */
+   assert(device->default_pipeline_cache.cache);
 
struct anv_shader_bin *bin =
-  anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size);
+  anv_pipeline_cache_search(&device->default_pipeline_cache, key, 
key_size);
if (!bin)
   return false;
 
@@ -60,7 +60,7 @@ upload_blorp_shader(struct blorp_context *blorp,
struct anv_device *device = blorp->driver_ctx;
 
/* The blorp cache must be a real cache */
-   assert(device->blorp_shader_cache.cache);
+   assert(device->default_pipeline_cache.cache);
 
struct anv_pipeline_bind_map bind_map = {
   .surface_count = 0,
@@ -68,7 +68,7 @@ upload_blorp_shader(struct blorp_context *blorp,
};
 
struct anv_shader_bin *bin =
-  anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache,
+  anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache,
key, key_size, kernel, kernel_size,
NULL, 0,
prog_data, prog_data_size, &bind_map);
@@ -90,7 +90,6 @@ upload_blorp_shader(struct blorp_context *blorp,
 void
 anv_device_init_blorp(struct anv_device *device)
 {
-   anv_pipeline_cache_init(&device->blorp_shader_cache, device, true);
blorp_init(&device->blorp, device, &device->isl_dev);
device->blorp.compiler = device->instance->physicalDevice.compiler;
device->blorp.lookup_shader = lookup_blorp_shader;
@@ -124,7 +123,6 @@ void
 anv_device_finish_blorp(struct anv_device *device)
 {
blorp_finish(&device->blorp);
-   anv_pipeline_cache_finish(&device->blorp_shader_cache);
 }
 
 static void
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 077f5c16e46..a864c702c3f 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -604,6 +604,9 @@ VkResult anv_CreateInstance(
   return vk_error(result);
}
 
+   instance->pipeline_cache_enabled =
+  env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
+
_mesa_locale_init();
 
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
@@ -1728,6 +1731,8 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
   goto fail_workaround_bo;
 
+   anv_pipeline_cache_init(&device->default_pipeline_cache, device, true);
+
anv_device_init_blorp(device);
 
anv_device_init_border_colors(device);
@@ -1778,6 +1783,8 @@ void anv_DestroyDevice(
 
anv_device_finish_blorp(device);
 
+   anv_pipeline_cache_finish(&device->default_pipeline_cache);
+
anv_queue_finish(&device->queue);
 
 #ifdef HAVE_VALGRIND
diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index 07b745b9c7a..5262753f725 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -394,15 +394,6 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
}
 }
 
-static bool
-pipeline_cache_enabled()
-{
-   static int enabled = -1;
-   if (enabled < 0)
-  enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
-   return enabled;
-}
-
 VkResult anv_CreatePipelineCache(
 VkDevice_device,
 const VkPipelineCacheCreateInfo*pCreateInfo,
@@ -421,7 +412,8 @@ VkResult anv_CreatePipelineCache(
if (cache == NULL)
   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
+   anv_pipeline_cache_init(cache, device,
+   device->instance->pipeline_cache_enabled);
 
if (pCreateInfo->initialDataSize > 0)
   anv_pipeline_cache_load(cache,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 139c48b7e46..4fa23357dd6 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -891,6 +891,8 @@ struct anv_instance {
 int physicalDeviceCount;
 struct anv_physical_device  physicalDevice;
 
+boolpipeline_cache_enabled;
+
 struct vk_debug_report_instance 

[Mesa-dev] [PATCH 5/6] anv/pipeline_cache: Add a _locked suffix to a function

2018-06-29 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_pipeline_cache.c | 29 ++-
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index 2bc64f5bdd1..e57cd1c75c6 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -281,15 +281,16 @@ anv_pipeline_cache_search(struct anv_pipeline_cache 
*cache,
 }
 
 static struct anv_shader_bin *
-anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
-  const void *key_data, uint32_t key_size,
-  const void *kernel_data, uint32_t kernel_size,
-  const void *constant_data,
-  uint32_t constant_data_size,
-  const struct brw_stage_prog_data *prog_data,
-  uint32_t prog_data_size,
-  const void *prog_data_param,
-  const struct anv_pipeline_bind_map *bind_map)
+anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data,
+ uint32_t kernel_size,
+ const void *constant_data,
+ uint32_t constant_data_size,
+ const struct brw_stage_prog_data 
*prog_data,
+ uint32_t prog_data_size,
+ const void *prog_data_param,
+ const struct anv_pipeline_bind_map 
*bind_map)
 {
struct anv_shader_bin *shader =
   anv_pipeline_cache_search_locked(cache, key_data, key_size);
@@ -324,11 +325,11 @@ anv_pipeline_cache_upload_kernel(struct 
anv_pipeline_cache *cache,
   pthread_mutex_lock(&cache->mutex);
 
   struct anv_shader_bin *bin =
- anv_pipeline_cache_add_shader(cache, key_data, key_size,
-   kernel_data, kernel_size,
-   constant_data, constant_data_size,
-   prog_data, prog_data_size,
-   prog_data->param, bind_map);
+ anv_pipeline_cache_add_shader_locked(cache, key_data, key_size,
+  kernel_data, kernel_size,
+  constant_data, 
constant_data_size,
+  prog_data, prog_data_size,
+  prog_data->param, bind_map);
 
   pthread_mutex_unlock(&cache->mutex);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] anv/pipeline: Stop optimizing for not having a cache

2018-06-29 Thread Jason Ekstrand
Before, we were only hashing the shader if we had a shader cache to
cache things in.  This means that if we ever get it wrong, we could end
up trying to cache a shader with an undefined hash.  Since not having a
shader cache is an extremely uncommon case, let's optimize for code
clarity and obvious correctness over avoiding a hash operation.
---
 src/intel/vulkan/anv_pipeline.c | 65 -
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index dc3b4a0e3ba..e97df58d554 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -527,18 +527,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
   pipeline->device->instance->physicalDevice.compiler;
struct brw_vs_prog_key key;
struct anv_shader_bin *bin = NULL;
-   unsigned char sha1[20];
 
populate_vs_prog_key(&pipeline->device->info, &key);
 
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   if (cache) {
-  anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
-   MESA_SHADER_VERTEX, spec_info,
-   &key, sizeof(key), sha1);
+   unsigned char sha1[20];
+   anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
+MESA_SHADER_VERTEX, spec_info,
+&key, sizeof(key), sha1);
+   if (cache)
   bin = anv_pipeline_cache_search(cache, sha1, 20);
-   }
 
if (bin == NULL) {
   struct brw_vs_prog_data prog_data = {};
@@ -653,8 +652,6 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
struct brw_tes_prog_key tes_key = {};
struct anv_shader_bin *tcs_bin = NULL;
struct anv_shader_bin *tes_bin = NULL;
-   unsigned char tcs_sha1[40];
-   unsigned char tes_sha1[40];
 
populate_sampler_prog_key(&pipeline->device->info, &tcs_key.tex);
populate_sampler_prog_key(&pipeline->device->info, &tes_key.tex);
@@ -662,15 +659,18 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline 
*pipeline,
 
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
+   unsigned char tcs_sha1[40];
+   unsigned char tes_sha1[40];
+   anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint,
+MESA_SHADER_TESS_CTRL, tcs_spec_info,
+&tcs_key, sizeof(tcs_key), tcs_sha1);
+   anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint,
+MESA_SHADER_TESS_EVAL, tes_spec_info,
+&tes_key, sizeof(tes_key), tes_sha1);
+   memcpy(&tcs_sha1[20], tes_sha1, 20);
+   memcpy(&tes_sha1[20], tcs_sha1, 20);
+
if (cache) {
-  anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint,
-   MESA_SHADER_TESS_CTRL, tcs_spec_info,
-   &tcs_key, sizeof(tcs_key), tcs_sha1);
-  anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint,
-   MESA_SHADER_TESS_EVAL, tes_spec_info,
-   &tes_key, sizeof(tes_key), tes_sha1);
-  memcpy(&tcs_sha1[20], tes_sha1, 20);
-  memcpy(&tes_sha1[20], tcs_sha1, 20);
   tcs_bin = anv_pipeline_cache_search(cache, tcs_sha1, sizeof(tcs_sha1));
   tes_bin = anv_pipeline_cache_search(cache, tes_sha1, sizeof(tes_sha1));
}
@@ -802,18 +802,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
   pipeline->device->instance->physicalDevice.compiler;
struct brw_gs_prog_key key;
struct anv_shader_bin *bin = NULL;
-   unsigned char sha1[20];
 
populate_gs_prog_key(&pipeline->device->info, &key);
 
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   if (cache) {
-  anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
-   MESA_SHADER_GEOMETRY, spec_info,
-   &key, sizeof(key), sha1);
+   unsigned char sha1[20];
+   anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
+MESA_SHADER_GEOMETRY, spec_info,
+&key, sizeof(key), sha1);
+   if (cache)
   bin = anv_pipeline_cache_search(cache, sha1, 20);
-   }
 
if (bin == NULL) {
   struct brw_gs_prog_data prog_data = {};
@@ -884,18 +883,17 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
   pipeline->device->instance->physicalDevice.compiler;
struct brw_wm_prog_key key;
struct anv_shader_bin *bin = NULL;
-   unsigned char sha1[20];
 
populate_wm_prog_key(pipeline, info, &key);
 
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   if (cache) {
-  anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
-   MESA_SHADER_FRAGMENT, spec_info,
-   &key, sizeof(key), sha1);
+   unsigned char sha1[20];
+   anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
+MESA_SHADER_FRAGMENT, spec_info,
+  

[Mesa-dev] [PATCH 4/6] anv: Add device-level helpers for searching for and uploading kernels

2018-06-29 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_pipeline.c   | 127 ++
 src/intel/vulkan/anv_pipeline_cache.c |  34 +++
 src/intel/vulkan/anv_private.h|  16 
 3 files changed, 98 insertions(+), 79 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e97df58d554..2a36f2e6bc1 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -480,33 +480,6 @@ anv_fill_binding_table(struct brw_stage_prog_data 
*prog_data, unsigned bias)
prog_data->binding_table.image_start = bias;
 }
 
-static struct anv_shader_bin *
-anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
-   struct anv_pipeline_cache *cache,
-   const void *key_data, uint32_t key_size,
-   const void *kernel_data, uint32_t kernel_size,
-   const void *constant_data,
-   uint32_t constant_data_size,
-   const struct brw_stage_prog_data *prog_data,
-   uint32_t prog_data_size,
-   const struct anv_pipeline_bind_map *bind_map)
-{
-   if (cache) {
-  return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
-  kernel_data, kernel_size,
-  constant_data, 
constant_data_size,
-  prog_data, prog_data_size,
-  bind_map);
-   } else {
-  return anv_shader_bin_create(pipeline->device, key_data, key_size,
-   kernel_data, kernel_size,
-   constant_data, constant_data_size,
-   prog_data, prog_data_size,
-   prog_data->param, bind_map);
-   }
-}
-
-
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
 gl_shader_stage stage,
@@ -536,8 +509,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
 MESA_SHADER_VERTEX, spec_info,
 &key, sizeof(key), sha1);
-   if (cache)
-  bin = anv_pipeline_cache_search(cache, sha1, 20);
+   bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
 
if (bin == NULL) {
   struct brw_vs_prog_data prog_data = {};
@@ -576,12 +548,12 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
   }
 
   unsigned code_size = prog_data.base.base.program_size;
-  bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
-   shader_code, code_size,
-   nir->constant_data,
-   nir->constant_data_size,
-   _data.base.base, sizeof(prog_data),
-   );
+  bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20,
+ shader_code, code_size,
+ nir->constant_data,
+ nir->constant_data_size,
+ _data.base.base, sizeof(prog_data),
+ );
   if (!bin) {
  ralloc_free(mem_ctx);
  return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -670,10 +642,10 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline 
*pipeline,
memcpy(&tcs_sha1[20], tes_sha1, 20);
memcpy(&tes_sha1[20], tcs_sha1, 20);
 
-   if (cache) {
-  tcs_bin = anv_pipeline_cache_search(cache, tcs_sha1, sizeof(tcs_sha1));
-  tes_bin = anv_pipeline_cache_search(cache, tes_sha1, sizeof(tes_sha1));
-   }
+   tcs_bin = anv_device_search_for_kernel(pipeline->device, cache,
+  tcs_sha1, sizeof(tcs_sha1));
+   tes_bin = anv_device_search_for_kernel(pipeline->device, cache,
+  tes_sha1, sizeof(tes_sha1));
 
if (tcs_bin == NULL || tes_bin == NULL) {
   struct brw_tcs_prog_data tcs_prog_data = {};
@@ -745,14 +717,14 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline 
*pipeline,
   }
 
   unsigned code_size = tcs_prog_data.base.base.program_size;
-  tcs_bin = anv_pipeline_upload_kernel(pipeline, cache,
-   tcs_sha1, sizeof(tcs_sha1),
-   shader_code, code_size,
-   tcs_nir->constant_data,
-   tcs_nir->constant_data_size,
-   _prog_data.base.base,
-   sizeof(tcs_prog_data),
-   _map);
+  tcs_bin = 

[Mesa-dev] [PATCH 1/6] anv: Be more careful about hashing pipeline layouts

2018-06-29 Thread Jason Ekstrand
Previously, we just hashed the entire descriptor set layout verbatim.
This meant that a bunch of extra stuff such as pointers and reference
counts made its way into the cache.  It also meant that we weren't
properly hashing in the Y'CbCr conversion information from
bound immutable samplers.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/vulkan/anv_descriptor_set.c | 41 +--
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/anv_descriptor_set.c 
b/src/intel/vulkan/anv_descriptor_set.c
index 9534ba81cdb..8f7f1f3ba38 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -257,13 +257,48 @@ void anv_DestroyDescriptorSetLayout(
anv_descriptor_set_layout_unref(device, set_layout);
 }
 
+#define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x));
+
+static void
+sha1_update_immutable_sampler(struct mesa_sha1 *ctx,
+  const struct anv_sampler *sampler)
+{
+   if (!sampler->conversion)
+  return;
+
+   /* The only thing that affects the shader is ycbcr conversion */
+   _mesa_sha1_update(ctx, sampler->conversion,
+ sizeof(*sampler->conversion));
+}
+
+static void
+sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx,
+   const struct anv_descriptor_set_binding_layout *layout)
+{
+   SHA1_UPDATE_VALUE(ctx, layout->array_size);
+   SHA1_UPDATE_VALUE(ctx, layout->descriptor_index);
+   SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index);
+   SHA1_UPDATE_VALUE(ctx, layout->buffer_index);
+   _mesa_sha1_update(ctx, layout->stage, sizeof(layout->stage));
+
+   if (layout->immutable_samplers) {
+  for (uint16_t i = 0; i < layout->array_size; i++)
+ sha1_update_immutable_sampler(ctx, layout->immutable_samplers[i]);
+   }
+}
+
 static void
 sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
   const struct anv_descriptor_set_layout 
*layout)
 {
-   size_t size = sizeof(*layout) +
- sizeof(layout->binding[0]) * layout->binding_count;
-   _mesa_sha1_update(ctx, layout, size);
+   SHA1_UPDATE_VALUE(ctx, layout->binding_count);
+   SHA1_UPDATE_VALUE(ctx, layout->size);
+   SHA1_UPDATE_VALUE(ctx, layout->shader_stages);
+   SHA1_UPDATE_VALUE(ctx, layout->buffer_count);
+   SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count);
+
+   for (uint16_t i = 0; i < layout->binding_count; i++)
+  sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i]);
 }
 
 /*
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/11] ac/radv: move llvm compiler info to struct and init in one place

2018-06-29 Thread Marek Olšák
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This creates a common per-thread compiler info struct, and adds
> the init code to it. This is mostly ported from radeonsi.
>
> The common info struct is used in radv first and replaces the
> current code.
> ---
>  src/amd/common/ac_llvm_util.c | 50 +++
>  src/amd/common/ac_llvm_util.h | 14 +
>  src/amd/vulkan/radv_nir_to_llvm.c | 39 ++--
>  src/amd/vulkan/radv_private.h |  7 ++---
>  src/amd/vulkan/radv_shader.c  | 16 +-
>  5 files changed, 91 insertions(+), 35 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index dd2469d4606..85dc9d72a5c 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -188,6 +188,56 @@ LLVMPassManagerRef 
> ac_init_passmgr(LLVMTargetLibraryInfoRef target_library_info,
> return passmgr;
>  }
>
> +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> +  bool add_target_library_info,
> +  enum radeon_family family,
> +  enum ac_target_machine_options tm_options)
> +{
> +   memset(info, 0, sizeof(*info));
> +   info->tm = ac_create_target_machine(family, tm_options, 
> &info->triple);
> +   if (!info->tm)
> +   return false;
> +
> +   /* Get the data layout. */
> +   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(info->tm);
> +   if (!data_layout)
> +   goto fail;
> +   info->data_layout = LLVMCopyStringRepOfTargetData(data_layout);
> +   LLVMDisposeTargetData(data_layout);
> +
> +#if HAVE_LLVM < 0x0700

This #if is not needed. You already have a bool flag coming from radv.
You can modify the bool value in radv.

> +   if (add_target_library_info)
> +#endif

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/11] ac/radv: split the non-common init_once code from the common target code.

2018-06-29 Thread Marek Olšák
On Tue, Jun 26, 2018 at 11:58 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This just splits out the non-shared code and reuses ac_get_llvm_target in 
> radv.
> ---
>  src/amd/common/ac_llvm_util.c  |  6 --
>  src/amd/common/ac_llvm_util.h  |  2 ++
>  src/amd/vulkan/radv_shader.c   | 20 
>  src/gallium/drivers/radeonsi/si_pipe.c |  1 +
>  4 files changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index be2d92b4c08..f33691dcc92 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -57,13 +57,15 @@ static void ac_init_llvm_target()
>
>  static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
>
> +void ac_init_llvm_once(void) {

LLVM-ism ({ not on the next line)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] nvc0/ir: return 0 in imageLoad on incomplete textures

2018-06-29 Thread Karol Herbst
We already guarded all OP_SULDP against out of bound accesses, but those
ended up just reusing whatever value was stored in the dest registers.

fixes CTS test shader_image_load_store.incomplete_textures

v2: fix for loads not ending up with predicates (bindless_texture)

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 33 +--
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 +
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 5723847234e..f55e9a34c59 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2180,13 +2180,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction 
*su)
}
 }
 
+void
+NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
+{
+   if (!su->getPredicate())
+  return;
+
+   bld.setPosition(su, true);
+
+   for (unsigned i = 0; su->defExists(i); ++i) {
+  ValueDef &def = su->def(i);
+
+  Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+  assert(su->cc == CC_NOT_P);
+  mov->setPredicate(CC_P, su->getPredicate());
+  Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, 
mov->getDef(0));
+
+  def.replace(uni->getDef(0), true);
+  uni->setSrc(0, def.get());
+   }
+}
+
 void
 NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
 {
processSurfaceCoordsNVE4(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
   convertSurfaceFormat(su);
+  insertOOBSurfaceOpResult(su);
+   }
 
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
   assert(su->getPredicate());
@@ -2296,8 +2319,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
 
processSurfaceCoordsNVC0(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
   convertSurfaceFormat(su);
+  insertOOBSurfaceOpResult(su);
+   }
 
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
   const int dim = su->tex.target.getDim();
@@ -2397,8 +2422,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction 
*su)
 {
processSurfaceCoordsGM107(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
   convertSurfaceFormat(su);
+  insertOOBSurfaceOpResult(su);
+   }
 
if (su->op == OP_SUREDP) {
   Value *def = su->getDef(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 91771fbf7e9..d7350d03b78 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -143,6 +143,7 @@ private:
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
+   void insertOOBSurfaceOpResult(TexInstruction *);
Value *calculateSampleOffset(Value *sampleID);
 
 protected:
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/19] i965: Support saving the gen program with glGetProgramBinary

2018-06-29 Thread Timothy Arceri
I've sent some minor nits. Otherwise the series looks good, although I 
did only skim most of the actual new i965 blob changes. Series:


Reviewed-by: Timothy Arceri 

You might want to wait for Tapani to take a look also.

On 15/05/18 02:52, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/mesa/drivers/dri/i965/brw_program_binary.c | 72 +++---
  1 file changed, 66 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c 
b/src/mesa/drivers/dri/i965/brw_program_binary.c
index 1fe3ffd5bf9..db03332241e 100644
--- a/src/mesa/drivers/dri/i965/brw_program_binary.c
+++ b/src/mesa/drivers/dri/i965/brw_program_binary.c
@@ -126,6 +126,16 @@ driver_blob_is_ready(void *blob, uint32_t size, bool 
with_gen_program)
 }
  }
  
+static void

+serialize_nir_part(struct blob *writer, struct gl_program *prog)
+{
+   blob_write_uint32(writer, NIR_PART);
+   intptr_t size_offset = blob_reserve_uint32(writer);
+   size_t nir_start = writer->size;
+   nir_serialize(writer, prog->nir);
+   blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
+}
+
  void
  brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
  {
@@ -138,11 +148,7 @@ brw_program_serialize_nir(struct gl_context *ctx, struct 
gl_program *prog)
  
 struct blob writer;

 blob_init(&writer);
-   blob_write_uint32(&writer, NIR_PART);
-   intptr_t size_offset = blob_reserve_uint32(&writer);
-   size_t nir_start = writer.size;
-   nir_serialize(&writer, prog->nir);
-   blob_overwrite_uint32(&writer, size_offset, writer.size - nir_start);
+   serialize_nir_part(&writer, prog);
 blob_write_uint32(&writer, END_PART);
 prog->driver_cache_blob = ralloc_size(NULL, writer.size);
 memcpy(prog->driver_cache_blob, writer.data, writer.size);
@@ -237,12 +243,66 @@ brw_deserialize_program_binary(struct gl_context *ctx,
 brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
  }
  
+static void

+serialize_gen_part(struct blob *writer, struct gl_context *ctx,
+   struct gl_shader_program *sh_prog,
+   struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   union brw_any_prog_key key;
+   brw_populate_default_key(&brw->screen->devinfo, &key, sh_prog, prog);
+
+   const gl_shader_stage stage = prog->info.stage;
+   uint32_t offset = 0;
+   void *prog_data = NULL;
+   if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
+brw_prog_key_size(stage), &offset, &prog_data,
+false)) {
+  const void *program_map = brw->cache.map + offset;
+  /* TODO: Improve perf for non-LLC. It would be best to save it at
+   * program generation time when the program is in normal memory
+   * accessible with cache to the CPU. Another easier change would be to
+   * use _mesa_streaming_load_memcpy to read from the program mapped
+   * memory.
+   */
+  blob_write_uint32(writer, GEN_PART);
+  intptr_t size_offset = blob_reserve_uint32(writer);
+  size_t gen_start = writer->size;
+  blob_write_bytes(writer, &key, brw_prog_key_size(stage));
+  brw_write_blob_program_data(writer, stage, program_map, prog_data);
+  blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
+   }
+}
+
  void
  brw_serialize_program_binary(struct gl_context *ctx,
   struct gl_shader_program *sh_prog,
   struct gl_program *prog)
  {
-   brw_program_serialize_nir(ctx, prog);
+   if (driver_blob_is_ready(prog->driver_cache_blob,
+prog->driver_cache_blob_size, true))
+  return;
+
+   if (prog->driver_cache_blob) {
+  if (!prog->nir) {
+ /* If we loaded from the disk shader cache, then the nir might not
+  * have been deserialized yet.
+  */
+ brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
+  }
+  ralloc_free(prog->driver_cache_blob);
+   }
+
+   struct blob writer;
+   blob_init(&writer);
+   serialize_nir_part(&writer, prog);
+   serialize_gen_part(&writer, ctx, sh_prog, prog);
+   blob_write_uint32(&writer, END_PART);
+   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
+   memcpy(prog->driver_cache_blob, writer.data, writer.size);
+   prog->driver_cache_blob_size = writer.size;
+   blob_finish(&writer);
  }
  
  void



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/19] i965: Add brw_populate_default_key

2018-06-29 Thread Timothy Arceri

On 15/05/18 02:52, Jordan Justen wrote:

We will need to populate the default key for ARB_get_program_binary to
allow us to retrieve the default gen program to store save in the


store save - > store ???


program binary.

Signed-off-by: Jordan Justen 
---
  src/mesa/drivers/dri/i965/brw_cs.c  | 15 ++---
  src/mesa/drivers/dri/i965/brw_cs.h  |  4 +++
  src/mesa/drivers/dri/i965/brw_gs.c  | 16 ++---
  src/mesa/drivers/dri/i965/brw_gs.h  |  4 +++
  src/mesa/drivers/dri/i965/brw_program.c | 35 
  src/mesa/drivers/dri/i965/brw_program.h | 15 +
  src/mesa/drivers/dri/i965/brw_tcs.c | 57 +++--
  src/mesa/drivers/dri/i965/brw_tes.c | 40 ++-
  src/mesa/drivers/dri/i965/brw_vs.c  | 26 ++-
  src/mesa/drivers/dri/i965/brw_vs.h  |  4 +++
  src/mesa/drivers/dri/i965/brw_wm.c  | 48 ---
  src/mesa/drivers/dri/i965/brw_wm.h  |  4 +++
  12 files changed, 195 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
b/src/mesa/drivers/dri/i965/brw_cs.c
index 9b1b0832b5a..614eb64bca9 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -184,6 +184,16 @@ brw_upload_cs_prog(struct brw_context *brw)
 assert(success);
  }
  
+void

+brw_cs_populate_default_key(const struct gen_device_info *devinfo,
+struct brw_cs_prog_key *key,
+struct gl_program *prog)
+{
+   memset(key, 0, sizeof(*key));
+   key->program_string_id = brw_program(prog)->id;
+
+   brw_setup_tex_for_precompile(devinfo, &key->tex, prog);
+}
  
  bool

  brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog)
@@ -193,10 +203,7 @@ brw_cs_precompile(struct gl_context *ctx, struct 
gl_program *prog)
  
 struct brw_program *bcp = brw_program(prog);
  
-   memset(&key, 0, sizeof(key));

-   key.program_string_id = bcp->id;
-
-   brw_setup_tex_for_precompile(&brw->screen->devinfo, &key.tex, prog);
+   brw_cs_populate_default_key(&brw->screen->devinfo, &key, prog);
  
 uint32_t old_prog_offset = brw->cs.base.prog_offset;

 struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data;
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
b/src/mesa/drivers/dri/i965/brw_cs.h
index 60eb19c3594..669d4b544eb 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -34,6 +34,10 @@ brw_upload_cs_prog(struct brw_context *brw);
  
  void

  brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key);
+void
+brw_cs_populate_default_key(const struct gen_device_info *devinfo,
+struct brw_cs_prog_key *key,
+struct gl_program *prog);
  
  #ifdef __cplusplus

  }
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index f488fab009e..9d4dc942d34 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -208,6 +208,17 @@ brw_upload_gs_prog(struct brw_context *brw)
 assert(success);
  }
  
+void

+brw_gs_populate_default_key(const struct gen_device_info *devinfo,
+struct brw_gs_prog_key *key,
+struct gl_program *prog)
+{
+   memset(key, 0, sizeof(*key));
+
+   brw_setup_tex_for_precompile(devinfo, &key->tex, prog);
+   key->program_string_id = brw_program(prog)->id;
+}
+
  bool
  brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog)
  {
@@ -219,10 +230,7 @@ brw_gs_precompile(struct gl_context *ctx, struct 
gl_program *prog)
  
 struct brw_program *bgp = brw_program(prog);
  
-   memset(&key, 0, sizeof(key));

-
-   brw_setup_tex_for_precompile(&brw->screen->devinfo, &key.tex, prog);
-   key.program_string_id = bgp->id;
+   brw_gs_populate_default_key(&brw->screen->devinfo, &key, prog);
  
 success = brw_codegen_gs_prog(brw, bgp, );
  
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h

index 537a41679df..cff994a9323 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -40,6 +40,10 @@ brw_upload_gs_prog(struct brw_context *brw);
  void
  brw_gs_populate_key(struct brw_context *brw,
  struct brw_gs_prog_key *key);
+void
+brw_gs_populate_default_key(const struct gen_device_info *devinfo,
+struct brw_gs_prog_key *key,
+struct gl_program *prog);
  
  #ifdef __cplusplus

  } /* extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index ca934b91c9a..a54d90e92a7 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -48,6 +48,11 @@
  #include "brw_defines.h"
  #include "intel_batchbuffer.h"
  
+#include "brw_cs.h"

+#include "brw_gs.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
  static bool
  brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  {
@@ 

Re: [Mesa-dev] [PATCH 09/19] i965: Add brw_stage_cache_id to map gl stages to brw cache_ids

2018-06-29 Thread Timothy Arceri

On 15/05/18 02:52, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/mesa/drivers/dri/i965/brw_program_cache.c | 15 +++
  src/mesa/drivers/dri/i965/brw_state.h |  2 ++
  2 files changed, 17 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 78159288af0..40f1078ca98 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -78,6 +78,21 @@ struct brw_cache_item {
 struct brw_cache_item *next;
  };
  
+enum brw_cache_id

+brw_stage_cache_id(gl_shader_stage stage)
+{
+   static const enum brw_cache_id stage_sizes[] = {



stage_sizes -> stage_ids ???


+  BRW_CACHE_VS_PROG,
+  BRW_CACHE_TCS_PROG,
+  BRW_CACHE_TES_PROG,
+  BRW_CACHE_GS_PROG,
+  BRW_CACHE_FS_PROG,
+  BRW_CACHE_CS_PROG,
+   };
+   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
+   return stage_sizes[stage];
+}
+
  static unsigned
  get_program_string_id(enum brw_cache_id cache_id, const void *key)
  {
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 1b4745ef753..445f5e0b510 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -180,6 +180,8 @@ void brw_destroy_caches( struct brw_context *brw );
  
  void brw_print_program_cache(struct brw_context *brw);
  
+enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage);

+
  /* intel_batchbuffer.c */
  void brw_require_statebuffer_space(struct brw_context *brw, int size);
  void *brw_state_batch(struct brw_context *brw,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/19] st/mesa: i965: Use ShaderCacheSerializeDriverBlob driver function

2018-06-29 Thread Timothy Arceri

The subject line contains i965:

On 15/05/18 02:52, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/mesa/state_tracker/st_context.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index ee76e07a7d1..0d0cfc5c63c 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -770,10 +770,12 @@ st_init_driver_functions(struct pipe_screen *screen,
screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
 PIPE_SHADER_CAP_PREFERRED_IR);
 if (preferred_ir == PIPE_SHADER_IR_NIR) {
+  functions->ShaderCacheSerializeDriverBlob =  st_serialise_nir_program;
functions->ProgramBinarySerializeDriverBlob = st_serialise_nir_program;
functions->ProgramBinaryDeserializeDriverBlob =
   st_deserialise_nir_program;
 } else {
+  functions->ShaderCacheSerializeDriverBlob =  st_serialise_tgsi_program;
functions->ProgramBinarySerializeDriverBlob = st_serialise_tgsi_program;
functions->ProgramBinaryDeserializeDriverBlob =
   st_deserialise_tgsi_program;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/9] nir: Add a large constants optimization pass

2018-06-29 Thread Jason Ekstrand
This pass searches for reasonably large local variables which can be
statically proven to be constant and moves them into shader constant
data.  This is especially useful when large tables are baked into the
shader source code because they can be moved into a UBO by the driver to
reduce register pressure and make indirect access cheaper.

v2 (Jason Ekstrand):
 - Use a size/align function to ensure we get the right alignments
 - Use the newly added deref offset helpers
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |   4 +
 src/compiler/nir/nir_opt_large_constants.c | 301 +
 4 files changed, 307 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_large_constants.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 0fcbc5c5c5b..9e3fbdc2612 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -276,6 +276,7 @@ NIR_FILES = \
nir/nir_opt_if.c \
nir/nir_opt_intrinsics.c \
nir/nir_opt_loop_unroll.c \
+   nir/nir_opt_large_constants.c \
nir/nir_opt_move_comparisons.c \
nir/nir_opt_move_load_ubo.c \
nir/nir_opt_peephole_select.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index eb7fb7b121e..28aa8de7014 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -160,6 +160,7 @@ files_libnir = files(
   'nir_opt_global_to_local.c',
   'nir_opt_if.c',
   'nir_opt_intrinsics.c',
+  'nir_opt_large_constants.c',
   'nir_opt_loop_unroll.c',
   'nir_opt_move_comparisons.c',
   'nir_opt_move_load_ubo.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index cc5f88d6f54..92ab3a699cc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2905,6 +2905,10 @@ bool nir_opt_if(nir_shader *shader);
 
 bool nir_opt_intrinsics(nir_shader *shader);
 
+bool nir_opt_large_constants(nir_shader *shader,
+ glsl_type_size_align_func size_align,
+ unsigned threshold);
+
 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
 
 bool nir_opt_move_comparisons(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_large_constants.c 
b/src/compiler/nir/nir_opt_large_constants.c
new file mode 100644
index 000..027c6e8e5b5
--- /dev/null
+++ b/src/compiler/nir/nir_opt_large_constants.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+struct var_info {
+   bool is_constant;
+   bool found_read;
+};
+
+static nir_ssa_def *
+build_constant_load(nir_builder *b, nir_deref_instr *deref,
+glsl_type_size_align_func size_align)
+{
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   const unsigned bit_size = glsl_get_bit_size(deref->type);
+   const unsigned num_components = glsl_get_vector_elements(deref->type);
+
+   UNUSED unsigned var_size, var_align;
+   size_align(var->type, &var_size, &var_align);
+   assert(var->data.location % var_align == 0);
+
+   nir_intrinsic_instr *load =
+  nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_constant);
+   load->num_components = num_components;
+   nir_intrinsic_set_base(load, var->data.location);
+   nir_intrinsic_set_range(load, var_size);
+   load->src[0] = nir_src_for_ssa(nir_build_deref_offset(b, deref, 
size_align));
+   nir_ssa_dest_init(&load->instr, &load->dest,
+ num_components, bit_size, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static void
+handle_constant_store(nir_builder *b, nir_intrinsic_instr *store,
+  glsl_type_size_align_func size_align)
+{
+   nir_deref_instr *deref = 

[Mesa-dev] [PATCH v2 9/9] anv, intel: Enable nir_opt_large_constants for Vulkan

2018-06-29 Thread Jason Ekstrand
According to RenderDoc, this shaves 99.6% of the run time off of the
ambient occlusion pass in Skyrim Special Edition when running under DXVK
and shaves 92% off the runtime for a reasonably representative frame.
When running the actual game, Skyrim goes from being a slide-show to a
very stable and playable framerate on my SKL GT4e machine.
---
 src/intel/compiler/brw_compiler.h | 6 ++
 src/intel/compiler/brw_nir.c  | 7 +++
 src/intel/vulkan/anv_device.c | 1 +
 3 files changed, 14 insertions(+)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 2f745d92745..9dfcfcc0115 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -112,6 +112,12 @@ struct brw_compiler {
 * will attempt to push everything.
 */
bool supports_pull_constants;
+
+   /**
+* Whether or not the driver supports NIR shader constants.  This controls
+* whether nir_opt_large_constants will be run.
+*/
+   bool supports_shader_constants;
 };
 
 /**
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index dfeea73b06a..f4aee3d41ee 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -664,6 +664,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
nir = brw_nir_optimize(nir, compiler, is_scalar);
 
+   /* This needs to be run after the first optimization pass but before we
+* lower indirect derefs away
+*/
+   if (compiler->supports_shader_constants) {
+  OPT(nir_opt_large_constants, NULL, 32);
+   }
+
nir_lower_bit_size(nir, lower_bit_size_callback, NULL);
 
if (is_scalar) {
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index b3d30675b1e..077f5c16e46 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,6 +434,7 @@ anv_physical_device_init(struct anv_physical_device *device,
device->compiler->supports_pull_constants = false;
device->compiler->constant_buffer_0_is_relative =
   device->info.gen < 8 || !device->has_context_isolation;
+   device->compiler->supports_shader_constants = true;
 
isl_device_init(>isl_dev, >info, swizzled);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/9] nir: Add a concept of constant data associated with a shader

2018-06-29 Thread Jason Ekstrand
This commit adds a concept to NIR of having a blob of constant data
associated with a shader.  Instead of being a UBO or uniform that can be
manipulated by the client, this constant data is considered part of the
shader and remains constant across all invocations of the given shader
until the end of time.  To access this constant data from the shader, we
add a new load_constant intrinsic.  The intention is that drivers will
eventually lower load_constant intrinsics to load_ubo, load_uniform, or
something similar.  Constant data will be used by the optimization pass
in the next commit but this concept may also be useful for OpenCL.

v2 (Jason Ekstrand):
 - Rename num_constants to constant_data_size (anholt)
---
 src/compiler/nir/nir.h |  8 
 src/compiler/nir/nir_clone.c   |  6 ++
 src/compiler/nir/nir_intrinsics.py |  2 ++
 src/compiler/nir/nir_serialize.c   | 12 
 src/compiler/nir/nir_sweep.c   |  2 ++
 5 files changed, 30 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e35bef612df..cc5f88d6f54 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2067,6 +2067,14 @@ typedef struct nir_shader {
 * access plus one
 */
unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+
+   /** Constant data associated with this shader.
+*
+* Constant data is loaded through load_constant intrinsics.  See also
+* nir_opt_large_constants.
+*/
+   void *constant_data;
+   unsigned constant_data_size;
 } nir_shader;
 
 static inline nir_function_impl *
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 23bb17eeba3..989c5051a54 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -734,6 +734,12 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
ns->num_outputs = s->num_outputs;
ns->num_shared = s->num_shared;
 
+   ns->constant_data_size = s->constant_data_size;
+   if (s->constant_data_size > 0) {
+  ns->constant_data = ralloc_size(ns, s->constant_data_size);
+  memcpy(ns->constant_data, s->constant_data, s->constant_data_size);
+   }
+
free_clone_state();
 
return ns;
diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index d9d0bbdfccf..44a5b76beb6 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -532,6 +532,8 @@ load("per_vertex_output", 2, [BASE, COMPONENT], 
[CAN_ELIMINATE])
 load("shared", 1, [BASE], [CAN_ELIMINATE])
 # src[] = { offset }. const_index[] = { base, range }
 load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
+# src[] = { offset }. const_index[] = { base, range }
+load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 
 # Stores work the same way as loads, except now the first source is the value
 # to store and the second (and possibly third) source specify where to store
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index cc4bf23aa0f..6a30738c2d7 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -1116,6 +1116,10 @@ nir_serialize(struct blob *blob, const nir_shader *nir)
   write_function_impl(, fxn->impl);
}
 
+   blob_write_uint32(blob, nir->constant_data_size);
+   if (nir->constant_data_size > 0)
+  blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
+
*(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx;
 
_mesa_hash_table_destroy(ctx.remap_table, NULL);
@@ -1169,6 +1173,14 @@ nir_deserialize(void *mem_ctx,
nir_foreach_function(fxn, ctx.nir)
   fxn->impl = read_function_impl(, fxn);
 
+   ctx.nir->constant_data_size = blob_read_uint32(blob);
+   if (ctx.nir->constant_data_size > 0) {
+  ctx.nir->constant_data =
+ ralloc_size(ctx.nir, ctx.nir->constant_data_size);
+  blob_copy_bytes(blob, ctx.nir->constant_data,
+  ctx.nir->constant_data_size);
+   }
+
free(ctx.idx_table);
 
return ctx.nir;
diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c
index b14bf139c1b..aab641388db 100644
--- a/src/compiler/nir/nir_sweep.c
+++ b/src/compiler/nir/nir_sweep.c
@@ -167,6 +167,8 @@ nir_sweep(nir_shader *nir)
   sweep_function(nir, func);
}
 
+   ralloc_steal(nir, nir->constant_data);
+
/* Free everything we didn't steal back. */
ralloc_free(rubbish);
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/9] nir: Add a deref_instr_has_indirect helper

2018-06-29 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_deref.c | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c16ce547642..e35bef612df 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -987,6 +987,8 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr)
return instr->var;
 }
 
+bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
+
 bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
 
 typedef struct {
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index 1a00157c2fc..22ecde4ecca 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -102,6 +102,24 @@ nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
return progress;
 }
 
+bool
+nir_deref_instr_has_indirect(nir_deref_instr *instr)
+{
+   while (instr->deref_type != nir_deref_type_var) {
+  /* Consider casts to be indirects */
+  if (instr->deref_type == nir_deref_type_cast)
+ return true;
+
+  if (instr->deref_type == nir_deref_type_array &&
+  !nir_src_as_const_value(instr->arr.index))
+ return true;
+
+  instr = nir_deref_instr_parent(instr);
+   }
+
+   return false;
+}
+
 bool
 nir_remove_dead_derefs_impl(nir_function_impl *impl)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 8/9] anv: Add state setup support for shader constants

2018-06-29 Thread Jason Ekstrand
---
 .../vulkan/anv_nir_apply_pipeline_layout.c| 47 
 src/intel/vulkan/anv_private.h|  1 +
 src/intel/vulkan/genX_cmd_buffer.c| 72 ++-
 3 files changed, 101 insertions(+), 19 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 37a54b2efff..c287a005bd6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -32,6 +32,8 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
 
+   bool uses_constants;
+   uint8_t constants_offset;
struct {
   BITSET_WORD *used;
   uint8_t *surface_offsets;
@@ -100,6 +102,10 @@ get_used_bindings_block(nir_block *block,
 add_deref_src_binding(state, intrin->src[0]);
 break;
 
+ case nir_intrinsic_load_constant:
+state->uses_constants = true;
+break;
+
  default:
 break;
  }
@@ -172,6 +178,33 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
nir_instr_remove(>instr);
 }
 
+static void
+lower_load_constant(nir_intrinsic_instr *intrin,
+struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
+   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
+  nir_imm_int(b, nir_intrinsic_base(intrin)));
+
+   nir_intrinsic_instr *load_ubo =
+  nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   load_ubo->num_components = intrin->num_components;
+   load_ubo->src[0] = nir_src_for_ssa(index);
+   load_ubo->src[1] = nir_src_for_ssa(offset);
+   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+   nir_builder_instr_insert(b, &load_ubo->instr);
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+nir_src_for_ssa(&load_ubo->dest.ssa));
+   nir_instr_remove(&intrin->instr);
+}
+
 static void
 lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
 unsigned *base_index,
@@ -285,6 +318,9 @@ apply_pipeline_layout_block(nir_block *block,
  case nir_intrinsic_vulkan_resource_reindex:
 lower_res_reindex_intrinsic(intrin, state);
 break;
+ case nir_intrinsic_load_constant:
+lower_load_constant(intrin, state);
+break;
  default:
 break;
  }
@@ -343,6 +379,9 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
  get_used_bindings_block(block, );
}
 
+   if (state.uses_constants)
+  map->surface_count++;
+
for (uint32_t set = 0; set < layout->num_sets; set++) {
   struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 
@@ -365,6 +404,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline 
*pipeline,
unsigned surface = 0;
unsigned sampler = 0;
unsigned image = 0;
+
+   if (state.uses_constants) {
+  state.constants_offset = surface;
+  map->surface_to_descriptor[surface].set =
+ ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
+  surface++;
+   }
+
for (uint32_t set = 0; set < layout->num_sets; set++) {
   struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f82b88df7a0..139c48b7e46 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1570,6 +1570,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
 
+#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
 
 struct anv_pipeline_binding {
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 97b321ccaeb..34fbd83d148 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2026,6 +2026,26 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
  bt_map[bias + s] = surface_state.offset + state_offset;
  continue;
+  } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) {
+ struct anv_state surface_state =
+anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+ struct anv_address constant_data = {
+.bo = &pipeline->device->dynamic_state_pool.block_pool.bo,
+.offset = pipeline->shaders[stage]->constant_data.offset,
+ };
+ unsigned constant_data_size =
+pipeline->shaders[stage]->constant_data_size;
+
+ const enum isl_format format =
+

[Mesa-dev] [PATCH v2 4/9] nir/deref: Add helpers for getting offsets

2018-06-29 Thread Jason Ekstrand
These are very similar to the related function in nir_lower_io except
that they don't handle per-vertex or packed things (that could be added,
in theory) and they take a more detailed size/align function pointer.
One day, we should consider switching nir_lower_io over to using the
more detailed size/align functions and then we could make it use these
helpers instead of having its own.
---
 src/compiler/nir/nir_deref.c | 91 
 src/compiler/nir/nir_deref.h |  6 +++
 2 files changed, 97 insertions(+)

diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index 22ecde4ecca..94f91df5a0f 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -120,6 +120,97 @@ nir_deref_instr_has_indirect(nir_deref_instr *instr)
return false;
 }
 
+static unsigned
+type_get_array_stride(const struct glsl_type *elem_type,
+  glsl_type_size_align_func size_align)
+{
+   unsigned elem_size, elem_align;
+   glsl_get_natural_size_align_bytes(elem_type, &elem_size, &elem_align);
+   return ALIGN_POT(elem_size, elem_align);
+}
+
+static unsigned
+struct_type_get_field_offset(const struct glsl_type *struct_type,
+ glsl_type_size_align_func size_align,
+ unsigned field_idx)
+{
+   assert(glsl_type_is_struct(struct_type));
+   unsigned offset = 0;
+   for (unsigned i = 0; i <= field_idx; i++) {
+  unsigned elem_size, elem_align;
+  glsl_get_natural_size_align_bytes(glsl_get_struct_field(struct_type, i),
+&elem_size, &elem_align);
+  offset = ALIGN_POT(offset, elem_align);
+  if (i < field_idx)
+ offset += elem_size;
+   }
+   return offset;
+}
+
+unsigned
+nir_deref_instr_get_const_offset(nir_deref_instr *deref,
+ glsl_type_size_align_func size_align)
+{
+   nir_deref_path path;
+   nir_deref_path_init(&path, deref, NULL);
+
+   assert(path.path[0]->deref_type == nir_deref_type_var);
+   nir_deref_instr **p = &path.path[1];
+
+   unsigned offset = 0;
+   for (; *p; p++) {
+  if ((*p)->deref_type == nir_deref_type_array) {
+ offset += nir_src_as_const_value((*p)->arr.index)->u32[0] *
+   type_get_array_stride((*p)->type, size_align);
+  } else if ((*p)->deref_type == nir_deref_type_struct) {
+ /* p starts at path[1], so this is safe */
+ nir_deref_instr *parent = *(p - 1);
+ offset += struct_type_get_field_offset(parent->type, size_align,
+(*p)->strct.index);
+  } else {
+ unreachable("Unsupported deref type");
+  }
+   }
+
+   nir_deref_path_finish(&path);
+
+   return offset;
+}
+
+nir_ssa_def *
+nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
+   glsl_type_size_align_func size_align)
+{
+   nir_deref_path path;
+   nir_deref_path_init(&path, deref, NULL);
+
+   assert(path.path[0]->deref_type == nir_deref_type_var);
+   nir_deref_instr **p = &path.path[1];
+
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+   for (; *p; p++) {
+  if ((*p)->deref_type == nir_deref_type_array) {
+ nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
+ nir_ssa_def *stride =
+nir_imm_int(b, type_get_array_stride((*p)->type, size_align));
+ offset = nir_iadd(b, offset, nir_imul(b, index, stride));
+  } else if ((*p)->deref_type == nir_deref_type_struct) {
+ /* p starts at path[1], so this is safe */
+ nir_deref_instr *parent = *(p - 1);
+ unsigned field_offset =
+struct_type_get_field_offset(parent->type, size_align,
+ (*p)->strct.index);
+ nir_iadd(b, offset, nir_imm_int(b, field_offset));
+  } else {
+ unreachable("Unsupported deref type");
+  }
+   }
+
+   nir_deref_path_finish(&path);
+
+   return offset;
+}
+
 bool
 nir_remove_dead_derefs_impl(nir_function_impl *impl)
 {
diff --git a/src/compiler/nir/nir_deref.h b/src/compiler/nir/nir_deref.h
index 0980bae7215..6f4141aaf82 100644
--- a/src/compiler/nir/nir_deref.h
+++ b/src/compiler/nir/nir_deref.h
@@ -48,6 +48,12 @@ void nir_deref_path_init(nir_deref_path *path,
  nir_deref_instr *deref, void *mem_ctx);
 void nir_deref_path_finish(nir_deref_path *path);
 
+unsigned nir_deref_instr_get_const_offset(nir_deref_instr *deref,
+  glsl_type_size_align_func 
size_align);
+
+nir_ssa_def *nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
+glsl_type_size_align_func size_align);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 7/9] anv: Add support for shader constant data to the pipeline cache

2018-06-29 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c  |  1 +
 src/intel/vulkan/anv_pipeline.c   | 16 
 src/intel/vulkan/anv_pipeline_cache.c | 27 +++
 src/intel/vulkan/anv_private.h|  6 ++
 4 files changed, 50 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 5373faaa680..4dbfb7a83fd 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -70,6 +70,7 @@ upload_blorp_shader(struct blorp_context *blorp,
struct anv_shader_bin *bin =
   anv_pipeline_cache_upload_kernel(>blorp_shader_cache,
key, key_size, kernel, kernel_size,
+   NULL, 0,
prog_data, prog_data_size, _map);
 
if (!bin)
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 67ede46f2ae..dc3b4a0e3ba 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -485,6 +485,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
+   const void *constant_data,
+   uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct anv_pipeline_bind_map *bind_map)
@@ -492,11 +494,13 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
if (cache) {
   return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
   kernel_data, kernel_size,
+  constant_data, 
constant_data_size,
   prog_data, prog_data_size,
   bind_map);
} else {
   return anv_shader_bin_create(pipeline->device, key_data, key_size,
kernel_data, kernel_size,
+   constant_data, constant_data_size,
prog_data, prog_data_size,
prog_data->param, bind_map);
}
@@ -575,6 +579,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
   unsigned code_size = prog_data.base.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+   nir->constant_data,
+   nir->constant_data_size,
_data.base.base, sizeof(prog_data),
);
   if (!bin) {
@@ -742,6 +748,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
   tcs_bin = anv_pipeline_upload_kernel(pipeline, cache,
tcs_sha1, sizeof(tcs_sha1),
shader_code, code_size,
+   tcs_nir->constant_data,
+   tcs_nir->constant_data_size,
_prog_data.base.base,
sizeof(tcs_prog_data),
_map);
@@ -763,6 +771,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
   tes_bin = anv_pipeline_upload_kernel(pipeline, cache,
tes_sha1, sizeof(tes_sha1),
shader_code, code_size,
+   tes_nir->constant_data,
+   tes_nir->constant_data_size,
_prog_data.base.base,
sizeof(tes_prog_data),
_map);
@@ -845,6 +855,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
   const unsigned code_size = prog_data.base.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+   nir->constant_data,
+   nir->constant_data_size,
_data.base.base, sizeof(prog_data),
);
   if (!bin) {
@@ -995,6 +1007,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
   unsigned code_size = prog_data.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+ 

[Mesa-dev] [PATCH v2 3/9] nir/types: Add a natural size and alignment helper

2018-06-29 Thread Jason Ekstrand
The size and alignment are "natural" in the sense that everything is
aligned to a scalar.  This is a bit tighter than std430 where vec3s are
required to be aligned to a vec4.
---
 src/compiler/nir_types.cpp | 56 ++
 src/compiler/nir_types.h   |  6 
 2 files changed, 62 insertions(+)

diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index d2b2a93b207..2b932b1967e 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -477,3 +477,59 @@ glsl_channel_type(const glsl_type *t)
   unreachable("Unhandled base type glsl_channel_type()");
}
 }
+
+void
+glsl_get_natural_size_align_bytes(const struct glsl_type *type,
+  unsigned *size, unsigned *align)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT8:
+   case GLSL_TYPE_INT8:
+   case GLSL_TYPE_UINT16:
+   case GLSL_TYPE_INT16:
+   case GLSL_TYPE_FLOAT16:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_UINT64:
+   case GLSL_TYPE_INT64: {
+  unsigned N = glsl_get_bit_size(type) / 8;
+  *size = N * type->components();
+  *align = N;
+  break;
+   }
+
+   case GLSL_TYPE_ARRAY: {
+  unsigned elem_size, elem_align;
+  glsl_get_natural_size_align_bytes(type->fields.array,
+&elem_size, &elem_align);
+  *align = elem_align;
+  *size = type->length * ALIGN_POT(elem_size, elem_align);
+  break;
+   }
+
+   case GLSL_TYPE_STRUCT:
+  *size = 0;
+  *align = 0;
+  for (unsigned i = 0; i < type->length; i++) {
+ unsigned elem_size, elem_align;
+ glsl_get_natural_size_align_bytes(type->fields.structure[i].type,
+   &elem_size, &elem_align);
+ *align = MAX2(*align, elem_align);
+ *size = ALIGN_POT(*size, elem_align) + elem_size;
+  }
+  break;
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_SUBROUTINE:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_FUNCTION:
+  unreachable("type does not have a natural size");
+   }
+}
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 1107cfd73f2..67c4d7b5097 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -184,6 +184,12 @@ const struct glsl_type *glsl_transposed_type(const struct 
glsl_type *type);
 
 const struct glsl_type *glsl_channel_type(const struct glsl_type *type);
 
+typedef void (*glsl_type_size_align_func)(const struct glsl_type *type,
+  unsigned *size, unsigned *align);
+
+void glsl_get_natural_size_align_bytes(const struct glsl_type *type,
+   unsigned *size, unsigned *align);
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/9] util/macros: Import ALIGN_POT from ralloc.c

2018-06-29 Thread Jason Ekstrand
---
 src/util/macros.h | 3 +++
 src/util/ralloc.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/util/macros.h b/src/util/macros.h
index 6d3df904082..95b86c7a31a 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -285,6 +285,9 @@ do {   \
 #define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
 #define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))
 
+/** Align a value to a power of two */
+#define ALIGN_POT(x, y) (((x) + (y) - 1) & ~((y) - 1))
+
 /**
  * Macro for declaring an explicit conversion operator.  Defaults to an
  * implicit conversion if C++11 is not supported.
diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 42cfa2e391d..5d77f75ee85 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -553,8 +553,6 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, 
const char *fmt,
  * other buffers.
  */
 
-#define ALIGN_POT(x, y) (((x) + (y) - 1) & ~((y) - 1))
-
 #define MIN_LINEAR_BUFSIZE 2048
 #define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
 #define LMAGIC 0x87b9c7d3
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/9] anv,nir: Move large constants to a UBO

2018-06-29 Thread Jason Ekstrand
This little series adds an optimization pass to NIR and wires it up in anv
that moves large constant variables to a UBO.  This fixes a fairly common
case in some filter or ambient occlusion shaders where they put some sort
of look-up table in the shader itself.  This series takes Skyrim Special
Edition running under DXVK from a slide show to a smooth and very playable
framerate on my SKL desktop.

The first part of the series adds a concept of constant data that can be
associated with a NIR shader and adds an optimization pass to move large
constant variables into this constant data section.  It's left up to the
driver to figure out how to get this constant data into the shader.  The
last three patches wire things up in ANV to put this data into an implicit
UBO and enables the optimization.

v2 (Jason Ekstrand):
 - Take anholt's feedback and make it clearer that the number of constants
   is measured in bytes by calling it constant_data_size.
 - Break some of the deref to offset code out into helpers
 - Add new size/align helpers for types to ensure that we get alignments
   right when setting up constants.  This hasn't usually been a problem in
   the past because we align most things to a dword and 64-bit values
   aren't common.  We should start being more careful.

Jason Ekstrand (9):
  util/macros: Import ALIGN_POT from ralloc.c
  nir: Add a deref_instr_has_indirect helper
  nir/types: Add a natural size and alignment helper
  nir/deref: Add helpers for getting offsets
  nir: Add a concept of constant data associated with a shader
  nir: Add a large constants optimization pass
  anv: Add support for shader constant data to the pipeline cache
  anv: Add state setup support for shader constants
  anv,intel: Enable nir_opt_large_constants for Vulkan

 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir.h|  14 +
 src/compiler/nir/nir_clone.c  |   6 +
 src/compiler/nir/nir_deref.c  | 109 +++
 src/compiler/nir/nir_deref.h  |   6 +
 src/compiler/nir/nir_intrinsics.py|   2 +
 src/compiler/nir/nir_opt_large_constants.c| 301 ++
 src/compiler/nir/nir_serialize.c  |  12 +
 src/compiler/nir/nir_sweep.c  |   2 +
 src/compiler/nir_types.cpp|  56 
 src/compiler/nir_types.h  |   6 +
 src/intel/compiler/brw_compiler.h |   6 +
 src/intel/compiler/brw_nir.c  |   7 +
 src/intel/vulkan/anv_blorp.c  |   1 +
 src/intel/vulkan/anv_device.c |   1 +
 .../vulkan/anv_nir_apply_pipeline_layout.c|  47 +++
 src/intel/vulkan/anv_pipeline.c   |  16 +
 src/intel/vulkan/anv_pipeline_cache.c |  27 ++
 src/intel/vulkan/anv_private.h|   7 +
 src/intel/vulkan/genX_cmd_buffer.c|  72 +++--
 src/util/macros.h |   3 +
 src/util/ralloc.c |   2 -
 23 files changed, 684 insertions(+), 21 deletions(-)
 create mode 100644 src/compiler/nir/nir_opt_large_constants.c

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 26/33] nvir/nir: implement variable indexing

2018-06-29 Thread Karol Herbst
We store those arrays in local memory and reserve some space for each of the
arrays. With NIR we could store those arrays packed, but we don't do that yet
as it causes MemoryOpt to generate unaligned memory accesses.

v3: use fixed size vec4 arrays until we fix MemoryOpt
v4: fix for 64 bit types
v5: use loadFrom helper
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 58 +++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index a8e126a5730..d3188aa9872 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -78,6 +78,7 @@ public:
 private:
typedef std::vector LValues;
typedef std::tr1::unordered_map NirDefMap;
+   typedef std::tr1::unordered_map NirArrayLMemOffsets;
typedef std::tr1::unordered_map NirBlockMap;
 
TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
@@ -155,6 +156,7 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
 
@@ -1267,6 +1269,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
 bool
 Converter::parseNIR()
 {
+   info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
 
@@ -1358,6 +1361,16 @@ Converter::visit(nir_function *function)
   break;
}
 
+   nir_foreach_register(reg, &function->impl->registers) {
+  if (reg->num_array_elems) {
+ /* TODO: packed variables would be nice, but MemoryOpt fails */
+ /* replace 4 with reg->num_components */
+ uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
+ regToLmemOffset[reg->index] = info->bin.tlsSpace;
+ info->bin.tlsSpace += size;
+  }
+   }
+
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
   if (!visit(node))
@@ -2088,6 +2101,51 @@ Converter::visit(nir_alu_instr *insn)
 *   2. they basically just merge multiple values into one data type
 */
CASE_OPFI(mov):
+  if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) 
{
+ nir_reg_dest& reg = insn->dest.dest.reg;
+ uint32_t goffset = regToLmemOffset[reg.reg->index];
+ uint8_t comps = reg.reg->num_components;
+ uint8_t size = reg.reg->bit_size / 8;
+ uint8_t csize = 4 * size; /* TODO after fixing MemoryOpts: comps * 
size; */
+ uint32_t aoffset = csize * reg.base_offset;
+ Value *indirect = NULL;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (uint8_t i = 0u; i < comps; ++i) {
+if (!((1u << i) & insn->dest.write_mask))
+   continue;
+
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
+ }
+ break;
+  } else if (!insn->src[0].src.is_ssa && 
insn->src[0].src.reg.reg->num_array_elems) {
+ LValues &newDefs = convert(&insn->dest);
+ nir_reg_src& reg = insn->src[0].src.reg;
+ uint32_t goffset = regToLmemOffset[reg.reg->index];
+ /* uint8_t comps = reg.reg->num_components; */
+ uint8_t size = reg.reg->bit_size / 8;
+ uint8_t csize = 4 * size; /* TODO after fixing MemoryOpts: comps * 
size; */
+ uint32_t aoffset = csize * reg.base_offset;
+ Value *indirect = NULL;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (uint8_t i = 0u; i < newDefs.size(); ++i)
+loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + 
aoffset, i, indirect);
+
+ break;
+  } else {
+ LValues &newDefs = convert(&insn->dest);
+ for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
+mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
+ }
+  }
+  break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 30/33] nvir/nir: implement images

2018-06-29 Thread Karol Herbst
v3: fix compiler warnings
v4: use loadFrom helper
v5: fix signed min/max
v6: set tex mask
add support for indirect image access
set cache mode
v7: make compatible with 884d27bcf688d36c3bbe01bceca525595add3b33
rework the whole deref thing to prepare for bindless
v8: port to deref instructions
don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 390 +-
 1 file changed, 370 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f3f15d1d2ff..e354c32459b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,7 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+#include 
 #include 
 #include 
 
@@ -89,6 +90,8 @@ private:
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
+   ImgFormat convertGLImgFormat(GLuint);
+
Value* getSrc(nir_alu_src *, uint8_t component = 0);
Value* getSrc(nir_register *, uint8_t);
Value* getSrc(nir_src *, uint8_t, bool indirect = false);
@@ -118,6 +121,7 @@ private:
 
DataType getDType(nir_alu_instr*);
DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_intrinsic_instr*, bool isSigned);
DataType getDType(nir_op, uint8_t);
 
std::vector getSTypes(nir_alu_instr*);
@@ -139,6 +143,7 @@ private:
bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
+   bool visit(nir_deref_instr *);
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
@@ -151,6 +156,11 @@ private:
 
/* tex stuff */
Value* applyProjection(Value *src, Value *proj);
+   unsigned int getNIRArgCount(TexInstruction::Target&);
+
+   /* image stuff */
+   uint16_t handleDeref(nir_deref_instr *, Value * & indirect, const 
nir_variable * &);
+   CacheMode getCacheModeFromVar(const nir_variable *);
 
nir_shader *nir;
 
@@ -243,11 +253,30 @@ Converter::getDType(nir_alu_instr *insn)
 
 DataType
 Converter::getDType(nir_intrinsic_instr *insn)
+{
+   bool isSigned;
+   switch (insn->intrinsic) {
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+  isSigned = true;
+  break;
+   default:
+  isSigned = false;
+  break;
+   }
+
+   return getDType(insn, isSigned);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 {
if (insn->dest.is_ssa)
-  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
else
-  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 }
 
 DataType
@@ -415,6 +444,22 @@ Converter::getOperation(nir_intrinsic_op op)
   return OP_EMIT;
case nir_intrinsic_end_primitive:
   return OP_RESTART;
+   case nir_intrinsic_image_deref_atomic_add:
+   case nir_intrinsic_image_deref_atomic_and:
+   case nir_intrinsic_image_deref_atomic_comp_swap:
+   case nir_intrinsic_image_deref_atomic_exchange:
+   case nir_intrinsic_image_deref_atomic_max:
+   case nir_intrinsic_image_deref_atomic_min:
+   case nir_intrinsic_image_deref_atomic_or:
+   case nir_intrinsic_image_deref_atomic_xor:
+  return OP_SUREDP;
+   case nir_intrinsic_image_deref_load:
+  return OP_SULDP;
+   case nir_intrinsic_image_deref_samples:
+   case nir_intrinsic_image_deref_size:
+  return OP_SUQ;
+   case nir_intrinsic_image_deref_store:
+  return OP_SUSTP;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -444,28 +489,31 @@ Converter::getSubOp(nir_op op)
}
 }
 
+#define CASE_OP_INTR_ATOM(nir, nvir) \
+   case nir_intrinsic_image_deref_atomic_ ## nir : \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
+#define CASE_OP_INTR_ATOM_S(nir, nvir) \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
 int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
-   case nir_intrinsic_ssbo_atomic_add:
-  return NV50_IR_SUBOP_ATOM_ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-  return NV50_IR_SUBOP_ATOM_AND;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-  return NV50_IR_SUBOP_ATOM_CAS;
-   case nir_intrinsic_ssbo_atomic_exchange:
-  return NV50_IR_SUBOP_ATOM_EXCH;
-   case nir_intrinsic_ssbo_atomic_or:
-  return NV50_IR_SUBOP_ATOM_OR;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umax:
-  return NV50_IR_SUBOP_ATOM_MAX;
-   case 

[Mesa-dev] [PATCH v8 33/33] nvir/nir: implement intrinsic shader_clock

2018-06-29 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f4875113d00..ed2453136fd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2344,6 +2344,14 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = getSubOp(op);
   break;
}
+   case nir_intrinsic_shader_clock: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+
+  loadImm(newDefs[0], 0u);
+  mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0));
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 29/33] nvir/nir: implement ssbo intrinsics

2018-06-29 Thread Karol Herbst
v4: use loadFrom helper
v5: support indirect buffer access
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 90 +++
 1 file changed, 90 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 994d8f3968a..f3f15d1d2ff 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -448,6 +448,24 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_ssbo_atomic_add:
+  return NV50_IR_SUBOP_ATOM_ADD;
+   case nir_intrinsic_ssbo_atomic_and:
+  return NV50_IR_SUBOP_ATOM_AND;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  return NV50_IR_SUBOP_ATOM_CAS;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  return NV50_IR_SUBOP_ATOM_EXCH;
+   case nir_intrinsic_ssbo_atomic_or:
+  return NV50_IR_SUBOP_ATOM_OR;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+  return NV50_IR_SUBOP_ATOM_MAX;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+  return NV50_IR_SUBOP_ATOM_MIN;
+   case nir_intrinsic_ssbo_atomic_xor:
+  return NV50_IR_SUBOP_ATOM_XOR;
case nir_intrinsic_vote_all:
   return NV50_IR_SUBOP_VOTE_ALL;
case nir_intrinsic_vote_any:
@@ -1948,6 +1966,78 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_get_buffer_size: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirectBuffer;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
+  mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, 
indirectBuffer);
+  break;
+   }
+   case nir_intrinsic_store_ssbo: {
+  DataType sType = getSType(insn->src[0], false, false);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+ Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+offset + i * typeSizeof(sType));
+ mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], 
i))
+->setIndirect(0, 1, indirectBuffer);
+  }
+  info->io.globalAccess |= 0x2;
+  break;
+   }
+   case nir_intrinsic_load_ssbo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i)
+ loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+  indirectOffset, indirectBuffer);
+
+  info->io.globalAccess |= 0x1;
+  break;
+   }
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_xor: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+  uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
+  Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
+getSrc(&insn->src[2], 0));
+  if (op == nir_intrinsic_ssbo_atomic_comp_swap)
+ atom->setSrc(2, getSrc(&insn->src[3], 0));
+  atom->setIndirect(0, 0, indirectOffset);
+  atom->setIndirect(0, 1, indirectBuffer);
+  atom->subOp = getSubOp(op);
+
+  info->io.globalAccess |= 0x2;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 23/33] nvir/nir: implement nir_instr_type_tex

2018-06-29 Thread Karol Herbst
a lot of those fields are not valid for a lot of tex ops. Not quite sure if
it's worth the effort to check for those or just keep it like that. It seems
to kind of work.

v2: reworked offset handling
add tex support with indirect R/S arguments
handle GLSL_SAMPLER_DIM_EXTERNAL
drop reference in convert(glsl_sampler_dim&, bool, bool)
fix tg4 component selection
v5: fill up coords args with scratch values if coords provided is less than 
TexTarget.getArgCount()
v7: prepare for bindless_texture support
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 232 ++
 1 file changed, 232 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 637ec9d0664..9c9cc533006 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -80,6 +80,7 @@ private:
typedef std::tr1::unordered_map NirDefMap;
typedef std::tr1::unordered_map NirBlockMap;
 
+   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
@@ -122,6 +123,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
operation getOperation(nir_op);
+   operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
int getSubOp(nir_op);
@@ -142,6 +144,10 @@ private:
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
bool visit(nir_ssa_undef_instr *);
+   bool visit(nir_tex_instr *);
+
+   /* tex stuff */
+   Value* applyProjection(Value *src, Value *proj);
 
nir_shader *nir;
 
@@ -367,6 +373,36 @@ Converter::getOperation(nir_op op)
}
 }
 
+operation
+Converter::getOperation(nir_texop op)
+{
+   switch (op) {
+   case nir_texop_tex:
+  return OP_TEX;
+   case nir_texop_lod:
+  return OP_TXLQ;
+   case nir_texop_txb:
+  return OP_TXB;
+   case nir_texop_txd:
+  return OP_TXD;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+  return OP_TXF;
+   case nir_texop_tg4:
+  return OP_TXG;
+   case nir_texop_txl:
+  return OP_TXL;
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_txs:
+  return OP_TXQ;
+   default:
+  ERROR("couldn't get operation for nir_texop %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -1459,6 +1495,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_load_const(insn));
case nir_instr_type_ssa_undef:
   return visit(nir_instr_as_ssa_undef(insn));
+   case nir_instr_type_tex:
+  return visit(nir_instr_as_tex(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2180,6 +2218,200 @@ Converter::visit(nir_ssa_undef_instr *insn)
return true;
 }
 
+#define CASE_SAMPLER(ty) \
+   case GLSL_SAMPLER_DIM_ ## ty : \
+  if (isArray && !isShadow) \
+ return TEX_TARGET_ ## ty ## _ARRAY; \
+  else if (!isArray && isShadow) \
+ return TEX_TARGET_## ty ## _SHADOW; \
+  else if (isArray && isShadow) \
+ return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
+  else \
+ return TEX_TARGET_ ## ty
+
+TexTarget
+Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
+{
+   switch (dim) {
+   CASE_SAMPLER(1D);
+   CASE_SAMPLER(2D);
+   CASE_SAMPLER(CUBE);
+   case GLSL_SAMPLER_DIM_3D:
+  return TEX_TARGET_3D;
+   case GLSL_SAMPLER_DIM_MS:
+  if (isArray)
+ return TEX_TARGET_2D_MS_ARRAY;
+  return TEX_TARGET_2D_MS;
+   case GLSL_SAMPLER_DIM_RECT:
+  if (isShadow)
+ return TEX_TARGET_RECT_SHADOW;
+  return TEX_TARGET_RECT;
+   case GLSL_SAMPLER_DIM_BUF:
+  return TEX_TARGET_BUFFER;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+  return TEX_TARGET_2D;
+   default:
+  ERROR("unknown glsl_sampler_dim %u\n", dim);
+  assert(false);
+  return TEX_TARGET_COUNT;
+   }
+}
+#undef CASE_SAMPLER
+
+Value*
+Converter::applyProjection(Value *src, Value *proj)
+{
+   if (!proj)
+  return src;
+   return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
+}
+
+bool
+Converter::visit(nir_tex_instr *insn)
+{
+   switch (insn->op) {
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_tex:
+   case nir_texop_texture_samples:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+   case nir_texop_txs: {
+  LValues  = convert(>dest);
+  std::vector srcs;
+  std::vector defs;
+  std::vector offsets;
+  uint8_t mask = 0;
+  bool lz = false;
+  Value *proj = NULL;
+  TexInstruction::Target target = convert(insn->sampler_dim, 
insn->is_array, insn->is_shadow);
+   

[Mesa-dev] [PATCH v8 20/33] nvir/nir: implement intrinsic_discard(_if)

2018-06-29 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index a410468fd50..2047abdf371 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1651,6 +1651,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[1], mode);
   break;
}
+   case nir_intrinsic_discard:
+  mkOp(OP_DISCARD, TYPE_NONE, NULL);
+  break;
+   case nir_intrinsic_discard_if: {
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+  mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 31/33] nvir/nir: add memory barriers

2018-06-29 Thread Karol Herbst
v5: add more barrier intrinsics

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e354c32459b..ff9396ed715 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -506,6 +506,14 @@ Converter::getSubOp(nir_intrinsic_op op)
CASE_OP_INTR_ATOM(and, AND);
CASE_OP_INTR_ATOM(comp_swap, CAS);
CASE_OP_INTR_ATOM(exchange, EXCH);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+  return NV50_IR_SUBOP_MEMBAR(M, GL);
+   case nir_intrinsic_memory_barrier_shared:
+  return NV50_IR_SUBOP_MEMBAR(M, CTA);
CASE_OP_INTR_ATOM(or, OR);
case nir_intrinsic_image_deref_atomic_max:
CASE_OP_INTR_ATOM_S(imax, MAX);
@@ -2302,6 +2310,17 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
   break;
}
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared: {
+  Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+  bar->fixed = 1;
+  bar->subOp = getSubOp(op);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 22/33] nvir/nir: implement nir_ssa_undef_instr

2018-06-29 Thread Karol Herbst
v2: use mkOp
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 2e98e3fcce0..637ec9d0664 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -141,6 +141,7 @@ private:
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
+   bool visit(nir_ssa_undef_instr *);
 
nir_shader *nir;
 
@@ -1456,6 +1457,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
   return visit(nir_instr_as_load_const(insn));
+   case nir_instr_type_ssa_undef:
+  return visit(nir_instr_as_ssa_undef(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2167,6 +2170,16 @@ Converter::visit(nir_alu_instr *insn)
 }
 #undef DEFAULT_CHECKS
 
+bool
+Converter::visit(nir_ssa_undef_instr *insn)
+{
+   LValues &newDefs = convert(&insn->def);
+   for (uint8_t i = 0u; i < insn->def.num_components; ++i) {
+  mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 11/33] nvir/nir: run assignSlots

2018-06-29 Thread Karol Herbst
v2: add support for geometry shaders
set idx
add some missing mappings
fix for 64bit inputs/outputs
fix up some FP color output index messup
parse centroid flag
v3: fix arrays in outputs as well
fix input/output size calculation for tessellation shaders
v4: add getSlotAddress helper
fix for 64 bit typed inputs
v5: change getSlotAddress interface for easier use
fix sample inputs
fix slot counting for mat
v7: fix driver_location of images
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 626 ++
 1 file changed, 626 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 436c9f7b28b..c4fcc603861 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -67,6 +67,13 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
+
+   void setInterpolate(nv50_ir_varying *,
+   uint8_t,
+   bool centroid,
+   unsigned semantics);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -79,6 +86,8 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   bool assignSlots();
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -301,6 +310,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t 
s, uint8_t c, Value *&
return idx;
 }
 
+static void
+vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VERT_ATTRIB_GENERIC0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VERT_ATTRIB_GENERIC0;
+  return;
+   }
+
+   if (slot == VERT_ATTRIB_POINT_SIZE) {
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  return;
+   }
+
+   if (slot >= VERT_ATTRIB_TEX0) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VERT_ATTRIB_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VERT_ATTRIB_COLOR0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_COLOR1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VERT_ATTRIB_EDGEFLAG:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_FOG:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_NORMAL:
+  *name = TGSI_SEMANTIC_NORMAL;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  break;
+   }
+}
+
+static void
+varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VARYING_SLOT_PATCH0) {
+  *name = TGSI_SEMANTIC_PATCH;
+  *index = slot - VARYING_SLOT_PATCH0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_VAR0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VARYING_SLOT_VAR0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VARYING_SLOT_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VARYING_SLOT_BFC0:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_BFC1:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_DIST0:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 0;
+  break;
+   case VARYING_SLOT_CLIP_DIST1:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_VERTEX:
+  *name = TGSI_SEMANTIC_CLIPVERTEX;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_EDGE:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FACE:
+  *name = TGSI_SEMANTIC_FACE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FOGC:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_LAYER:
+  *name = TGSI_SEMANTIC_LAYER;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PNTC:
+  *name = TGSI_SEMANTIC_PCOORD;
+  *index = 0;
+  break;
+   case VARYING_SLOT_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PRIMITIVE_ID:
+  *name = TGSI_SEMANTIC_PRIMID;
+  *index = 0;
+  

[Mesa-dev] [PATCH v8 28/33] nvir/nir: implement nir_intrinsic_load_ubo

2018-06-29 Thread Karol Herbst
v4: use loadFrom helper
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e5b73c1919c..994d8f3968a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1934,6 +1934,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
   break;
}
+   case nir_intrinsic_load_ubo: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectIndex;
+  Value *indirectOffset;
+  uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
+  uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+  indirectOffset, indirectIndex);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 21/33] nvir/nir: implement loading system values

2018-06-29 Thread Karol Herbst
v2: support more sys values
fixed a bug where for multi component reads all values ended up in x
v3: add load_patch_vertices_in
v4: add subgroup stuff
v5: add helper invocation
v6: fix loading 64 bit system values
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 122 ++
 1 file changed, 122 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 2047abdf371..2e98e3fcce0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -83,6 +83,7 @@ private:
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
+   SVSemantic convert(nir_intrinsic_op);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
@@ -1462,6 +1463,70 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+SVSemantic
+Converter::convert(nir_intrinsic_op intr)
+{
+   switch (intr) {
+   case nir_intrinsic_load_base_vertex:
+  return SV_BASEVERTEX;
+   case nir_intrinsic_load_base_instance:
+  return SV_BASEINSTANCE;
+   case nir_intrinsic_load_draw_id:
+  return SV_DRAWID;
+   case nir_intrinsic_load_front_face:
+  return SV_FACE;
+   case nir_intrinsic_load_helper_invocation:
+  return SV_THREAD_KILL;
+   case nir_intrinsic_load_instance_id:
+  return SV_INSTANCE_ID;
+   case nir_intrinsic_load_invocation_id:
+  return SV_INVOCATION_ID;
+   case nir_intrinsic_load_local_group_size:
+  return SV_NTID;
+   case nir_intrinsic_load_local_invocation_id:
+  return SV_TID;
+   case nir_intrinsic_load_num_work_groups:
+  return SV_NCTAID;
+   case nir_intrinsic_load_patch_vertices_in:
+  return SV_VERTEX_COUNT;
+   case nir_intrinsic_load_primitive_id:
+  return SV_PRIMITIVE_ID;
+   case nir_intrinsic_load_sample_id:
+  return SV_SAMPLE_INDEX;
+   case nir_intrinsic_load_sample_mask_in:
+  return SV_SAMPLE_MASK;
+   case nir_intrinsic_load_sample_pos:
+  return SV_SAMPLE_POS;
+   case nir_intrinsic_load_subgroup_eq_mask:
+  return SV_LANEMASK_EQ;
+   case nir_intrinsic_load_subgroup_ge_mask:
+  return SV_LANEMASK_GE;
+   case nir_intrinsic_load_subgroup_gt_mask:
+  return SV_LANEMASK_GT;
+   case nir_intrinsic_load_subgroup_le_mask:
+  return SV_LANEMASK_LE;
+   case nir_intrinsic_load_subgroup_lt_mask:
+  return SV_LANEMASK_LT;
+   case nir_intrinsic_load_subgroup_invocation:
+  return SV_LANEID;
+   case nir_intrinsic_load_tess_coord:
+  return SV_TESS_COORD;
+   case nir_intrinsic_load_tess_level_inner:
+  return SV_TESS_INNER;
+   case nir_intrinsic_load_tess_level_outer:
+  return SV_TESS_OUTER;
+   case nir_intrinsic_load_vertex_id:
+  return SV_VERTEX_ID;
+   case nir_intrinsic_load_work_group_id:
+  return SV_CTAID;
+   default:
+  ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
+nir_intrinsic_infos[intr].name);
+  assert(false);
+  return SV_LAST;
+   }
+}
+
 bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
@@ -1665,6 +1730,63 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
   break;
}
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_helper_invocation:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_invocation_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+   case nir_intrinsic_load_subgroup_invocation:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_work_group_id: {
+  const DataType dType = getDType(insn);
+  SVSemantic sv = convert(op);
+  LValues &newDefs = convert(&insn->dest);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ Value *def;
+ if (typeSizeof(dType) == 8)
+def = getSSA();
+ else
+def = newDefs[i];
+
+ if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
+loadImm(def, 0u);
+ } else {
+Symbol *sym = mkSysVal(sv, i);
+Instruction 

[Mesa-dev] [PATCH v8 18/33] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output

2018-06-29 Thread Karol Herbst
v3: add workaround for RA issues
indirects have to be multiplied by 0x10
fix indirect access
v4: use smarter getIndirect helper
use storeTo helper
v5: don't use const_offset directly
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 46 +++
 1 file changed, 46 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index bcb2e8d28cc..32f33680bb4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1270,6 +1270,11 @@ Converter::visit(nir_function *function)
 
setPosition(entry, true);
 
+   if (info->io.genUserClip > 0) {
+  for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+   }
+
switch (prog->getType()) {
case Program::TYPE_TESSELLATION_CONTROL:
   outBase = mkOp2v(
@@ -1296,6 +1301,9 @@ Converter::visit(nir_function *function)
bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
setPosition(exit, true);
 
+   if (info->io.genUserClip > 0)
+  handleUserClipPlanes();
+
/* TODO: for non main function this needs to be a OP_RETURN */
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
return true;
@@ -1470,6 +1478,44 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output: {
+  Value *indirect;
+  DataType dType = getSType(insn->src[0], false, false);
+  uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 
2, 0, indirect);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+
+ uint8_t offset = 0;
+ Value *src = getSrc(&insn->src[0], i);
+ switch (prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+   /* TGSI uses a different interface than NIR, TGSI stores that
+* value in the z component, NIR in X
+*/
+   offset += 2;
+   src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+}
+break;
+ }
+ case Program::TYPE_VERTEX: {
+if (info->io.genUserClip > 0) {
+   mkMov(clipVtx[i], src);
+   src = clipVtx[i];
+}
+break;
+ }
+ default:
+break;
+ }
+
+ storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + 
offset, indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 12/33] nvir/nir: add loadFrom and storeTo helper

2018-06-29 Thread Karol Herbst
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 72 +++
 1 file changed, 72 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c4fcc603861..aa484df8fea 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -74,6 +74,13 @@ private:
bool centroid,
unsigned semantics);
 
+   Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t 
base,
+ uint8_t c, Value *indirect0 = NULL,
+ Value *indirect1 = NULL, bool patch = false);
+   void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,
+Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,
+Value *indirect1 = NULL);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -922,6 +929,71 @@ Converter::getSlotAddress(nir_intrinsic_instr *insn, 
uint8_t idx, uint8_t slot)
return vary[idx].slot[slot] * 4;
 }
 
+Instruction *
+Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
+uint32_t base, uint8_t c, Value *indirect0,
+Value *indirect1, bool patch)
+{
+   unsigned int tySize = typeSizeof(ty);
+
+   if (tySize == 8 &&
+   (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) 
{
+  Value *lo = getSSA();
+  Value *hi = getSSA();
+
+  Instruction *loi =
+ mkLoad(TYPE_U32, lo,
+mkSymbol(file, i, TYPE_U32, base + c * tySize),
+indirect0);
+  loi->setIndirect(0, 1, indirect1);
+  loi->perPatch = patch;
+
+  Instruction *hii =
+ mkLoad(TYPE_U32, hi,
+mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),
+indirect0);
+  hii->setIndirect(0, 1, indirect1);
+  hii->perPatch = patch;
+
+  return mkOp2(OP_MERGE, ty, def, lo, hi);
+   } else {
+  Instruction *ld =
+ mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);
+  ld->setIndirect(0, 1, indirect1);
+  ld->perPatch = patch;
+  return ld;
+   }
+}
+
+void
+Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
+   DataType ty, Value *src, uint8_t idx, uint8_t c,
+   Value *indirect0, Value *indirect1)
+{
+   uint8_t size = typeSizeof(ty);
+   uint32_t address = getSlotAddress(insn, idx, c);
+
+   if (size == 8 && indirect0) {
+  Value *split[2];
+  mkSplit(split, 4, src);
+
+  if (op == OP_EXPORT) {
+ split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);
+ split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);
+  }
+
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,
+  split[0])->perPatch = info->out[idx].patch;
+  mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), 
indirect0,
+  split[1])->perPatch = info->out[idx].patch;
+   } else {
+  if (op == OP_EXPORT)
+ src = mkMov(getSSA(size), src, ty)->getDef(0);
+  mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,
+  src)->perPatch = info->out[idx].patch;
+   }
+}
+
 bool
 Converter::run()
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 24/33] nvir/nir: add getOperation for intrinsics

2018-06-29 Thread Karol Herbst
v7: don't assert in default case

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 22 +++
 1 file changed, 22 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 9c9cc533006..e79871c1ebe 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -122,10 +122,12 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_intrinsic_op);
operation getOperation(nir_op);
operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
+   int getSubOp(nir_intrinsic_op);
int getSubOp(nir_op);
 
CondCode getCondCode(nir_op);
@@ -403,6 +405,17 @@ Converter::getOperation(nir_texop op)
}
 }
 
+operation
+Converter::getOperation(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -425,6 +438,15 @@ Converter::getSubOp(nir_op op)
}
 }
 
+int
+Converter::getSubOp(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  return 0;
+   }
+}
+
 CondCode
 Converter::getCondCode(nir_op op)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 25/33] nvir/nir: implement vote and ballot

2018-06-29 Thread Karol Herbst
v2: add vote_eq support
use the new subop intrinsic helper
add ballot
v3: add read_(first_)invocation
v8: handle vectorized intrinsics
don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 44 +++
 1 file changed, 44 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e79871c1ebe..a8e126a5730 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -442,6 +442,12 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_vote_all:
+  return NV50_IR_SUBOP_VOTE_ALL;
+   case nir_intrinsic_vote_any:
+  return NV50_IR_SUBOP_VOTE_ANY;
+   case nir_intrinsic_vote_ieq:
+  return NV50_IR_SUBOP_VOTE_UNI;
default:
   return 0;
}
@@ -1850,6 +1856,44 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[0], 32u);
   break;
}
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
+  mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+  break;
+   }
+   case nir_intrinsic_ballot: {
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred);
+  ballot->subOp = NV50_IR_SUBOP_VOTE_ANY;
+  mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), 
loadImm(getSSA(), 0));
+  break;
+   }
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_read_invocation: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *tmp = getScratch();
+
+  if (op == nir_intrinsic_read_first_invocation) {
+ mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = 
NV50_IR_SUBOP_VOTE_ANY;
+ mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = 
NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+  } else
+ tmp = getSrc(&insn->src[1], 0);
+
+  for (uint8_t i = 0; i < insn->num_components; ++i) {
+ mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, 
mkImm(0x1f))
+->subOp = NV50_IR_SUBOP_SHFL_IDX;
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 27/33] nvir/nir: implement geometry shader nir_intrinsics

2018-06-29 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
use loadFrom helper
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d3188aa9872..e5b73c1919c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -411,6 +411,10 @@ operation
 Converter::getOperation(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_emit_vertex:
+  return OP_EMIT;
+   case nir_intrinsic_end_primitive:
+  return OP_RESTART;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -1907,6 +1911,29 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_per_vertex_input: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+  uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
+
+  Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  mkImm(baseVertex), indirectVertex);
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
+   case nir_intrinsic_emit_vertex:
+   case nir_intrinsic_end_primitive: {
+  uint32_t idx = nir_intrinsic_stream_id(insn);
+  mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 32/33] nvir/nir: implement load_per_vertex_output

2018-06-29 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: use loadFrom helper
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index ff9396ed715..f4875113d00 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2069,6 +2069,29 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_per_vertex_output: {
+  const DataType dType = getDType(insn);
+  LValues &newDefs = convert(&insn->dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+  uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);
+  Value *vtxBase = NULL;
+
+  if (indirectVertex)
+ vtxBase = indirectVertex;
+  else
+ vtxBase = loadImm(NULL, baseVertex);
+
+  vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, 
vtxBase);
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
case nir_intrinsic_emit_vertex:
case nir_intrinsic_end_primitive: {
   uint32_t idx = nir_intrinsic_stream_id(insn);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 13/33] nvir/nir: parse NIR shader info

2018-06-29 Thread Karol Herbst
v2: parse a few more fields
v3: add special handling for GL_ISOLINES
v8: set info->prop.fp.readsSampleLocations
don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 325 +-
 1 file changed, 323 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index aa484df8fea..9ac98e04562 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -53,8 +53,10 @@ public:
 private:
typedef std::vector<LValue*> LValues;
typedef std::tr1::unordered_map<unsigned, LValues> NirDefMap;
+   typedef std::tr1::unordered_map<unsigned, BasicBlock*> NirBlockMap;
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -94,16 +96,48 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
bool assignSlots();
+   bool parseNIR();
+
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
 
nir_shader *nir;
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   NirBlockMap::iterator it = blocks.find(block->index);
+   if (it != blocks.end())
+  return (*it).second;
+
+   BasicBlock *bb = new BasicBlock(func);
+   blocks[block->index] = bb;
+   return bb;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -994,6 +1028,283 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
}
 }
 
+bool
+Converter::parseNIR()
+{
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_COMPUTE:
+  info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
+  info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
+  info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
+  info->bin.smemSize = nir->info.cs.shared_size;
+  break;
+   case Program::TYPE_FRAGMENT:
+  info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
+  info->prop.fp.persampleInvocation =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
+  info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
+  info->prop.fp.readsSampleLocations =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
+  info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+  info->prop.fp.usesSampleMaskIn =
+ !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
+  break;
+   case Program::TYPE_GEOMETRY:
+  info->prop.gp.inputPrim = nir->info.gs.input_primitive;
+  info->prop.gp.instanceCount = nir->info.gs.invocations;
+  info->prop.gp.maxVertices = nir->info.gs.vertices_out;
+  info->prop.gp.outputPrim = nir->info.gs.output_primitive;
+  break;
+   case Program::TYPE_TESSELLATION_CONTROL:
+   case Program::TYPE_TESSELLATION_EVAL:
+  if (nir->info.tess.primitive_mode == GL_ISOLINES)
+ info->prop.tp.domain = GL_LINES;
+  else
+ info->prop.tp.domain = nir->info.tess.primitive_mode;
+  info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
+  info->prop.tp.outputPrim =
+ nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
+  info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
+  info->prop.tp.winding = !nir->info.tess.ccw;
+  break;
+   case Program::TYPE_VERTEX:
+  info->prop.vp.usesDrawParameters =
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
+ (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
+  break;
+   default:
+  break;
+   }
+
+   return true;
+}
+
+bool
+Converter::visit(nir_function *function)
+{
+   /* we only support emitting the main function for now */
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   /* usually the blocks will set everything up, but main is special */
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   

[Mesa-dev] [PATCH v8 17/33] nvir/nir: implement nir_intrinsic_load_uniform

2018-06-29 Thread Karol Herbst
v2: use new getIndirect helper
fixes symbols for 64 bit types
v4: use smarter getIndirect helper
simplify address calculation
use loadFrom helper
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 0ae7363b161..bcb2e8d28cc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1460,6 +1460,16 @@ Converter::visit(nir_intrinsic_instr *insn)
nir_intrinsic_op op = insn->intrinsic;
 
switch (op) {
+   case nir_intrinsic_load_uniform: {
+  LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  uint32_t coffset = getIndirect(insn, 0, 0, indirect);
+  for (uint8_t i = 0; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, 
indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 10/33] nvir/nir: add nir type helper functions

2018-06-29 Thread Karol Herbst
v4: treat imul as unsigned
v5: remove pointless !!
v7: inot is unsigned as well
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 115 ++
 1 file changed, 115 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b7fab016b19..436c9f7b28b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -67,6 +67,18 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   bool isFloatType(nir_alu_type);
+   bool isSignedType(nir_alu_type);
+   bool isResultFloat(nir_op);
+   bool isResultSigned(nir_op);
+
+   DataType getDType(nir_alu_instr*);
+   DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_op, uint8_t);
+
+   std::vector<DataType> getSTypes(nir_alu_instr*);
+   DataType getSType(nir_src&, bool isFloat, bool isSigned);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -77,6 +89,109 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+bool
+Converter::isFloatType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_float;
+}
+
+bool
+Converter::isSignedType(nir_alu_type type)
+{
+   return nir_alu_type_get_base_type(type) == nir_type_int;
+}
+
+bool
+Converter::isResultFloat(nir_op op)
+{
+   const nir_op_info &info = nir_op_infos[op];
+   if (info.output_type != nir_type_invalid)
+  return isFloatType(info.output_type);
+
+   ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
+   assert(false);
+   return true;
+}
+
+bool
+Converter::isResultSigned(nir_op op)
+{
+   switch (op) {
+   /* there is no umul and we get wrong results if we treat all muls as signed 
*/
+   case nir_op_imul:
+   case nir_op_inot:
+  return false;
+   default:
+  const nir_op_info &info = nir_op_infos[op];
+  if (info.output_type != nir_type_invalid)
+ return isSignedType(info.output_type);
+  ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
+  assert(false);
+  return true;
+   }
+}
+
+DataType
+Converter::getDType(nir_alu_instr *insn)
+{
+   if (insn->dest.dest.is_ssa)
+  return getDType(insn->op, insn->dest.dest.ssa.bit_size);
+   else
+  return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn)
+{
+   if (insn->dest.is_ssa)
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+   else
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+}
+
+DataType
+Converter::getDType(nir_op op, uint8_t bitSize)
+{
+   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), 
isResultSigned(op));
+   if (ty == TYPE_NONE) {
+  ERROR("couldn't get Type for op %s with bitSize %u\n", 
nir_op_infos[op].name, bitSize);
+  assert(false);
+   }
+   return ty;
+}
+
+std::vector<DataType>
+Converter::getSTypes(nir_alu_instr *insn)
+{
+   const nir_op_info &info = nir_op_infos[insn->op];
+   std::vector<DataType> res(info.num_inputs);
+
+   for (uint8_t i = 0; i < info.num_inputs; ++i) {
+  if (info.input_types[i] != nir_type_invalid) {
+ res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), 
isSignedType(info.input_types[i]));
+  } else {
+ ERROR("getSType not implemented for %s idx %u\n", info.name, i);
+ assert(false);
+ res[i] = TYPE_NONE;
+ break;
+  }
+   }
+
+   return res;
+}
+
+DataType
+Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
+{
+   uint8_t bitSize;
+   if (src.is_ssa)
+  bitSize = src.ssa->bit_size;
+   else
+  bitSize = src.reg.reg->bit_size;
+
+   return typeOfSize(bitSize / 8, isFloat, isSigned);
+}
+
 Converter::LValues&
 Converter::convert(nir_dest *dest)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 19/33] nvir/nir: implement load_(interpolated_)input/output

2018-06-29 Thread Karol Herbst
v3: and load_output
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: don't use const_offset directly
fix for indirects
v6: add support for interpolateAt
v7: fix compiler warnings
add load_barycentric_sample
handle load_output for fragment shaders
v8: set info->prop.fp.readsSampleLocations for at_sample interpolation
don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 135 ++
 1 file changed, 135 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 32f33680bb4..a410468fd50 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1516,6 +1516,141 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+  LValues &newDefs = convert(&insn->dest);
+
+  /* FBFetch */
+  if (prog->getType() == Program::TYPE_FRAGMENT &&
+  op == nir_intrinsic_load_output) {
+ std::vector<Value*> defs, srcs;
+ uint8_t mask = 0;
+
+ srcs.push_back(getSSA());
+ srcs.push_back(getSSA());
+ Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
0));
+ Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
1));
+ mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+ mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 
0)));
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), 
mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+ for (uint8_t i = 0u; i < insn->num_components; ++i) {
+defs.push_back(newDefs[i]);
+mask |= 1 << i;
+ }
+
+ TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, 
defs, srcs);
+ texi->tex.levelZero = 1;
+ texi->tex.mask = mask;
+ texi->tex.useOffsets = 0;
+ texi->tex.r = 0xffff;
+ texi->tex.s = 0xffff;
+
+ info->prop.fp.readsFramebuffer = true;
+ break;
+  }
+
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  bool input = op != nir_intrinsic_load_output;
+  operation nvirOp;
+  uint32_t mode = 0;
+
+  uint32_t idx = getIndirect(insn, op == 
nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);
+  nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+  /* see load_barycentric_* handling */
+  if (prog->getType() == Program::TYPE_FRAGMENT) {
+ mode = translateInterpMode(&vary, nvirOp);
+ if (op == nir_intrinsic_load_interpolated_input) {
+ImmediateValue immMode;
+if (getSrc(&insn->src[0], 
1)->getUniqueInsn()->src(0).getImmediate(immMode))
+   mode |= immMode.reg.data.u32;
+ }
+  }
+
+  for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address);
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+int s = 1;
+if (typeSizeof(dType) == 8) {
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   Instruction *interp;
+
+   interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address + 4);
+   interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+} else {
+   Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+}
+ } else {
+mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
+ }
+  }
+  break;
+   }

[Mesa-dev] [PATCH v8 05/33] nouveau: add support for nir

2018-06-29 Thread Karol Herbst
Not all of these NIR options are actually required; they just made the work a
little easier.

v2: fix asserts
parse compute shaders
don't lower bitfield_insert
v3: fix memory leak
v4: don't lower fmod32
v5: set lower_all_io_to_temps to false
fix memory leak because we take over ownership of the nir shader
merge: use the lowering helper
v6: include TGSI debug header for proper assert call
add nv50 support
v7: fix Automake build
v8: free shader only for the set shader type

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Automake.inc  |  3 +
 src/gallium/drivers/nouveau/Makefile.am   |  5 ++
 src/gallium/drivers/nouveau/Makefile.sources  |  1 +
 .../drivers/nouveau/codegen/nv50_ir.cpp   |  3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |  1 +
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 76 +++
 src/gallium/drivers/nouveau/meson.build   |  9 ++-
 .../drivers/nouveau/nv50/nv50_program.c   | 19 -
 .../drivers/nouveau/nv50/nv50_screen.c| 40 ++
 src/gallium/drivers/nouveau/nv50/nv50_state.c | 35 -
 .../drivers/nouveau/nvc0/nvc0_program.c   | 18 -
 .../drivers/nouveau/nvc0/nvc0_screen.c| 42 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 31 +++-
 13 files changed, 267 insertions(+), 16 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

diff --git a/src/gallium/drivers/nouveau/Automake.inc 
b/src/gallium/drivers/nouveau/Automake.inc
index 1d383fcb7b1..657790494dc 100644
--- a/src/gallium/drivers/nouveau/Automake.inc
+++ b/src/gallium/drivers/nouveau/Automake.inc
@@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \
$(NOUVEAU_LIBS) \
$(LIBDRM_LIBS)
 
+TARGET_COMPILER_LIB_DEPS = \
+   $(top_builddir)/src/compiler/nir/libnir.la
+
 endif
diff --git a/src/gallium/drivers/nouveau/Makefile.am 
b/src/gallium/drivers/nouveau/Makefile.am
index f6126b54481..478dfcf437b 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CPPFLAGS = \
-I$(top_srcdir)/include/drm-uapi \
+   -I$(top_builddir)/src/compiler/nir \
+   -I$(top_srcdir)/src/compiler/nir \
+   -I$(top_srcdir)/src/mapi \
+   -I$(top_srcdir)/src/mesa \
$(GALLIUM_DRIVER_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(NOUVEAU_CFLAGS)
@@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \
 
 nouveau_compiler_LDADD = \
libnouveau.la \
+   $(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS)
diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index ec344c63169..c6a1aff7110 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_common.cpp \
codegen/nv50_ir_from_common.h \
+   codegen/nv50_ir_from_nir.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 49425b98b91..042091c7c04 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1240,6 +1240,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
+   case PIPE_SHADER_IR_NIR:
+  ret = prog->makeFromNIR(info) ? 0 : -2;
+  break;
case PIPE_SHADER_IR_TGSI:
   ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f4f3c708886..e5b4592a61e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1255,6 +1255,7 @@ public:
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
+   bool makeFromNIR(struct nv50_ir_prog_info *);
bool makeFromTGSI(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
new file mode 100644
index 000..b22c62fd434
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including 

[Mesa-dev] [PATCH v8 08/33] nvir/nir: run some passes to make the conversion easier

2018-06-29 Thread Karol Herbst
v2: add constant_folding
v6: print non final NIR only for verbose debugging
v8: add passes we will need for OpenCL compute shaders

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 41 +++
 1 file changed, 41 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b22c62fd434..a04314afe19 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,12 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
 namespace {
 
 using namespace nv50_ir;
@@ -52,6 +58,41 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
 bool
 Converter::run()
 {
+   bool progress;
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+  nir_print_shader(nir, stderr);
+
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, 
(nir_lower_io_options)0);
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+   NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+
+   do {
+  progress = false;
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_remove_phis);
+  NIR_PASS(progress, nir, nir_opt_trivial_continues);
+  NIR_PASS(progress, nir, nir_opt_cse);
+  NIR_PASS(progress, nir, nir_opt_algebraic);
+  NIR_PASS(progress, nir, nir_opt_constant_folding);
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_dce);
+  NIR_PASS(progress, nir, nir_opt_dead_cf);
+   } while (progress);
+
+   NIR_PASS_V(nir, nir_lower_locals_to_regs);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   /* Garbage collect dead instructions */
+   nir_sweep(nir);
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
return false;
 }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 09/33] nvir/nir: track defs and provide easy access functions

2018-06-29 Thread Karol Herbst
v2: add helper function for indirects
v4: add new getIndirect overload for easier use
v5: use getSSA for ssa values
we can just create the values for unassigned registers in getSrc
v6: always create at least 32 bit values
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 131 ++
 1 file changed, 131 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index a04314afe19..b7fab016b19 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,9 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+#include 
+#include 
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -48,13 +51,141 @@ public:
 
bool run();
 private:
+   typedef std::vector LValues;
+   typedef std::tr1::unordered_map NirDefMap;
+
+   LValues& convert(nir_alu_dest *);
+   LValues& convert(nir_dest *);
+   LValues& convert(nir_register *);
+   LValues& convert(nir_ssa_def *);
+
+   Value* getSrc(nir_alu_src *, uint8_t component = 0);
+   Value* getSrc(nir_register *, uint8_t);
+   Value* getSrc(nir_src *, uint8_t, bool indirect = false);
+   Value* getSrc(nir_ssa_def *, uint8_t);
+
+   uint32_t getIndirect(nir_src *, uint8_t, Value*&);
+   uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
+
nir_shader *nir;
+
+   NirDefMap ssaDefs;
+   NirDefMap regDefs;
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+Converter::LValues&
+Converter::convert(nir_dest *dest)
+{
+   if (dest->is_ssa)
+  return convert(&dest->ssa);
+   if (dest->reg.indirect) {
+  ERROR("no support for indirects.");
+  assert(false);
+   }
+   return convert(dest->reg.reg);
+}
+
+Converter::LValues&
+Converter::convert(nir_register *reg)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it != regDefs.end())
+  return (*it).second;
+
+   LValues newDef(reg->num_components);
+   for (uint8_t i = 0; i < reg->num_components; i++)
+  newDef[i] = getScratch(std::max(4, reg->bit_size / 8));
+   return regDefs[reg->index] = newDef;
+}
+
+Converter::LValues&
+Converter::convert(nir_ssa_def *def)
+{
+   NirDefMap::iterator it = ssaDefs.find(def->index);
+   if (it != ssaDefs.end())
+  return (*it).second;
+
+   LValues newDef(def->num_components);
+   for (uint8_t i = 0; i < def->num_components; i++)
+  newDef[i] = getSSA(std::max(4, def->bit_size / 8));
+   return ssaDefs[def->index] = newDef;
+}
+
+Value*
+Converter::getSrc(nir_alu_src *src, uint8_t component)
+{
+   if (src->abs || src->negate) {
+  ERROR("modifiers currently not supported on nir_alu_src\n");
+  assert(false);
+   }
+   return getSrc(&src->src, src->swizzle[component]);
+}
+
+Value*
+Converter::getSrc(nir_register *reg, uint8_t idx)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it == regDefs.end())
+  return convert(reg)[idx];
+   return (*it).second[idx];
+}
+
+Value*
+Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)
+{
+   if (src->is_ssa)
+  return getSrc(src->ssa, idx);
+
+   if (src->reg.indirect) {
+  if (indirect)
+ return getSrc(src->reg.indirect, idx);
+  ERROR("no support for indirects.");
+  assert(false);
+  return NULL;
+   }
+
+   return getSrc(src->reg.reg, idx);
+}
+
+Value*
+Converter::getSrc(nir_ssa_def *src, uint8_t idx)
+{
+   NirDefMap::iterator it = ssaDefs.find(src->index);
+   if (it == ssaDefs.end()) {
+  ERROR("SSA value %u not found\n", src->index);
+  assert(false);
+  return NULL;
+   }
+   return (*it).second[idx];
+}
+
+uint32_t
+Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)
+{
+   nir_const_value *offset = nir_src_as_const_value(*src);
+
+   if (offset) {
+  indirect = NULL;
+  return offset->u32[0];
+   }
+
+   indirect = getSrc(src, idx, true);
+   return 0;
+}
+
+uint32_t
+Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value 
*&indirect)
+{
+   int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, 
indirect);
+   if (indirect)
+  indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, 
loadImm(NULL, 4));
+   return idx;
+}
+
 bool
 Converter::run()
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 16/33] nvir/nir: implement nir_alu_instr handling

2018-06-29 Thread Karol Herbst
v2: use bitfield_insert instead of bfi
rework switch helper macros
remove some lowering code (LoweringHelper is now used for this)
v3: add pack_half_2x16_split
add unpack_half_2x16_split_x/y
v5: replace first argument with nullptr in loadImm calls
prefer getSSA over getScratch
v8: fix setting precise modifier for first instruction inside a block
add guard in case no instruction gets inserted into an empty block
don't require C++11 features

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 492 +-
 1 file changed, 491 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index a990acfc354..0ae7363b161 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -34,6 +34,31 @@
 #include 
 #include 
 
+#define CASE_OPFI(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni
+#define CASE_OPFIU(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+#define CASE_OPIU(ni) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+
+#define CASE_OPFI_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+  return val
+#define CASE_OPFIU_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+#define CASE_OPIU_RET(ni, val) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -95,9 +120,17 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_op);
+   operation preOperationNeeded(nir_op);
+
+   int getSubOp(nir_op);
+
+   CondCode getCondCode(nir_op);
+
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
bool visit(nir_function *);
@@ -116,6 +149,7 @@ private:
unsigned int curLoopDepth;
 
BasicBlock *exit;
+   Value *zero;
 
union {
   struct {
@@ -127,7 +161,10 @@ private:
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir),
- curLoopDepth(0) {}
+ curLoopDepth(0)
+{
+   zero = mkImm((uint32_t)0);
+}
 
 BasicBlock *
 Converter::convert(nir_block *block)
@@ -244,6 +281,137 @@ Converter::getSType(nir_src , bool isFloat, bool 
isSigned)
return typeOfSize(bitSize / 8, isFloat, isSigned);
 }
 
+operation
+Converter::getOperation(nir_op op)
+{
+   switch (op) {
+   // basic ops with float and int variants
+   CASE_OPFI_RET(abs, OP_ABS);
+   CASE_OPFI_RET(add, OP_ADD);
+   CASE_OPFI_RET(and, OP_AND);
+   CASE_OPFIU_RET(div, OP_DIV);
+   CASE_OPIU_RET(find_msb, OP_BFIND);
+   CASE_OPFIU_RET(max, OP_MAX);
+   CASE_OPFIU_RET(min, OP_MIN);
+   CASE_OPFIU_RET(mod, OP_MOD);
+   CASE_OPFI_RET(rem, OP_MOD);
+   CASE_OPFI_RET(mul, OP_MUL);
+   CASE_OPIU_RET(mul_high, OP_MUL);
+   CASE_OPFI_RET(neg, OP_NEG);
+   CASE_OPFI_RET(not, OP_NOT);
+   CASE_OPFI_RET(or, OP_OR);
+   CASE_OPFI_RET(eq, OP_SET);
+   CASE_OPFIU_RET(ge, OP_SET);
+   CASE_OPFIU_RET(lt, OP_SET);
+   CASE_OPFI_RET(ne, OP_SET);
+   CASE_OPIU_RET(shr, OP_SHR);
+   CASE_OPFI_RET(sub, OP_SUB);
+   CASE_OPFI_RET(xor, OP_XOR);
+   case nir_op_fceil:
+  return OP_CEIL;
+   case nir_op_fcos:
+  return OP_COS;
+   case nir_op_f2f32:
+   case nir_op_f2f64:
+   case nir_op_f2i32:
+   case nir_op_f2i64:
+   case nir_op_f2u32:
+   case nir_op_f2u64:
+   case nir_op_i2f32:
+   case nir_op_i2f64:
+   case nir_op_i2i32:
+   case nir_op_i2i64:
+   case nir_op_u2f32:
+   case nir_op_u2f64:
+   case nir_op_u2u32:
+   case nir_op_u2u64:
+  return OP_CVT;
+   case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
+  return OP_DFDX;
+   case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
+  return OP_DFDY;
+   case nir_op_fexp2:
+  return OP_EX2;
+   case nir_op_ffloor:
+  return OP_FLOOR;
+   case nir_op_ffma:
+  return OP_FMA;
+   case nir_op_flog2:
+  return OP_LG2;
+   case nir_op_pack_64_2x32_split:
+  return OP_MERGE;
+   case nir_op_frcp:
+  return OP_RCP;
+   case nir_op_frsq:
+  return OP_RSQ;
+   case nir_op_fsat:
+  return OP_SAT;
+   case nir_op_ishl:
+  return OP_SHL;
+   case nir_op_fsin:
+  return OP_SIN;
+   case nir_op_fsqrt:
+  return OP_SQRT;
+   case nir_op_ftrunc:
+  return OP_TRUNC;
+   default:
+  ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
+operation
+Converter::preOperationNeeded(nir_op op)
+{
+   switch (op) {
+   case nir_op_fcos:
+   case nir_op_fsin:
+  return OP_PRESIN;
+   default:
+  return OP_NOP;
+   }
+}
+
+int

[Mesa-dev] [PATCH v8 15/33] nvir/nir: add skeleton for nir_intrinsic_instr

2018-06-29 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp| 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 3084f32abb4..a990acfc354 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -103,6 +103,7 @@ private:
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
+   bool visit(nir_intrinsic_instr *);
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
@@ -1270,6 +1271,8 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_intrinsic:
+  return visit(nir_instr_as_intrinsic(insn));
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
@@ -1281,6 +1284,20 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   nir_intrinsic_op op = insn->intrinsic;
+
+   switch (op) {
+   default:
+  ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
+  return false;
+   }
+
+   return true;
+}
+
 bool
 Converter::visit(nir_jump_instr *insn)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 14/33] nvir/nir: implement nir_load_const_instr

2018-06-29 Thread Karol Herbst
v8: fix loading 8/16 bit constants

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 28 +++
 1 file changed, 28 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 9ac98e04562..3084f32abb4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -104,6 +104,7 @@ private:
bool visit(nir_if *);
bool visit(nir_instr *);
bool visit(nir_jump_instr *);
+   bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
 
nir_shader *nir;
@@ -1271,6 +1272,8 @@ Converter::visit(nir_instr *insn)
switch (insn->type) {
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
+   case nir_instr_type_load_const:
+  return visit(nir_instr_as_load_const(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -1305,6 +1308,31 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_load_const_instr *insn)
+{
+   assert(insn->def.bit_size <= 64);
+
+   LValues &newDefs = convert(&insn->def);
+   for (int i = 0; i < insn->def.num_components; i++) {
+  switch (insn->def.bit_size) {
+  case 64:
+ loadImm(newDefs[i], insn->value.u64[i]);
+ break;
+  case 32:
+ loadImm(newDefs[i], insn->value.u32[i]);
+ break;
+  case 16:
+ loadImm(newDefs[i], insn->value.u16[i]);
+ break;
+  case 8:
+ loadImm(newDefs[i], insn->value.u8[i]);
+ break;
+  }
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 02/33] nvc0: print the shader type when dumping headers

2018-06-29 Thread Karol Herbst
This makes debugging the shader header a little easier.

Acked-by: Pierre Moreau 
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 57d98753f45..d87adc998a2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -554,6 +554,7 @@ nvc0_program_dump(struct nvc0_program *prog)
unsigned pos;
 
if (prog->type != PIPE_SHADER_COMPUTE) {
+  debug_printf("dumping HDR for type %i\n", prog->type);
   for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos)
  debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
   pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 07/33] nouveau: fix nir and TGSI shader cache collision

2018-06-29 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nouveau_screen.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index 03c1c74160f..7acf2050f10 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -156,9 +156,13 @@ nouveau_disk_cache_create(struct nouveau_screen *screen)
  &mesa_timestamp)) {
   res = asprintf(&timestamp_str, "%u", mesa_timestamp);
   if (res != -1) {
+ uint64_t shader_debug_flags = 0;
+ if (screen->prefer_nir)
+shader_debug_flags |= 1 << 0;
+
  screen->disk_shader_cache =
 disk_cache_create(nouveau_screen_get_name(&screen->base),
-  timestamp_str, 0);
+  timestamp_str, shader_debug_flags);
  free(timestamp_str);
   }
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 00/33] Nouveau NIR support

2018-06-29 Thread Karol Herbst
I think this is finally close to complete. There are a handful of piglit and
CTS regressions, but those are caused by issues in common NIR and gallium code.

Some features, such as bindless textures, remain disabled because more
groundwork is needed to support them properly with NIR.

Changes since v7:
* port images to derefs
* remove use of C++11 features
* add more passes the code implicitly depends on
* add changes for reading out sample locations
* fix loading 8 and 16 bit constants
* minor cleanups and bugfixes

Connor Abbott (1):
  nv50/ir/ra: Fix copying compound for moves

Karol Herbst (32):
  nvc0: print the shader type when dumping headers
  nvir: move common converter code in base class
  nvir: add lowering helper
  nouveau: add support for nir
  nouveau: add env var to make nir default
  nouveau: fix nir and TGSI shader cache collision
  nvir/nir: run some passes to make the conversion easier
  nvir/nir: track defs and provide easy access functions
  nvir/nir: add nir type helper functions
  nvir/nir: run assignSlots
  nvir/nir: add loadFrom and storeTo helper
  nvir/nir: parse NIR shader info
  nvir/nir: implement nir_load_const_instr
  nvir/nir: add skeleton for nir_intrinsic_instr
  nvir/nir: implement nir_alu_instr handling
  nvir/nir: implement nir_intrinsic_load_uniform
  nvir/nir: implement nir_intrinsic_store_(per_vertex_)output
  nvir/nir: implement load_(interpolated_)input/output
  nvir/nir: implement intrinsic_discard(_if)
  nvir/nir: implement loading system values
  nvir/nir: implement nir_ssa_undef_instr
  nvir/nir: implement nir_instr_type_tex
  nvir/nir: add getOperation for intrinsics
  nvir/nir: implement vote and ballot
  nvir/nir: implement variable indexing
  nvir/nir: implement geometry shader nir_intrinsics
  nvir/nir: implement nir_intrinsic_load_ubo
  nvir/nir: implement ssbo intrinsics
  nvir/nir: implement images
  nvir/nir: add memory barriers
  nvir/nir: implement load_per_vertex_output
  nvir/nir: implement intrinsic shader_clock

 src/gallium/drivers/nouveau/Automake.inc  |3 +
 src/gallium/drivers/nouveau/Makefile.am   |5 +
 src/gallium/drivers/nouveau/Makefile.sources  |5 +
 .../drivers/nouveau/codegen/nv50_ir.cpp   |3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |1 +
 .../nouveau/codegen/nv50_ir_from_common.cpp   |  107 +
 .../nouveau/codegen/nv50_ir_from_common.h |   58 +
 .../nouveau/codegen/nv50_ir_from_nir.cpp  | 3144 +
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp |  106 +-
 .../codegen/nv50_ir_lowering_helper.cpp   |  275 ++
 .../nouveau/codegen/nv50_ir_lowering_helper.h |   53 +
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp|   60 +-
 src/gallium/drivers/nouveau/meson.build   |   13 +-
 src/gallium/drivers/nouveau/nouveau_screen.c  |   11 +-
 src/gallium/drivers/nouveau/nouveau_screen.h  |2 +
 .../drivers/nouveau/nv50/nv50_program.c   |   19 +-
 .../drivers/nouveau/nv50/nv50_screen.c|   44 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c |   35 +-
 .../drivers/nouveau/nvc0/nvc0_program.c   |   19 +-
 .../drivers/nouveau/nvc0/nvc0_screen.c|   61 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c |   31 +-
 21 files changed, 3907 insertions(+), 148 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v8 03/33] nvir: move common converter code in base class

2018-06-29 Thread Karol Herbst
v2: remove TGSI related bits

Signed-off-by: Karol Herbst 
Reviewed-by: Pierre Moreau 
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 .../nouveau/codegen/nv50_ir_from_common.cpp   | 107 ++
 .../nouveau/codegen/nv50_ir_from_common.h |  58 ++
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp | 106 +
 src/gallium/drivers/nouveau/meson.build   |   2 +
 5 files changed, 172 insertions(+), 103 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d8..fee5e59522e 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_build_util.h \
codegen/nv50_ir_driver.h \
codegen/nv50_ir_emit_nv50.cpp \
+   codegen/nv50_ir_from_common.cpp \
+   codegen/nv50_ir_from_common.h \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
new file mode 100644
index 000..0ad6087e588
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir_from_common.h"
+
+namespace nv50_ir {
+
+ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
+   :  BuildUtil(prog),
+  info(info) {}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(unsigned ip)
+{
+   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(
+  ip, Subroutine(new Function(prog, "SUB", ip)))).first;
+
+   return &it->second;
+}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(Function *f)
+{
+   unsigned ip = f->getLabel();
+   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
+
+   return &it->second;
+}
+
+uint8_t
+ConverterCommon::translateInterpMode(const struct nv50_ir_varying *var, 
operation& op)
+{
+   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
+
+   if (var->flat)
+  mode = NV50_IR_INTERP_FLAT;
+   else
+   if (var->linear)
+  mode = NV50_IR_INTERP_LINEAR;
+   else
+   if (var->sc)
+  mode = NV50_IR_INTERP_SC;
+
+   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
+  ? OP_PINTERP : OP_LINTERP;
+
+   if (var->centroid)
+  mode |= NV50_IR_INTERP_CENTROID;
+
+   return mode;
+}
+
+void
+ConverterCommon::handleUserClipPlanes()
+{
+   Value *res[8];
+   int n, i, c;
+
+   for (c = 0; c < 4; ++c) {
+  for (i = 0; i < info->io.genUserClip; ++i) {
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
+ if (c == 0)
+res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
+ else
+mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
+  }
+   }
+
+   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
+
+   for (i = 0; i < info->io.genUserClip; ++i) {
+  n = i / 4 + first;
+  c = i % 4;
+  Symbol *sym =
+ mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
+  mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
+   }
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h 

[Mesa-dev] [PATCH v8 04/33] nvir: add lowering helper

2018-06-29 Thread Karol Herbst
this is mostly useful for lazy IR converters not wanting to deal with 64 bit
lowering and other illegal stuff

v5: also handle SAT
v6: rename type variables
fixed lowering of NEG
add lowering of NOT
v8: don't require C++11 features

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 .../codegen/nv50_ir_lowering_helper.cpp   | 275 ++
 .../nouveau/codegen/nv50_ir_lowering_helper.h |  53 
 src/gallium/drivers/nouveau/meson.build   |   2 +
 4 files changed, 332 insertions(+)
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
 create mode 100644 
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index fee5e59522e..ec344c63169 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -122,6 +122,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_graph.h \
codegen/nv50_ir.h \
codegen/nv50_ir_inlines.h \
+   codegen/nv50_ir_lowering_helper.cpp \
+   codegen/nv50_ir_lowering_helper.h \
codegen/nv50_ir_lowering_nv50.cpp \
codegen/nv50_ir_peephole.cpp \
codegen/nv50_ir_print.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
new file mode 100644
index 000..02380f12b9f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_helper.cpp
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst 
+ */
+
+#include "codegen/nv50_ir_lowering_helper.h"
+
+namespace nv50_ir {
+
+bool
+LoweringHelper::visit(Instruction *insn)
+{
+   switch (insn->op) {
+   case OP_ABS:
+  return handleABS(insn);
+   case OP_CVT:
+  return handleCVT(insn);
+   case OP_MAX:
+   case OP_MIN:
+  return handleMAXMIN(insn);
+   case OP_MOV:
+  return handleMOV(insn);
+   case OP_NEG:
+  return handleNEG(insn);
+   case OP_SAT:
+  return handleSAT(insn);
+   case OP_SLCT:
+  return handleSLCT(insn->asCmp());
+   case OP_AND:
+   case OP_NOT:
+   case OP_OR:
+   case OP_XOR:
+  return handleLogOp(insn);
+   default:
+  return true;
+   }
+}
+
+bool
+LoweringHelper::handleABS(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   if (!(dTy == TYPE_U64 || dTy == TYPE_S64))
+  return true;
+
+   bld.setPosition(insn, false);
+
+   Value *neg = bld.getSSA(8);
+   Value *negComp[2], *srcComp[2];
+   Value *lo = bld.getSSA(), *hi = bld.getSSA();
+   bld.mkOp2(OP_SUB, dTy, neg, bld.mkImm((uint64_t)0), insn->getSrc(0));
+   bld.mkSplit(negComp, 4, neg);
+   bld.mkSplit(srcComp, 4, insn->getSrc(0));
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, lo, TYPE_S32, negComp[0], srcComp[0], 
srcComp[1]);
+   bld.mkCmp(OP_SLCT, CC_LT, TYPE_S32, hi, TYPE_S32, negComp[1], srcComp[1], 
srcComp[1]);
+   insn->op = OP_MERGE;
+   insn->setSrc(0, lo);
+   insn->setSrc(1, hi);
+
+   return true;
+}
+
+bool
+LoweringHelper::handleCVT(Instruction *insn)
+{
+   DataType dTy = insn->dType;
+   DataType sTy = insn->sType;
+
+   if (typeSizeof(dTy) <= 4 && typeSizeof(sTy) <= 4)
+  return true;
+
+   bld.setPosition(insn, false);
+
+   if ((dTy == TYPE_S32 && sTy == TYPE_S64) ||
+   (dTy == TYPE_U32 && sTy == TYPE_U64)) {
+  Value *src[2];
+  bld.mkSplit(src, 4, insn->getSrc(0));
+  insn->op = OP_MOV;
+  insn->setSrc(0, src[0]);
+   } else if (dTy == TYPE_S64 && sTy == TYPE_S32) {
+  Value *tmp = bld.getSSA();
+  bld.mkOp2(OP_SHR, TYPE_S32, tmp, insn->getSrc(0), 
bld.loadImm(bld.getSSA(), 31));
+  insn->op = OP_MERGE;
+  insn->setSrc(1, tmp);
+   } else if (dTy == TYPE_U64 && sTy == TYPE_U32) {
+  insn->op = OP_MERGE;
+  insn->setSrc(1, 

[Mesa-dev] [PATCH v8 06/33] nouveau: add env var to make nir default

2018-06-29 Thread Karol Herbst
v2: allow for non debug builds as well
v3: move reading out env var more global
disable tg4 with multiple offsets with nir
disable caps for 64 bit types
v6: nv50 support
disable MS images
disable bindless textures

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nouveau_screen.c  |  5 +
 src/gallium/drivers/nouveau/nouveau_screen.h  |  2 ++
 .../drivers/nouveau/nv50/nv50_screen.c|  4 +++-
 .../drivers/nouveau/nvc0/nvc0_screen.c| 19 +--
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index c97b707955c..03c1c74160f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -176,9 +176,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct 
nouveau_device *dev)
union nouveau_bo_config mm_config;
 
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+   char *use_nir = getenv("NV50_PROG_USE_NIR");
+
if (nv_dbg)
   nouveau_mesa_debug = atoi(nv_dbg);
 
+   if (use_nir)
+  screen->prefer_nir = strtol(use_nir, NULL, 0) == 1;
+
/* These must be set before any failure is possible, as the cleanup
 * paths assume they're responsible for deleting them.
 */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
b/src/gallium/drivers/nouveau/nouveau_screen.h
index e4fbae99ca4..1229b66b26f 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -62,6 +62,8 @@ struct nouveau_screen {
 
struct disk_cache *disk_shader_cache;
 
+   bool prefer_nir;
+
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
   uint64_t v[29];
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 24324d7a11e..826c01c238e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -317,6 +317,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
  enum pipe_shader_type shader,
  enum pipe_shader_cap param)
 {
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+
switch (shader) {
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
@@ -370,7 +372,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
   return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_PREFERRED_IR:
-  return PIPE_SHADER_IR_TGSI;
+  return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   return 32;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index b32a129c7e2..70c8a6ffbf4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -87,9 +87,11 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
 
if (bindings & PIPE_BIND_SHADER_IMAGE) {
   if (sample_count > 0 &&
-  nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+  (nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS ||
+   nouveau_screen(pscreen)->prefer_nir)) {
  /* MS images are currently unsupported on Maxwell because they have to
   * be handled explicitly. */
+ /* MS images are currently unsupported with NIR */
  return false;
   }
 
@@ -109,7 +111,8 @@ static int
 nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 {
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
-   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   const struct nouveau_screen *screen = nouveau_screen(pscreen);
+   struct nouveau_device *dev = screen->device;
 
switch (param) {
/* non-boolean caps */
@@ -217,7 +220,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
-   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
@@ -258,6 +260,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_QUERY_SO_OVERFLOW:
   return 1;
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+  /* TODO: nir doesn't support tg4 with multiple offsets */
+  return screen->prefer_nir ? 0 : 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
case PIPE_CAP_TGSI_FS_FBFETCH:
@@ -275,8 +280,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 

[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2018-06-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449
Bug 77449 depends on bug 99813, which changed state.

Bug 99813 Summary: F1 2015 rendering broken at medium quality
https://bugs.freedesktop.org/show_bug.cgi?id=99813

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup

2018-06-29 Thread Gert Wollny
Am Freitag, den 29.06.2018, 17:42 +0200 schrieb Roland Scheidegger:
[...]
Thanks for the review 

> 
> So for gather4_O (where you'd need to do it differently) it must not
> be
> done, but for gather4 it has to be done?
> Still doesn't make all that much sense to me. The hw may be weird but
> I don't think it would be that weird?
Yes, this got me thinking too. First I was wondering whether all
gather4 operations ignore the offset (as in hardware bug), but no, this
is not the case. Then I looked at the created shaders. The first thing
I don't really understand is why the TGSI has

   TG4 TEMP[1], TEMP[1], IMM[1]., SAMP[0], 2D_ARRAY, TEMP[2].xyy

i.e. the offset Y coordinate is also passed into Z, but somehow this
seems to be irrelevant - I wonder whether this might pose a problem
somewhere else but here the piglit still passes.

However, the real fun comes from sb: The code created without adding
the offset to the texture coordinate: 

  0032  SET_TEXTURE_OFFSETS R0., R0.zww0,   RID:0, SID:0 CT:
  0036  GATHER4_O   R0.yzxw, R0.xy1_,   RID:18, SID:0 CT:NNUN

and this is what sb does when I add the 0.5 offset to the z
coordinate: 

  0032  SET_TEXTURE_OFFSETS R0., R0.,   RID:0, SID:0 CT:
  0036  GATHER4_O   R0.yzxw, R0.xyz_,   RID:18, SID:0 CT:NNUN

Consequently, when disabling sb, the piglit also passes when I add the
0.5 offset, I've now also tested what happens when I set this offset
0.5 via SET_TEXTURE_OFFSETS, and it seems to work and sb doesn't do
funny things. 

Best, 
Gert 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 18.1.3

2018-06-29 Thread Ilia Mirkin
On Fri, Jun 29, 2018 at 2:00 PM, Dylan Baker  wrote:
> Hi List,
>
> Mesa 18.1.3 is now available. This is a bug fix point release in the 18.1
> series.
>
> In this release we have:
>  - numerous fixes for radv
>  - some fixes for common radeon code
>  - several meson fixes
>  - fixes for autotools with non-gnu grep
>  - fixes for both the nir and glsl compilers
>  - An i965 fix for some gpu hangs on SNB

A git shortlog has traditionally been included in such announcements
(as well as the pre-announcements). I'd strongly recommend continuing
this practice.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106842] Error de prueba

2018-06-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106842

Andre Klapper  changed:

   What|Removed |Added

  Component|Demos   |Two
 QA Contact|mesa-dev@lists.freedesktop. |
   |org |
Product|Mesa|Spam
   Assignee|mesa-dev@lists.freedesktop. |dan...@fooishbar.org
   |org |

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 4/4] gallium/llvmpipe: Enable support bptc format.

2018-06-29 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Tue, Jun 26, 2018 at 4:30 PM, Denis Pauk  wrote:
> v2: none
> v3: none
>
> Signed-off-by: Denis Pauk 
> CC: Marek Olšák 
> CC: Rhys Perry 
> CC: Matt Turner 
> ---
>  src/gallium/drivers/llvmpipe/lp_screen.c  | 3 +--
>  src/gallium/drivers/llvmpipe/lp_test_format.c | 3 +--
>  2 files changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index 28dbd9908f..9921a941d5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -534,8 +534,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
>}
> }
>
> -   if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
> -   format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> +   if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
>/* Software decoding is not hooked up. */
>return FALSE;
> }
> diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c 
> b/src/gallium/drivers/llvmpipe/lp_test_format.c
> index e9a6e01fdc..a8aa33d8ae 100644
> --- a/src/gallium/drivers/llvmpipe/lp_test_format.c
> +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
> @@ -388,8 +388,7 @@ test_all(unsigned verbose, FILE *fp)
>}
>
>/* missing fetch funcs */
> -  if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
> -  format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
> +  if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
>   continue;
>}
>
> --
> 2.18.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] st/mesa: Also check for PIPE_FORMAT_A8R8G8B8_SRGB for texture_sRGB

2018-06-29 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 22, 2018 at 9:59 AM, Tomeu Vizoso
 wrote:
> and PIPE_FORMAT_R8G8B8A8_SRGB, as well.
>
> The reason for this is that when Virgl runs with GLES on the host, it
> cannot directly upload textures in BGRA.
>
> So to avoid a conversion step, consider the RGB sRGB formats as well for
> this extension.
>
> Signed-off-by: Tomeu Vizoso 
> ---
>  src/mesa/state_tracker/st_extensions.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index 274df7bd6a6a..ba0f671c363d 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -811,7 +811,9 @@ void st_init_extensions(struct pipe_screen *screen,
>{ { o(EXT_texture_sRGB),
>o(EXT_texture_sRGB_decode) },
>  { PIPE_FORMAT_A8B8G8R8_SRGB,
> -  PIPE_FORMAT_B8G8R8A8_SRGB },
> +  PIPE_FORMAT_B8G8R8A8_SRGB,
> +  PIPE_FORMAT_A8R8G8B8_SRGB,
> +  PIPE_FORMAT_R8G8B8A8_SRGB},
>  GL_TRUE }, /* at least one format must be supported */
>
>{ { o(EXT_texture_type_2_10_10_10_REV) },
> --
> 2.17.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] st/mesa: Fall back to R8G8B8A8_SRGB for ETC2

2018-06-29 Thread Marek Olšák
On Fri, Jun 22, 2018 at 9:59 AM, Tomeu Vizoso
 wrote:
> If the driver doesn't support PIPE_FORMAT_B8G8R8A8_SRGB, fall back to
> PIPE_FORMAT_R8G8B8A8_SRGB.
>
> Drivers such as Virgl will have a hard time supporting
> PIPE_FORMAT_B8G8R8A8_SRGB when the host runs GLES, as GL_ABGR isn't as
> well supported there.

B8G8R8A8 is GL_BGRA.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: fix typo (wrong gen number) in comment

2018-06-29 Thread Kenneth Graunke
On Friday, June 29, 2018 11:39:34 AM PDT Caio Marcelo de Oliveira Filho wrote:
> ---
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index b279f01e1a1..921b420ab81 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1935,7 +1935,7 @@ genX(upload_wm)(struct brw_context *brw)
>  brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 
> 2));
>}
>  #elif GEN_GEN == 6
> -  /* On gen5, we have multiple shader kernels and we no longer specify a
> +  /* On gen6, we have multiple shader kernels and we no longer specify a
> * register count for each one.
> */
>wm.KernelStartPointer0 = stage_state->prog_offset +
> 

Reviewed-by: Kenneth Graunke 




signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] anv, intel: Enable nir_opt_large_constants for Vulkan

2018-06-29 Thread Jason Ekstrand
According to RenderDoc, this shaves 99.6% of the run time off of the
ambient occlusion pass in Skyrim Special Edition when running under DXVK
and shaves 92% off the runtime for a reasonably representative frame.
When running the actual game, Skyrim goes from being a slide-show to a
very stable and playable framerate on my SKL GT4e machine.
---
 src/intel/compiler/brw_compiler.h | 6 ++
 src/intel/compiler/brw_nir.c  | 7 +++
 src/intel/vulkan/anv_device.c | 1 +
 3 files changed, 14 insertions(+)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 2f745d92745..9dfcfcc0115 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -112,6 +112,12 @@ struct brw_compiler {
 * will attempt to push everything.
 */
bool supports_pull_constants;
+
+   /**
+* Whether or not the driver supports NIR shader constants.  This controls
+* whether nir_opt_large_constants will be run.
+*/
+   bool supports_shader_constants;
 };
 
 /**
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index dfeea73b06a..2d04133fbae 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -664,6 +664,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
nir = brw_nir_optimize(nir, compiler, is_scalar);
 
+   /* This needs to be run after the first optimization pass but before we
+* lower indirect derefs away
+*/
+   if (compiler->supports_shader_constants) {
+  OPT(nir_opt_large_constants, type_size_scalar_bytes, 32);
+   }
+
nir_lower_bit_size(nir, lower_bit_size_callback, NULL);
 
if (is_scalar) {
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index b3d30675b1e..077f5c16e46 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -434,6 +434,7 @@ anv_physical_device_init(struct anv_physical_device *device,
device->compiler->supports_pull_constants = false;
device->compiler->constant_buffer_0_is_relative =
   device->info.gen < 8 || !device->has_context_isolation;
+   device->compiler->supports_shader_constants = true;
 
isl_device_init(&device->isl_dev, &device->info, swizzled);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] anv: Add state setup support for shader constants

2018-06-29 Thread Jason Ekstrand
---
 .../vulkan/anv_nir_apply_pipeline_layout.c| 47 
 src/intel/vulkan/anv_private.h|  1 +
 src/intel/vulkan/genX_cmd_buffer.c| 72 ++-
 3 files changed, 101 insertions(+), 19 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 37a54b2efff..c287a005bd6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -32,6 +32,8 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
 
+   bool uses_constants;
+   uint8_t constants_offset;
struct {
   BITSET_WORD *used;
   uint8_t *surface_offsets;
@@ -100,6 +102,10 @@ get_used_bindings_block(nir_block *block,
 add_deref_src_binding(state, intrin->src[0]);
 break;
 
+ case nir_intrinsic_load_constant:
+state->uses_constants = true;
+break;
+
  default:
 break;
  }
@@ -172,6 +178,33 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
nir_instr_remove(&intrin->instr);
 }
 
+static void
+lower_load_constant(nir_intrinsic_instr *intrin,
+struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
+   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
+  nir_imm_int(b, nir_intrinsic_base(intrin)));
+
+   nir_intrinsic_instr *load_ubo =
+  nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   load_ubo->num_components = intrin->num_components;
+   load_ubo->src[0] = nir_src_for_ssa(index);
+   load_ubo->src[1] = nir_src_for_ssa(offset);
+   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+   nir_builder_instr_insert(b, &load_ubo->instr);
+
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+nir_src_for_ssa(&load_ubo->dest.ssa));
+   nir_instr_remove(&intrin->instr);
+}
+
 static void
 lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
 unsigned *base_index,
@@ -285,6 +318,9 @@ apply_pipeline_layout_block(nir_block *block,
  case nir_intrinsic_vulkan_resource_reindex:
 lower_res_reindex_intrinsic(intrin, state);
 break;
+ case nir_intrinsic_load_constant:
+lower_load_constant(intrin, state);
+break;
  default:
 break;
  }
@@ -343,6 +379,9 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
  get_used_bindings_block(block, &state);
}
 
+   if (state.uses_constants)
+  map->surface_count++;
+
for (uint32_t set = 0; set < layout->num_sets; set++) {
   struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 
@@ -365,6 +404,14 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline 
*pipeline,
unsigned surface = 0;
unsigned sampler = 0;
unsigned image = 0;
+
+   if (state.uses_constants) {
+  state.constants_offset = surface;
+  map->surface_to_descriptor[surface].set =
+ ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
+  surface++;
+   }
+
for (uint32_t set = 0; set < layout->num_sets; set++) {
   struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f82b88df7a0..139c48b7e46 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1570,6 +1570,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
 
+#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
 
 struct anv_pipeline_binding {
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 97b321ccaeb..34fbd83d148 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2026,6 +2026,26 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
  bt_map[bias + s] = surface_state.offset + state_offset;
  continue;
+  } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) {
+ struct anv_state surface_state =
+anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+ struct anv_address constant_data = {
+.bo = >device->dynamic_state_pool.block_pool.bo,
+.offset = pipeline->shaders[stage]->constant_data.offset,
+ };
+ unsigned constant_data_size =
+pipeline->shaders[stage]->constant_data_size;
+
+ const enum isl_format format =
+

[Mesa-dev] [PATCH 3/6] nir: Add a large constants optimization pass

2018-06-29 Thread Jason Ekstrand
This pass searches for reasonably large local variables which can be
statically proven to be constant and moves them into shader constant
data.  This is especially useful when large tables are baked into the
shader source code because they can be moved into a UBO by the driver to
reduce register pressure and make indirect access cheaper.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |   4 +
 src/compiler/nir/nir_opt_large_constants.c | 338 +
 4 files changed, 344 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_large_constants.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 0fcbc5c5c5b..9e3fbdc2612 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -276,6 +276,7 @@ NIR_FILES = \
nir/nir_opt_if.c \
nir/nir_opt_intrinsics.c \
nir/nir_opt_loop_unroll.c \
+   nir/nir_opt_large_constants.c \
nir/nir_opt_move_comparisons.c \
nir/nir_opt_move_load_ubo.c \
nir/nir_opt_peephole_select.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index eb7fb7b121e..28aa8de7014 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -160,6 +160,7 @@ files_libnir = files(
   'nir_opt_global_to_local.c',
   'nir_opt_if.c',
   'nir_opt_intrinsics.c',
+  'nir_opt_large_constants.c',
   'nir_opt_loop_unroll.c',
   'nir_opt_move_comparisons.c',
   'nir_opt_move_load_ubo.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 6c49bce9aaa..106bc538427 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2904,6 +2904,10 @@ bool nir_opt_if(nir_shader *shader);
 
 bool nir_opt_intrinsics(nir_shader *shader);
 
+bool nir_opt_large_constants(nir_shader *shader,
+ int (*type_size)(const struct glsl_type *),
+ unsigned threshold);
+
 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
 
 bool nir_opt_move_comparisons(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_large_constants.c 
b/src/compiler/nir/nir_opt_large_constants.c
new file mode 100644
index 000..9dfbf9a83f8
--- /dev/null
+++ b/src/compiler/nir/nir_opt_large_constants.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+struct var_info {
+   bool is_constant;
+   bool found_read;
+};
+
+static unsigned
+struct_type_get_field_offset(const struct glsl_type *struct_type,
+ int (*type_size)(const struct glsl_type *),
+ unsigned field_idx)
+{
+   assert(glsl_type_is_struct(struct_type));
+   unsigned offset = 0;
+   for (unsigned i = 0; i < field_idx; i++)
+  offset += type_size(glsl_get_struct_field(struct_type, i));
+   return offset;
+}
+
+static nir_ssa_def *
+build_constant_load(nir_builder *b,
+int (*type_size)(const struct glsl_type *),
+nir_deref_instr *deref)
+{
+   const unsigned bit_size = glsl_get_bit_size(deref->type);
+   const unsigned num_components = glsl_get_vector_elements(deref->type);
+
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+   while (deref->deref_type != nir_deref_type_var) {
+  nir_deref_instr *parent = nir_deref_instr_parent(deref);
+
+  if (deref->deref_type == nir_deref_type_array) {
+ nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+ nir_ssa_def *stride = nir_imm_int(b, type_size(deref->type));
+ offset = nir_iadd(b, offset, nir_imul(b, index, stride));
+  } else {
+ assert(deref->deref_type == nir_deref_type_struct);
+ unsigned field_offset =
+

[Mesa-dev] [PATCH 4/6] anv: Add support for shader constant data to the pipeline cache

2018-06-29 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c  |  1 +
 src/intel/vulkan/anv_pipeline.c   | 12 
 src/intel/vulkan/anv_pipeline_cache.c | 26 ++
 src/intel/vulkan/anv_private.h|  6 ++
 4 files changed, 45 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 5373faaa680..4dbfb7a83fd 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -70,6 +70,7 @@ upload_blorp_shader(struct blorp_context *blorp,
struct anv_shader_bin *bin =
   anv_pipeline_cache_upload_kernel(>blorp_shader_cache,
key, key_size, kernel, kernel_size,
+   NULL, 0,
prog_data, prog_data_size, _map);
 
if (!bin)
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 67ede46f2ae..8b630f7a85a 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -485,6 +485,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
+   const void *constant_data,
+   uint32_t constant_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct anv_pipeline_bind_map *bind_map)
@@ -492,11 +494,13 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
if (cache) {
   return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
   kernel_data, kernel_size,
+  constant_data, 
constant_data_size,
   prog_data, prog_data_size,
   bind_map);
} else {
   return anv_shader_bin_create(pipeline->device, key_data, key_size,
kernel_data, kernel_size,
+   constant_data, constant_data_size,
prog_data, prog_data_size,
prog_data->param, bind_map);
}
@@ -575,6 +579,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
   unsigned code_size = prog_data.base.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+   nir->constant_data, nir->num_constants,
_data.base.base, sizeof(prog_data),
);
   if (!bin) {
@@ -742,6 +747,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
   tcs_bin = anv_pipeline_upload_kernel(pipeline, cache,
tcs_sha1, sizeof(tcs_sha1),
shader_code, code_size,
+   tcs_nir->constant_data,
+   tcs_nir->num_constants,
_prog_data.base.base,
sizeof(tcs_prog_data),
_map);
@@ -763,6 +770,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
   tes_bin = anv_pipeline_upload_kernel(pipeline, cache,
tes_sha1, sizeof(tes_sha1),
shader_code, code_size,
+   tes_nir->constant_data,
+   tes_nir->num_constants,
_prog_data.base.base,
sizeof(tes_prog_data),
_map);
@@ -845,6 +854,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
   const unsigned code_size = prog_data.base.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+   nir->constant_data, nir->num_constants,
_data.base.base, sizeof(prog_data),
);
   if (!bin) {
@@ -995,6 +1005,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
   unsigned code_size = prog_data.base.program_size;
   bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
shader_code, code_size,
+   nir->constant_data, nir->num_constants,

[Mesa-dev] [PATCH 1/6] nir: Add a deref_instr_has_indirect helper

2018-06-29 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_deref.c | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c16ce547642..e35bef612df 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -987,6 +987,8 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr)
return instr->var;
 }
 
+bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
+
 bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
 
 typedef struct {
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index 1a00157c2fc..22ecde4ecca 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -102,6 +102,24 @@ nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
return progress;
 }
 
+bool
+nir_deref_instr_has_indirect(nir_deref_instr *instr)
+{
+   while (instr->deref_type != nir_deref_type_var) {
+  /* Consider casts to be indirects */
+  if (instr->deref_type == nir_deref_type_cast)
+ return true;
+
+  if (instr->deref_type == nir_deref_type_array &&
+  !nir_src_as_const_value(instr->arr.index))
+ return true;
+
+  instr = nir_deref_instr_parent(instr);
+   }
+
+   return false;
+}
+
 bool
 nir_remove_dead_derefs_impl(nir_function_impl *impl)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] nir: Add a concept of constant data associated with a shader

2018-06-29 Thread Jason Ekstrand
This commit adds a concept to NIR of having a blob of constant data
associated with a shader.  Instead of being a UBO or uniform that can be
manipulated by the client, this constant data is considered part of the
shader and remains constant across all invocations of the given shader
until the end of time.  To access this constant data from the shader, we
add a new load_constant intrinsic.  The intention is that drivers will
eventually lower load_constant intrinsics to load_ubo, load_uniform, or
something similar.  Constant data will be used by the optimization pass
in the next commit but this concept may also be useful for OpenCL.
---
 src/compiler/nir/nir.h |  9 -
 src/compiler/nir/nir_clone.c   |  6 ++
 src/compiler/nir/nir_intrinsics.py |  2 ++
 src/compiler/nir/nir_serialize.c   | 10 ++
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e35bef612df..6c49bce9aaa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2038,6 +2038,13 @@ typedef struct nir_shader {
/** list of shared compute variables (nir_variable) */
struct exec_list shared;
 
+   /** Constant data associated with this shader.
+*
+* Constant data is loaded through load_constant intrinsics.  See also
+* nir_opt_large_constants.
+*/
+   void *constant_data;
+
/** Set of driver-specific options for the shader.
 *
 * The memory for the options is expected to be kept in a single static
@@ -2066,7 +2073,7 @@ typedef struct nir_shader {
 * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
 * access plus one
 */
-   unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+   unsigned num_inputs, num_uniforms, num_outputs, num_shared, num_constants;
 } nir_shader;
 
 static inline nir_function_impl *
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 23bb17eeba3..1adfaccc54d 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -733,6 +733,12 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
ns->num_uniforms = s->num_uniforms;
ns->num_outputs = s->num_outputs;
ns->num_shared = s->num_shared;
+   ns->num_constants = s->num_constants;
+
+   if (s->num_constants > 0) {
+  ns->constant_data = ralloc_size(ns, s->num_constants);
+  memcpy(ns->constant_data, s->constant_data, s->num_constants);
+   }
 
free_clone_state();
 
diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index d9d0bbdfccf..44a5b76beb6 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -532,6 +532,8 @@ load("per_vertex_output", 2, [BASE, COMPONENT], 
[CAN_ELIMINATE])
 load("shared", 1, [BASE], [CAN_ELIMINATE])
 # src[] = { offset }. const_index[] = { base, range }
 load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
+# src[] = { offset }. const_index[] = { base, range }
+load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 
 # Stores work the same way as loads, except now the first source is the value
 # to store and the second (and possibly third) source specify where to store
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index cc4bf23aa0f..2f2ecc9b270 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -1106,6 +1106,10 @@ nir_serialize(struct blob *blob, const nir_shader *nir)
blob_write_uint32(blob, nir->num_uniforms);
blob_write_uint32(blob, nir->num_outputs);
blob_write_uint32(blob, nir->num_shared);
+   blob_write_uint32(blob, nir->num_constants);
+
+   if (nir->num_constants > 0)
+  blob_write_bytes(blob, nir->constant_data, nir->num_constants);
 
blob_write_uint32(blob, exec_list_length(>functions));
nir_foreach_function(fxn, nir) {
@@ -1161,6 +1165,12 @@ nir_deserialize(void *mem_ctx,
ctx.nir->num_uniforms = blob_read_uint32(blob);
ctx.nir->num_outputs = blob_read_uint32(blob);
ctx.nir->num_shared = blob_read_uint32(blob);
+   ctx.nir->num_constants = blob_read_uint32(blob);
+
+   if (ctx.nir->num_constants > 0) {
+  ctx.nir->constant_data = ralloc_size(ctx.nir, ctx.nir->num_constants);
+  blob_copy_bytes(blob, ctx.nir->constant_data, ctx.nir->num_constants);
+   }
 
unsigned num_functions = blob_read_uint32(blob);
for (unsigned i = 0; i < num_functions; i++)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] anv,nir: Move large constants to a UBO

2018-06-29 Thread Jason Ekstrand
This little series adds an optimization pass to NIR and wires it up in anv
that moves large constant variables to a UBO.  This fixes a fairly common
case in some filter or ambient occlusion shaders where they put some sort
of look-up table in the shader itself.  This series takes Skyrim Special
Edition running under DXVK from a slide show to a smooth and very playable
framerate on my SKL desktop.

The first part of the series adds a concept of constant data that can be
associated with a NIR shader and adds an optimization pass to move large
constant variables into this constant data section.  It's left up to the
driver to figure out how to get this constant data into the shader.  The
last three patches wire things up in ANV to put this data into an implicit
UBO and enables the optimization.

Jason Ekstrand (6):
  nir: Add a deref_instr_has_indirect helper
  nir: Add a concept of constant data associated with a shader
  nir: Add a large constants optimization pass
  anv: Add support for shader constant data to the pipeline cache
  anv: Add state setup support for shader constants
  anv,intel: Enable nir_opt_large_constants for Vulkan

 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir.h|  15 +-
 src/compiler/nir/nir_clone.c  |   6 +
 src/compiler/nir/nir_deref.c  |  18 +
 src/compiler/nir/nir_intrinsics.py|   2 +
 src/compiler/nir/nir_opt_large_constants.c| 338 ++
 src/compiler/nir/nir_serialize.c  |  10 +
 src/intel/compiler/brw_compiler.h |   6 +
 src/intel/compiler/brw_nir.c  |   7 +
 src/intel/vulkan/anv_blorp.c  |   1 +
 src/intel/vulkan/anv_device.c |   1 +
 .../vulkan/anv_nir_apply_pipeline_layout.c|  47 +++
 src/intel/vulkan/anv_pipeline.c   |  12 +
 src/intel/vulkan/anv_pipeline_cache.c |  26 ++
 src/intel/vulkan/anv_private.h|   7 +
 src/intel/vulkan/genX_cmd_buffer.c|  72 +++-
 17 files changed, 550 insertions(+), 20 deletions(-)
 create mode 100644 src/compiler/nir/nir_opt_large_constants.c

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/u_vbuf: drop min/max-scanning for empty indirect draws

2018-06-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Jun 28, 2018 at 9:22 AM, Erik Faye-Lund
 wrote:
> When building with asserts enabled, we'll end up triggering an assert
> in pipe_buffer_map_range down this code-path, due to trying to map
> an empty range. Even if we avoid that, we'll trigger another assert
> a bit later, because u_vbuf_get_minmax_index returns a min-index of
> -1 here, which gets promoted to an unsigned value, and gives us an
> out-of-bounds buffer-mapping offset.
>
> Since we can't really have a well-defined min/max range here when
> the range is empty anyway, we should just drop this dance in the
> first place. After all, no rendering is going to be produced.
>
> This fixes a crash in dEQP-GLES31.functional.draw_indirect.random.0
> on VirGL for me.
>
> Signed-off-by: Erik Faye-Lund 
> ---
> I noticed this while debugging something else, so I thought I'd send
> a patch upstream, as the problem doesn't seem unique to my usecase.
>
>  src/gallium/auxiliary/util/u_vbuf.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
> b/src/gallium/auxiliary/util/u_vbuf.c
> index 42f37c7574..76a1d143d9 100644
> --- a/src/gallium/auxiliary/util/u_vbuf.c
> +++ b/src/gallium/auxiliary/util/u_vbuf.c
> @@ -1183,6 +1183,9 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct 
> pipe_draw_info *info)
>new_info.start = data[2];
>pipe_buffer_unmap(pipe, transfer);
>new_info.indirect = NULL;
> +
> +  if (!new_info.count)
> + return;
> }
>
> if (new_info.index_size) {
> --
> 2.18.0.rc2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 14/18] mesa: add compat profile support for ARB_multi_draw_indirect

2018-06-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 29, 2018 at 5:23 AM, Timothy Arceri  wrote:
> v2: add missing ARB_base_instance support
> ---
>  src/mesa/main/extensions_table.h |  2 +-
>  src/mesa/vbo/vbo_exec_array.c| 77 +++-
>  2 files changed, 76 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index 1446a4bd421..12b796777df 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -88,7 +88,7 @@ EXT(ARB_invalidate_subdata  , dummy_true
>  EXT(ARB_map_buffer_alignment, dummy_true 
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_map_buffer_range, ARB_map_buffer_range   
> , GLL, GLC,  x ,  x , 2008)
>  EXT(ARB_multi_bind  , dummy_true 
> , GLL, GLC,  x ,  x , 2013)
> -EXT(ARB_multi_draw_indirect , ARB_draw_indirect  
> ,  x , GLC,  x ,  x , 2012)
> +EXT(ARB_multi_draw_indirect , ARB_draw_indirect  
> , GLL, GLC,  x ,  x , 2012)
>  EXT(ARB_multisample , dummy_true 
> , GLL,  x ,  x ,  x , 1994)
>  EXT(ARB_multitexture, dummy_true 
> , GLL,  x ,  x ,  x , 1998)
>  EXT(ARB_occlusion_query , ARB_occlusion_query
> , GLL,  x ,  x ,  x , 2001)
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index dbb2590f0c9..58bba208db1 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -1749,7 +1749,38 @@ vbo_exec_MultiDrawArraysIndirect(GLenum mode, const 
> GLvoid *indirect,
>
> /* If <stride> is zero, the array elements are treated as tightly packed. 
> */
> if (stride == 0)
> -  stride = 4 * sizeof(GLuint);  /* sizeof(DrawArraysIndirectCommand) 
> */
> +  stride = sizeof(DrawArraysIndirectCommand);
> +
> +   /* From the ARB_draw_indirect spec:
> +*
> +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the
> +*compatibility profile, this indicates that DrawArraysIndirect and
> +*DrawElementsIndirect are to source their arguments directly from the
> +*pointer passed as their <indirect> parameters."
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT &&
> +   !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) {
> +
> +  if (!_mesa_valid_draw_indirect_multi(ctx, primcount, stride,
> +   "glMultiDrawArraysIndirect"))
> + return;
> +
> +  const ubyte *ptr = (const ubyte *) indirect;
> +  for (unsigned i = 0; i < primcount; i++) {
> + DrawArraysIndirectCommand *cmd = (DrawArraysIndirectCommand *) ptr;
> + vbo_exec_DrawArraysInstancedBaseInstance(mode, cmd->first,
> +  cmd->count, cmd->primCount,
> +  cmd->baseInstance);
> +
> + if (stride == 0) {
> +ptr += sizeof(DrawArraysIndirectCommand);
> + } else {
> +ptr += stride;
> + }
> +  }
> +
> +  return;
> +   }
>
> FLUSH_FOR_DRAW(ctx);
>
> @@ -1788,7 +1819,49 @@ vbo_exec_MultiDrawElementsIndirect(GLenum mode, GLenum 
> type,
>
> /* If <stride> is zero, the array elements are treated as tightly packed. 
> */
> if (stride == 0)
> -  stride = 5 * sizeof(GLuint);  /* 
> sizeof(DrawElementsIndirectCommand) */
> +  stride = sizeof(DrawElementsIndirectCommand);
> +
> +
> +   /* From the ARB_draw_indirect spec:
> +*
> +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the
> +*compatibility profile, this indicates that DrawArraysIndirect and
> +*DrawElementsIndirect are to source their arguments directly from the
> +*pointer passed as their <indirect> parameters."
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT &&
> +   !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) {
> +  /*
> +   * Unlike regular DrawElementsInstancedBaseVertex commands, the indices
> +   * may not come from a client array and must come from an index buffer.
> +   * If no element array buffer is bound, an INVALID_OPERATION error is
> +   * generated.
> +   */
> +  if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
> + _mesa_error(ctx, GL_INVALID_OPERATION,
> + "glMultiDrawElementsIndirect(no buffer bound "
> + "to GL_ELEMENT_ARRAY_BUFFER)");
> +
> + return;
> +  }
> +
> +  if (!_mesa_valid_draw_indirect_multi(ctx, primcount, stride,
> +   "glMultiDrawArraysIndirect"))
> + return;
> +
> +  const ubyte *ptr = (const ubyte *) indirect;
> +  for (unsigned i = 0; i < primcount; i++) {
> + 

Re: [Mesa-dev] [PATCH v2 12/18] mesa: add ARB_draw_indirect support to compat profile

2018-06-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 29, 2018 at 5:22 AM, Timothy Arceri  wrote:
> v2: add missing ARB_base_instance support
> ---
>  src/mesa/main/bufferobj.c|  3 +-
>  src/mesa/main/extensions_table.h |  2 +-
>  src/mesa/vbo/vbo_exec_array.c| 71 +++-
>  3 files changed, 72 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index 67f9cd0a902..1d1e51bc015 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -129,8 +129,7 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
>   return >QueryBuffer;
>break;
> case GL_DRAW_INDIRECT_BUFFER:
> -  if ((ctx->API == API_OPENGL_CORE &&
> -   ctx->Extensions.ARB_draw_indirect) ||
> +  if ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_draw_indirect) ||
> _mesa_is_gles31(ctx)) {
>   return >DrawIndirectBuffer;
>}
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index f79a52cee8c..1446a4bd421 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -58,7 +58,7 @@ EXT(ARB_direct_state_access , dummy_true
>  EXT(ARB_draw_buffers, dummy_true 
> , GLL, GLC,  x ,  x , 2002)
>  EXT(ARB_draw_buffers_blend  , ARB_draw_buffers_blend 
> , GLL, GLC,  x ,  x , 2009)
>  EXT(ARB_draw_elements_base_vertex   , ARB_draw_elements_base_vertex  
> , GLL, GLC,  x ,  x , 2009)
> -EXT(ARB_draw_indirect   , ARB_draw_indirect  
> ,  x , GLC,  x ,  x , 2010)
> +EXT(ARB_draw_indirect   , ARB_draw_indirect  
> , GLL, GLC,  x ,  x , 2010)
>  EXT(ARB_draw_instanced  , ARB_draw_instanced 
> , GLL, GLC,  x ,  x , 2008)
>  EXT(ARB_enhanced_layouts, ARB_enhanced_layouts   
> , GLL, GLC,  x ,  x , 2013)
>  EXT(ARB_explicit_attrib_location, ARB_explicit_attrib_location   
> , GLL, GLC,  x ,  x , 2009)
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 792907ac044..dbb2590f0c9 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -39,6 +39,21 @@
>  #include "main/macros.h"
>  #include "main/transformfeedback.h"
>
> +typedef struct {
> +   GLuint count;
> +   GLuint primCount;
> +   GLuint first;
> +   GLuint baseInstance;
> +} DrawArraysIndirectCommand;
> +
> +typedef struct {
> +   GLuint count;
> +   GLuint primCount;
> +   GLuint firstIndex;
> +   GLint  baseVertex;
> +   GLuint baseInstance;
> +} DrawElementsIndirectCommand;
> +
>
>  /**
>   * Check that element 'j' of the array has reasonable data.
> @@ -1616,6 +1631,23 @@ vbo_exec_DrawArraysIndirect(GLenum mode, const GLvoid 
> *indirect)
>_mesa_debug(ctx, "glDrawArraysIndirect(%s, %p)\n",
>_mesa_enum_to_string(mode), indirect);
>
> +   /* From the ARB_draw_indirect spec:
> +*
> +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the
> +*compatibility profile, this indicates that DrawArraysIndirect and
> +*DrawElementsIndirect are to source their arguments directly from the
> +*pointer passed as their <indirect> parameters."
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT &&
> +   !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) {
> +  DrawArraysIndirectCommand *cmd = (DrawArraysIndirectCommand *) 
> indirect;
> +
> +  vbo_exec_DrawArraysInstancedBaseInstance(mode, cmd->first, cmd->count,
> +   cmd->primCount,
> +   cmd->baseInstance);
> +  return;
> +   }
> +
> FLUSH_FOR_DRAW(ctx);
>
> if (_mesa_is_no_error_enabled(ctx)) {
> @@ -1647,6 +1679,43 @@ vbo_exec_DrawElementsIndirect(GLenum mode, GLenum 
> type, const GLvoid *indirect)
>_mesa_enum_to_string(mode),
>_mesa_enum_to_string(type), indirect);
>
> +   /* From the ARB_draw_indirect spec:
> +*
> +*"Initially zero is bound to DRAW_INDIRECT_BUFFER. In the
> +*compatibility profile, this indicates that DrawArraysIndirect and
> +*DrawElementsIndirect are to source their arguments directly from the
> +*pointer passed as their <indirect> parameters."
> +*/
> +   if (ctx->API == API_OPENGL_COMPAT &&
> +   !_mesa_is_bufferobj(ctx->DrawIndirectBuffer)) {
> +  /*
> +   * Unlike regular DrawElementsInstancedBaseVertex commands, the indices
> +   * may not come from a client array and must come from an index buffer.
> +   * If no element array buffer is bound, an INVALID_OPERATION error is
> +   * generated.
> +   */
> +  if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
> + _mesa_error(ctx, 

[Mesa-dev] [PATCH] i965: fix typo (wrong gen number) in comment

2018-06-29 Thread Caio Marcelo de Oliveira Filho
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index b279f01e1a1..921b420ab81 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1935,7 +1935,7 @@ genX(upload_wm)(struct brw_context *brw)
 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
   }
 #elif GEN_GEN == 6
-  /* On gen5, we have multiple shader kernels and we no longer specify a
+  /* On gen6, we have multiple shader kernels and we no longer specify a
* register count for each one.
*/
   wm.KernelStartPointer0 = stage_state->prog_offset +
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 18.1.3

2018-06-29 Thread Dylan Baker
Hi List,

Mesa 18.1.3 is now available. This is a bug fix point release in the 18.1
series.

In this release we have:
 - numerous fixes for radv
 - some fixes for common radeon code
 - several meson fixes
 - fixes for autotools with non-gnu grep
 - fixes for both the nir and glsl compilers
 - An i965 fix for some gpu hangs on SNB

git tag: mesa-18.1.3

https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.gz
MD5:  7c283b1c847829d443c65b30c787b5cd  mesa-18.1.3.tar.gz
SHA1: 611d4e7912779eb0ce11f4df59f8e633ecb451bf  mesa-18.1.3.tar.gz
SHA256: 2a1e36280d01ad18ba6d5b3fbd653ceaa109eaa031b78eb5dfaa4df452742b66  
mesa-18.1.3.tar.gz
SHA512: 
59f9eee8c9045d1cf4b0a97cd542a3f100571e193f87d795e9d435bae14a3bee9e43bdff08f5a9db83b92274710d95bef2d33db3bbcb7650812035cb7338
  mesa-18.1.3.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.xz
MD5:  b34273403a605f6f98ead00f0bdf8e0b  mesa-18.1.3.tar.xz
SHA1: 0a5fb3096108a95f913411267b8a374f9ff547b2  mesa-18.1.3.tar.xz
SHA256: 54f08deeda0cd2f818e8d40140040ed013de7852573002453b7f50da9ea738ce  
mesa-18.1.3.tar.xz
SHA512: 
f6e5b81a80a309a36a04759d18364d3c71c48d1cb88f87b2f5432ef003092a22046e88ce2082031d5d52b60ba36f585d8df52e06ecc7a5158079936236f36887
  mesa-18.1.3.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.1.3.tar.xz.sig



signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v3]

2018-06-29 Thread Fritz Koenig
Adds an extension to glFramebufferParameteri
that will specify if the framebuffer is vertically
flipped. Historically system framebuffers are
vertically flipped and user framebuffers are not.
Checking to see the state was done by looking at
the name field.  This adds an explicit field.

v2:
* updated spec language [for chadv]
* correctly specifying ES 3.1 [for chadv]
* refactor access to rb->Name [for jason]
* handle GetFramebufferParameteriv [for chadv]
v3:
* correct _mesa_GetMultisamplefv [for kusmabite]
---
 docs/specs/MESA_framebuffer_flip_y.spec| 84 ++
 include/GLES2/gl2ext.h |  5 ++
 src/mapi/glapi/registry/gl.xml |  6 ++
 src/mesa/drivers/dri/i915/intel_fbo.c  |  7 +-
 src/mesa/drivers/dri/i965/intel_fbo.c  |  7 +-
 src/mesa/drivers/dri/nouveau/nouveau_fbo.c |  7 +-
 src/mesa/drivers/dri/radeon/radeon_fbo.c   |  7 +-
 src/mesa/drivers/dri/radeon/radeon_span.c  |  9 ++-
 src/mesa/drivers/dri/swrast/swrast.c   |  7 +-
 src/mesa/drivers/osmesa/osmesa.c   |  5 +-
 src/mesa/drivers/x11/xm_buffer.c   |  3 +-
 src/mesa/drivers/x11/xmesaP.h  |  3 +-
 src/mesa/main/accum.c  | 17 +++--
 src/mesa/main/dd.h |  3 +-
 src/mesa/main/extensions_table.h   |  1 +
 src/mesa/main/fbobject.c   | 18 -
 src/mesa/main/framebuffer.c|  1 +
 src/mesa/main/glheader.h   |  3 +
 src/mesa/main/mtypes.h |  3 +
 src/mesa/main/readpix.c| 20 +++---
 src/mesa/state_tracker/st_cb_fbo.c |  7 +-
 src/mesa/swrast/s_blit.c   | 17 +++--
 src/mesa/swrast/s_clear.c  |  3 +-
 src/mesa/swrast/s_copypix.c| 11 +--
 src/mesa/swrast/s_depth.c  |  6 +-
 src/mesa/swrast/s_drawpix.c| 26 ---
 src/mesa/swrast/s_renderbuffer.c   |  6 +-
 src/mesa/swrast/s_renderbuffer.h   |  3 +-
 src/mesa/swrast/s_stencil.c|  3 +-
 29 files changed, 241 insertions(+), 57 deletions(-)
 create mode 100644 docs/specs/MESA_framebuffer_flip_y.spec

diff --git a/docs/specs/MESA_framebuffer_flip_y.spec 
b/docs/specs/MESA_framebuffer_flip_y.spec
new file mode 100644
index 00..dca77a9541
--- /dev/null
+++ b/docs/specs/MESA_framebuffer_flip_y.spec
@@ -0,0 +1,84 @@
+Name
+
+MESA_framebuffer_flip_y
+
+Name Strings
+
+GL_MESA_framebuffer_flip_y
+
+Contact
+
+Fritz Koenig 
+
+Contributors
+
+Fritz Koenig, Google
+Kristian Høgsberg, Google
+Chad Versace, Google
+
+Status
+
+Proposal
+
+Version
+
+Version 1, June 7, 2018
+
+Number
+
+TBD
+
+Dependencies
+
+OpenGL ES 3.1 is required, for FramebufferParameteri.
+
+Overview
+
+Rendered buffers are normally returned right side up, as accessed
+top to bottom.  This extension allows those buffers to be upside down
+when accessed top to bottom.
+
+This extension defines a new framebuffer parameter,
+GL_FRAMEBUFFER_FLIP_Y_MESA, that changes the behavior of the reads and
+writes to the framebuffer attachment points. When 
GL_FRAMEBUFFER_FLIP_Y_MESA
+is GL_TRUE, render commands and pixel transfer operations access the
+backing store of each attachment point with a y-inverted coordinate
+system. This y-inversion is relative to the coordinate system set when
+GL_FRAMEBUFFER_FLIP_Y_MESA is GL_FALSE.
+
+Access through TexSubImage2D and similar calls will notice the effect of
+the flip when they are not attached to framebuffer objects because
+GL_FRAMEBUFFER_FLIP_Y_MESA is associated with the framebuffer object and
+not the attachment points.
+
+IP Status
+
+None
+
+Issues
+
+None
+
+New Procedures and Functions
+
+None
+
+New Types
+
+None
+
+New Tokens
+
+Accepted by the <pname> argument of FramebufferParameteri and
+GetFramebufferParameteriv:
+
+GL_FRAMEBUFFER_FLIP_Y_MESA  0x8BBB
+
+Errors
+GL_INVALID_OPERATION is returned from  GetFramebufferParameteriv if this
+is called on a winsys framebuffer.
+
+Revision History
+
+Version 1, June, 2018
+Initial draft (Fritz Koenig)
diff --git a/include/GLES2/gl2ext.h b/include/GLES2/gl2ext.h
index a7d19a1fc8..0a93bfb865 100644
--- a/include/GLES2/gl2ext.h
+++ b/include/GLES2/gl2ext.h
@@ -2334,6 +2334,11 @@ GL_APICALL void GL_APIENTRY glGetPerfQueryInfoINTEL 
(GLuint queryId, GLuint quer
 #endif
 #endif /* GL_INTEL_performance_query */
 
+#ifndef GL_MESA_framebuffer_flip_y
+#define GL_MESA_framebuffer_flip_y 1
+#define GL_FRAMEBUFFER_FLIP_Y_MESA 0x8BBB
+#endif /* GL_MESA_framebuffer_flip_y */
+
 #ifndef GL_MESA_program_binary_formats
 #define GL_MESA_program_binary_formats 1
 #define GL_PROGRAM_BINARY_FORMAT_MESA 0x875F
diff --git a/src/mapi/glapi/registry/gl.xml b/src/mapi/glapi/registry/gl.xml
index 833478aa51..13882eff7b 100644
--- a/src/mapi/glapi/registry/gl.xml
+++ 

Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2

2018-06-29 Thread Dylan Baker
Quoting Marek Olšák (2018-06-29 09:48:08)
> On Fri, Jun 29, 2018 at 11:40 AM, Dylan Baker  wrote:
> > Quoting Marek Olšák (2018-06-18 16:33:09)
> >> From: Marek Olšák 
> >>
> >> Cc: 18.1 
> >> ---
> >>  src/gallium/drivers/radeonsi/si_blit.c | 30 +++---
> >>  1 file changed, 27 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
> >> b/src/gallium/drivers/radeonsi/si_blit.c
> >> index fe059b36577..93cf7fe9001 100644
> >> --- a/src/gallium/drivers/radeonsi/si_blit.c
> >> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> >> @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context 
> >> *ctx,
> >> if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty)
> >> return;
> >>
> >> if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) {
> >> si_blit_decompress_color(sctx, rtex, 0, res->last_level,
> >>  0, util_max_layer(res, 0),
> >>  rtex->dcc_separate_buffer != 
> >> NULL);
> >> }
> >>
> >> /* Always do the analysis even if DCC is disabled at the moment. */
> >> -   if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) {
> >> -   rtex->separate_dcc_dirty = false;
> >> -   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
> >> +   if (rtex->dcc_gather_statistics) {
> >> +   bool separate_dcc_dirty = rtex->separate_dcc_dirty;
> >> +
> >> +   /* If the color buffer hasn't been unbound and fast clear 
> >> hasn't
> >> +* been used, separate_dcc_dirty is false, but there may 
> >> have been
> >> +* new rendering. Check if the color buffer is bound and 
> >> assume
> >> +* it's dirty.
> >> +*
> >> +* Note that DRI2 never unbinds window colorbuffers, which 
> >> means
> >> +* the DCC pipeline statistics query would never be re-set 
> >> and would
> >> +* keep adding new results until all free memory is 
> >> exhausted if we
> >> +* didn't do this.
> >> +*/
> >> +   if (!separate_dcc_dirty) {
> >> +   for (unsigned i = 0; i < 
> >> sctx->framebuffer.state.nr_cbufs; i++) {
> >> +   if (sctx->framebuffer.state.cbufs[i] &&
> >> +   
> >> sctx->framebuffer.state.cbufs[i]->texture == res) {
> >> +   separate_dcc_dirty = true;
> >> +   break;
> >> +   }
> >> +   }
> >> +   }
> >> +
> >> +   if (separate_dcc_dirty) {
> >> +   rtex->separate_dcc_dirty = false;
> >> +   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
> >> +   }
> >> }
> >>  }
> >>
> >>  void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex)
> >>  {
> >> if (!rtex->dcc_offset)
> >> return;
> >>
> >> si_blit_decompress_color(sctx, rtex, 0, 
> >> rtex->buffer.b.b.last_level,
> >>  0, util_max_layer(&rtex->buffer.b.b, 0),
> >> --
> >> 2.17.1
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> > Hi Marek,
> >
> > This didn't apply cleanly to 18.1 because of
> > 1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture ->
> > si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by
> > changing "tex" to "rtex", please take a look at the commit in staging/18.1 
> > in
> > the main tree or 18.1-proposed in my tree and let me know if it looks good 
> > to
> > you.
> 
> Yes, it looks good. Thanks.
> 
> Marek

Thank you.


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2

2018-06-29 Thread Marek Olšák
On Fri, Jun 29, 2018 at 11:40 AM, Dylan Baker  wrote:
> Quoting Marek Olšák (2018-06-18 16:33:09)
>> From: Marek Olšák 
>>
>> Cc: 18.1 
>> ---
>>  src/gallium/drivers/radeonsi/si_blit.c | 30 +++---
>>  1 file changed, 27 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
>> b/src/gallium/drivers/radeonsi/si_blit.c
>> index fe059b36577..93cf7fe9001 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context 
>> *ctx,
>> if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty)
>> return;
>>
>> if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) {
>> si_blit_decompress_color(sctx, rtex, 0, res->last_level,
>>  0, util_max_layer(res, 0),
>>  rtex->dcc_separate_buffer != NULL);
>> }
>>
>> /* Always do the analysis even if DCC is disabled at the moment. */
>> -   if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) {
>> -   rtex->separate_dcc_dirty = false;
>> -   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
>> +   if (rtex->dcc_gather_statistics) {
>> +   bool separate_dcc_dirty = rtex->separate_dcc_dirty;
>> +
>> +   /* If the color buffer hasn't been unbound and fast clear 
>> hasn't
>> +* been used, separate_dcc_dirty is false, but there may 
>> have been
>> +* new rendering. Check if the color buffer is bound and 
>> assume
>> +* it's dirty.
>> +*
>> +* Note that DRI2 never unbinds window colorbuffers, which 
>> means
>> +* the DCC pipeline statistics query would never be re-set 
>> and would
>> +* keep adding new results until all free memory is 
>> exhausted if we
>> +* didn't do this.
>> +*/
>> +   if (!separate_dcc_dirty) {
>> +   for (unsigned i = 0; i < 
>> sctx->framebuffer.state.nr_cbufs; i++) {
>> +   if (sctx->framebuffer.state.cbufs[i] &&
>> +   
>> sctx->framebuffer.state.cbufs[i]->texture == res) {
>> +   separate_dcc_dirty = true;
>> +   break;
>> +   }
>> +   }
>> +   }
>> +
>> +   if (separate_dcc_dirty) {
>> +   rtex->separate_dcc_dirty = false;
>> +   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
>> +   }
>> }
>>  }
>>
>>  void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex)
>>  {
>> if (!rtex->dcc_offset)
>> return;
>>
>> si_blit_decompress_color(sctx, rtex, 0, rtex->buffer.b.b.last_level,
>>  0, util_max_layer(&rtex->buffer.b.b, 0),
>> --
>> 2.17.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> Hi Marek,
>
> This didn't apply cleanly to 18.1 because of
> 1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture ->
> si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by
> changing "tex" to "rtex", please take a look at the commit in staging/18.1 in
> the main tree or 18.1-proposed in my tree and let me know if it looks good to
> you.

Yes, it looks good. Thanks.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v2]

2018-06-29 Thread Fritz Koenig
On Fri, Jun 29, 2018 at 1:50 AM Erik Faye-Lund  wrote:
>
> On Thu, Jun 28, 2018 at 11:12 PM Fritz Koenig  wrote:
> >
> > Adds an extension to glFramebufferParameteri
> > that will specify if the framebuffer is vertically
> > flipped. Historically system framebuffers are
> > vertically flipped and user framebuffers are not.
> > Checking to see the state was done by looking at
> > the name field.  This adds an explicit field.
> >
> > v2:
> > * updated spec language [for chadv]
> > * correctly specifying ES 3.1 [for chadv]
> > * refactor access to rb->Name [for jason]
> > * handle GetFramebufferParameteriv [for chadv]
> > ---
> >  docs/specs/MESA_framebuffer_flip_y.spec| 84 ++
> >  include/GLES2/gl2ext.h |  5 ++
> >  src/mapi/glapi/registry/gl.xml |  6 ++
> >  src/mesa/drivers/dri/i915/intel_fbo.c  |  7 +-
> >  src/mesa/drivers/dri/i965/intel_fbo.c  |  7 +-
> >  src/mesa/drivers/dri/nouveau/nouveau_fbo.c |  7 +-
> >  src/mesa/drivers/dri/radeon/radeon_fbo.c   |  7 +-
> >  src/mesa/drivers/dri/radeon/radeon_span.c  |  9 ++-
> >  src/mesa/drivers/dri/swrast/swrast.c   |  7 +-
> >  src/mesa/drivers/osmesa/osmesa.c   |  5 +-
> >  src/mesa/drivers/x11/xm_buffer.c   |  3 +-
> >  src/mesa/drivers/x11/xmesaP.h  |  3 +-
> >  src/mesa/main/accum.c  | 17 +++--
> >  src/mesa/main/dd.h |  3 +-
> >  src/mesa/main/extensions_table.h   |  1 +
> >  src/mesa/main/fbobject.c   | 18 -
> >  src/mesa/main/framebuffer.c|  1 +
> >  src/mesa/main/glheader.h   |  3 +
> >  src/mesa/main/mtypes.h |  3 +
> >  src/mesa/main/readpix.c| 20 +++---
> >  src/mesa/state_tracker/st_cb_fbo.c |  7 +-
> >  src/mesa/swrast/s_blit.c   | 17 +++--
> >  src/mesa/swrast/s_clear.c  |  3 +-
> >  src/mesa/swrast/s_copypix.c| 11 +--
> >  src/mesa/swrast/s_depth.c  |  6 +-
> >  src/mesa/swrast/s_drawpix.c| 26 ---
> >  src/mesa/swrast/s_renderbuffer.c   |  6 +-
> >  src/mesa/swrast/s_renderbuffer.h   |  3 +-
> >  src/mesa/swrast/s_stencil.c|  3 +-
> >  29 files changed, 241 insertions(+), 57 deletions(-)
> >  create mode 100644 docs/specs/MESA_framebuffer_flip_y.spec
> >
>
> I think this needs to update the _mesa_is_winsys_fbo-check in
> _mesa_GetMultisamplefv in src/mesa/main/multisample.c to flip the
> sample-positions as well...

Thanks for pointing that one out, will add it.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] r600: set rounding mode for texture array layer selection

2018-06-29 Thread Roland Scheidegger
Am 29.06.2018 um 16:35 schrieb Gert Wollny:
> The evaluation of the array layer index is "floor(z+0.5)", and the default
> rounding mode doesn't correctly evaluate this. Therefore, set the rounding
> mode to "trunc" and z-filter mode to "point".
> For other textures make sure the default rounding mode and z-filter are
> used.
> 
> Fixes single-sample tests out of:
>   dEQP-GLES3.functional.texture.shadow.2d_array.*
>   dEQP-GLES3.functional.texture.vertex.2d_array.*
>   dEQP-GLES3.functional.texture.filtering.2d_array.*
> (With the single sample tests the rounding accuracy is tested too)
> 
> v2: * reword comments and commit message
> * clear S_03C008_TRUNC_COORD for all non-array types
> 
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 22 ++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index a484f0078a..b8ed4fea33 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2413,6 +2413,28 @@ static void evergreen_emit_sampler_states(struct 
> r600_context *rctx,
>   rstate = texinfo->states.states[i];
>   assert(rstate);
>  
> + /* For texture arrays the formula select the layer is (floor(z 
> + 0.5)) and
to select

> +  * apparently tha hardware doesn't trigger this when the 
> texture is in ARRAY mode
> +  * Neither does the default z-rounding mode provide the 
> required 0.5 shift
> +  * nor does it round with sufficinet accuracy. Consequently set 
> the coordinate
sufficient. Albeit I'd still argue the rounding is done with quite
sufficient accuracy to be spec compliant.

> +  * interpolation and truncate mode here to get "floor" for 
> positive coordinates.
> +  * Adding the 0.5 offset is done in the shader.
> +  * Also  make sure that for other texture types the default is 
> used.
You could also mention this alters all coordinates.

So, I'm still really not fond of the idea, since there's no proof it
causes any issues rather than venture into unknown territory, but whatever.

Acked-by: Roland Scheidegger 


> +  */
> + struct r600_pipe_sampler_view   *rview = 
> texinfo->views.views[i];
> + if (rview) {
> + rstate->tex_sampler_words[0] &= C_03C000_Z_FILTER;
> + enum pipe_texture_target target = 
> rview->base.texture->target;
> + if (target == PIPE_TEXTURE_2D_ARRAY ||
> + target == PIPE_TEXTURE_CUBE_ARRAY ||
> + target == PIPE_TEXTURE_1D_ARRAY) {
> + rstate->tex_sampler_words[0] |= 
> S_03C000_Z_FILTER(V_03C000_SQ_TEX_Z_FILTER_POINT);
> + rstate->tex_sampler_words[2] |= 
> S_03C008_TRUNCATE_COORD(1);
> + } else {
> + rstate->tex_sampler_words[2] &= 
> C_03C008_TRUNCATE_COORD;
> + }
> + }
> +
>   radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
>   radeon_emit(cs, (resource_id_base + i) * 3);
>   radeon_emit_array(cs, rstate->tex_sampler_words, 3);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup

2018-06-29 Thread Roland Scheidegger
Am 29.06.2018 um 16:35 schrieb Gert Wollny:
> For texture array lookup the slice index is evaluated according to
>   idx = floor(z + 0.5)
> 
> This patch implements the first part by adding 0.5 to the according
> texture coordinate when appropriate.
> 
> Fixes multi-sample tests out of:
>   dEQP-GLES3.functional.texture.shadow.2d_array.*
>   dEQP-GLES3.functional.texture.vertex.2d_array.*
>   dEQP-GLES3.functional.texture.filtering.2d_array.*
> (In the multi-sample case the rounding accuracy is not tested.)
> 
> v2: - Don't apply texture offset correction for GATHER*O (corrects piglit
>   failures reported by Dave Airlie)
> - unconditionally set the texture offset to 1 (=0.5) because the shader
>   can't set an offset for the array index (Roland Scheidegger)
> - Add Fixes comment to commit message
> 
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index c466a48262..4d17b3d875 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -7456,6 +7456,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>   int8_t offset_x = 0, offset_y = 0, offset_z = 0;
>   boolean has_txq_cube_array_z = false;
>   unsigned sampler_index_mode;
> + int *array_index_offset = NULL;
>  
>   if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
>   ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
> @@ -8411,18 +8412,33 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>   opcode == FETCH_OP_SAMPLE_C_LB) {
>   /* the array index is read from Y */
>   tex.coord_type_y = 0;
> + array_index_offset = &offset_y;
>   } else {
>   /* the array index is read from Z */
>   tex.coord_type_z = 0;
>   tex.src_sel_z = tex.src_sel_y;
> + array_index_offset = &offset_z;
> +
>   }
>   } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
>  inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
>  ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
>   inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
> - (ctx->bc->chip_class >= EVERGREEN)))
> + (ctx->bc->chip_class >= EVERGREEN))) {
>   /* the array index is read from Z */
>   tex.coord_type_z = 0;
> + array_index_offset = &offset_z;
> + }
> +
> + /* We have array access, the coordinates are not int and we use the
> +  * offset registers -> add 0.5 to the array index to adjust it according
> +  * to floor(z + 0.5). The floor opretaion is set as TRUNC in the texture
operation


So for gather4_O (where you'd need to do it differently) it must not be
done, but for gather4 it has to be done?
Still doesn't make all that much sense to me. The hw may be weird but I
don't think it would be that weird?
But anyway, as long as it doesn't regress anything,
Acked-by: Roland Scheidegger 


> +  * state.
> +  */
> + if (array_index_offset && opcode != FETCH_OP_LD &&
> + opcode != FETCH_OP_GATHER4_C_O && opcode != FETCH_OP_GATHER4_O) {
> + *array_index_offset = 1;
> + }
>  
>   /* mask unused source components */
>   if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] radeonsi: fix memory exhaustion issue with DCC statistics gathering with DRI2

2018-06-29 Thread Dylan Baker
Quoting Marek Olšák (2018-06-18 16:33:09)
> From: Marek Olšák 
> 
> Cc: 18.1 
> ---
>  src/gallium/drivers/radeonsi/si_blit.c | 30 +++---
>  1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
> b/src/gallium/drivers/radeonsi/si_blit.c
> index fe059b36577..93cf7fe9001 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -1320,23 +1320,47 @@ static void si_flush_resource(struct pipe_context 
> *ctx,
> if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty)
> return;
>  
> if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) {
> si_blit_decompress_color(sctx, rtex, 0, res->last_level,
>  0, util_max_layer(res, 0),
>  rtex->dcc_separate_buffer != NULL);
> }
>  
> /* Always do the analysis even if DCC is disabled at the moment. */
> -   if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) {
> -   rtex->separate_dcc_dirty = false;
> -   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
> +   if (rtex->dcc_gather_statistics) {
> +   bool separate_dcc_dirty = rtex->separate_dcc_dirty;
> +
> +   /* If the color buffer hasn't been unbound and fast clear 
> hasn't
> +* been used, separate_dcc_dirty is false, but there may have 
> been
> +* new rendering. Check if the color buffer is bound and 
> assume
> +* it's dirty.
> +*
> +* Note that DRI2 never unbinds window colorbuffers, which 
> means
> +* the DCC pipeline statistics query would never be re-set 
> and would
> +* keep adding new results until all free memory is exhausted 
> if we
> +* didn't do this.
> +*/
> +   if (!separate_dcc_dirty) {
> +   for (unsigned i = 0; i < 
> sctx->framebuffer.state.nr_cbufs; i++) {
> +   if (sctx->framebuffer.state.cbufs[i] &&
> +   sctx->framebuffer.state.cbufs[i]->texture 
> == res) {
> +   separate_dcc_dirty = true;
> +   break;
> +   }
> +   }
> +   }
> +
> +   if (separate_dcc_dirty) {
> +   rtex->separate_dcc_dirty = false;
> +   vi_separate_dcc_process_and_reset_stats(ctx, rtex);
> +   }
> }
>  }
>  
>  void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex)
>  {
> if (!rtex->dcc_offset)
> return;
>  
> si_blit_decompress_color(sctx, rtex, 0, rtex->buffer.b.b.last_level,
>  0, util_max_layer(&rtex->buffer.b.b, 0),
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Hi Marek,

This didn't apply cleanly to 18.1 because of
1ba87f4438069964af6548f4fa05386be999f4de (radeonsi: rename r600_texture ->
si_texture, rxxx -> xxx or sxxx), I've attempted to rebase the commit by
changing "tex" to "rtex", please take a look at the commit in staging/18.1 in
the main tree or 18.1-proposed in my tree and let me know if it looks good to
you.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 32/53] intel/fs: Mark LINTERP opcode as writing accumulator implicitly on pre-Gen7.

2018-06-29 Thread Dylan Baker
Quoting Jason Ekstrand (2018-05-24 14:56:14)
> From: Francisco Jerez 
> 
> ---
>  src/intel/compiler/brw_shader.cpp | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/compiler/brw_shader.cpp 
> b/src/intel/compiler/brw_shader.cpp
> index 141b64e..61211ef 100644
> --- a/src/intel/compiler/brw_shader.cpp
> +++ b/src/intel/compiler/brw_shader.cpp
> @@ -984,7 +984,8 @@ backend_instruction::writes_accumulator_implicitly(const 
> struct gen_device_info
> return writes_accumulator ||
>(devinfo->gen < 6 &&
> ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
> -(opcode >= FS_OPCODE_DDX_COARSE && opcode <= 
> FS_OPCODE_LINTERP)));
> +(opcode >= FS_OPCODE_DDX_COARSE && opcode <= 
> FS_OPCODE_LINTERP))) ||
> +  (devinfo->gen < 7 && opcode == FS_OPCODE_LINTERP);
>  }
>  
>  bool
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Hi Jason,

This doesn't apply to the 18.1 branch. At the very least
d3cd6b7215c11054b587fb0fd621c53c6d62c64b from your series is also needed,
although there are still conflicts after that which are taking a bit more time
to track down, I can look into it later, but I'd like to get the 18.1.3 release
done first. If you'd like to drop this instead let me know.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] r600: set rounding mode for texture array layer selection

2018-06-29 Thread Gert Wollny
The evaluation of the array layer index is "floor(z+0.5)", and the default
rounding mode doesn't correctly evaluate this. Therefore, set the rounding
mode to "trunc" and z-filter mode to "point".
For other textures make sure the default rounding mode and z-filter are
used.

Fixes single-sample tests out of:
  dEQP-GLES3.functional.texture.shadow.2d_array.*
  dEQP-GLES3.functional.texture.vertex.2d_array.*
  dEQP-GLES3.functional.texture.filtering.2d_array.*
(With the single sample tests the rounding accuracy is tested too)

v2: * reword comments and commit message
* clear S_03C008_TRUNC_COORD for all non-array types

Signed-off-by: Gert Wollny 
---
 src/gallium/drivers/r600/evergreen_state.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index a484f0078a..b8ed4fea33 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2413,6 +2413,28 @@ static void evergreen_emit_sampler_states(struct 
r600_context *rctx,
rstate = texinfo->states.states[i];
assert(rstate);
 
+   /* For texture arrays the formula to select the layer is (floor(z 
+ 0.5)) and
+* apparently the hardware doesn't trigger this when the 
texture is in ARRAY mode
+* Neither does the default z-rounding mode provide the 
required 0.5 shift
+* nor does it round with sufficient accuracy. Consequently set 
the coordinate
+* interpolation and truncate mode here to get "floor" for 
positive coordinates.
+* Adding the 0.5 offset is done in the shader.
+* Also  make sure that for other texture types the default is 
used.
+*/
+   struct r600_pipe_sampler_view   *rview = 
texinfo->views.views[i];
+   if (rview) {
+   rstate->tex_sampler_words[0] &= C_03C000_Z_FILTER;
+   enum pipe_texture_target target = 
rview->base.texture->target;
+   if (target == PIPE_TEXTURE_2D_ARRAY ||
+   target == PIPE_TEXTURE_CUBE_ARRAY ||
+   target == PIPE_TEXTURE_1D_ARRAY) {
+   rstate->tex_sampler_words[0] |= 
S_03C000_Z_FILTER(V_03C000_SQ_TEX_Z_FILTER_POINT);
+   rstate->tex_sampler_words[2] |= 
S_03C008_TRUNCATE_COORD(1);
+   } else {
+   rstate->tex_sampler_words[2] &= 
C_03C008_TRUNCATE_COORD;
+   }
+   }
+
radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + i) * 3);
radeon_emit_array(cs, rstate->tex_sampler_words, 3);
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/2] r600: correct texture offset for array index lookup

2018-06-29 Thread Gert Wollny
For texture array lookup the slice index is evaluated according to
  idx = floor(z + 0.5)

This patch implements the first part by adding 0.5 to the according
texture coordinate when appropriate.

Fixes multi-sample tests out of:
  dEQP-GLES3.functional.texture.shadow.2d_array.*
  dEQP-GLES3.functional.texture.vertex.2d_array.*
  dEQP-GLES3.functional.texture.filtering.2d_array.*
(In the multi-sample case the rounding accuracy is not tested.)

v2: - Don't apply texture offset correction for GATHER*O (corrects piglit
  failures reported by Dave Airlie)
- unconditionally set the texture offset to 1 (=0.5) because the shader
  can't set an offset for the array index (Roland Scheidegger)
- Add Fixes comment to commit message

Signed-off-by: Gert Wollny 
---
 src/gallium/drivers/r600/r600_shader.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index c466a48262..4d17b3d875 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7456,6 +7456,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
int8_t offset_x = 0, offset_y = 0, offset_z = 0;
boolean has_txq_cube_array_z = false;
unsigned sampler_index_mode;
+   int *array_index_offset = NULL;
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -8411,18 +8412,33 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
opcode == FETCH_OP_SAMPLE_C_LB) {
/* the array index is read from Y */
tex.coord_type_y = 0;
+   array_index_offset = &offset_y;
} else {
/* the array index is read from Z */
tex.coord_type_z = 0;
tex.src_sel_z = tex.src_sel_y;
+   array_index_offset = &offset_z;
+
}
} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
   ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
-   (ctx->bc->chip_class >= EVERGREEN)))
+   (ctx->bc->chip_class >= EVERGREEN))) {
/* the array index is read from Z */
tex.coord_type_z = 0;
+   array_index_offset = &offset_z;
+   }
+
+   /* We have array access, the coordinates are not int and we use the
+* offset registers -> add 0.5 to the array index to adjust it according
+* to floor(z + 0.5). The floor operation is set as TRUNC in the texture
+* state.
+*/
+   if (array_index_offset && opcode != FETCH_OP_LD &&
+   opcode != FETCH_OP_GATHER4_C_O && opcode != FETCH_OP_GATHER4_O) {
+   *array_index_offset = 1;
+   }
 
/* mask unused source components */
if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/2] r600: Fix array texture slice index evaluation

2018-06-29 Thread Gert Wollny
I reworded parts of the comments, and simplified the patch against the shader
to correct the piglit failures and taking into account that a shader can not
set an offset for the array index at all.

Apart from the dEQP tests I've run
  ./piglit run gpu -t texture 
with no changes. (I always got hangs with "./piglit run gpu -x atomicity",
with or without these patches, some image_store_load tests seem to behave badly)

I'm still thinking about how relevant the TRUNC versus default rounding mode is
and what is actually the more correct approach for point sampling. That's also
why I think that it is better to keep the patches apart, to better be able to
bisect possible issues.

best,
Gert

Gert Wollny (2):
  r600: correct texture offset for array index lookup
  r600: set rounding mode for texture array layer selection

 src/gallium/drivers/r600/evergreen_state.c | 22 ++
 src/gallium/drivers/r600/r600_shader.c | 18 +-
 2 files changed, 39 insertions(+), 1 deletion(-)

-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] nv50/ir: improve maintainability of Target*::initOpInfo()

2018-06-29 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Jun 29, 2018 at 9:51 AM, Rhys Perry  wrote:
> This is mainly useful for when one needs to add new opcodes in a painless
> and reliable way.
>
> Signed-off-by: Rhys Perry 
> ---
> Changes in v2:
> - use operation instead of uint32_t
> - use ARRAY_SIZE()
> - add "set below" comments
>
> I don't have push access.
>
>  .../nouveau/codegen/nv50_ir_target_nv50.cpp| 27 
> --
>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp| 24 ++-
>  2 files changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index 83b4102b0a..ad76d2dcb4 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -111,16 +111,15 @@ void TargetNV50::initOpInfo()
>  {
> unsigned int i, j;
>
> -   static const uint32_t commutative[(OP_LAST + 31) / 32] =
> +   static const operation commutativeList[] =
> {
> -  // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, 
> SET_XOR,
> -  // SET, SELP, SLCT
> -  0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000
> +  OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN,
> +  OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
> };
> -   static const uint32_t shortForm[(OP_LAST + 31) / 32] =
> +   static const operation shortFormList[] =
> {
> -  // MOV, ADD, SUB, MUL, MAD, SAD, RCP, L/PINTERP, TEX, TXF
> -  0x00014e40, 0x00000080, 0x00001260, 0x00000000
> +  OP_MOV, OP_ADD, OP_SUB, OP_MUL, OP_MAD, OP_SAD, OP_RCP, OP_LINTERP,
> +  OP_PINTERP, OP_TEX, OP_TXF
> };
> static const operation noDestList[] =
> {
> @@ -157,18 +156,22 @@ void TargetNV50::initOpInfo()
>
>opInfo[i].hasDest = 1;
>opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
> -  opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
> +  opInfo[i].commutative = false; /* set below */
>opInfo[i].pseudo = (i < OP_MOV);
>opInfo[i].predicate = !opInfo[i].pseudo;
>opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
> -  opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
> +  opInfo[i].minEncSize = 8; /* set below */
> }
> -   for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i)
> +   for (i = 0; i < ARRAY_SIZE(commutativeList); ++i)
> +  opInfo[commutativeList[i]].commutative = true;
> +   for (i = 0; i < ARRAY_SIZE(shortFormList); ++i)
> +  opInfo[shortFormList[i]].minEncSize = 4;
> +   for (i = 0; i < ARRAY_SIZE(noDestList); ++i)
>opInfo[noDestList[i]].hasDest = 0;
> -   for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i)
> +   for (i = 0; i < ARRAY_SIZE(noPredList); ++i)
>opInfo[noPredList[i]].predicate = 0;
>
> -   for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
> +   for (i = 0; i < ARRAY_SIZE(_initProps); ++i) {
>const struct opProperties *prop = &_initProps[i];
>
>for (int s = 0; s < 3; ++s) {
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 8938d19f6c..1a62dce951 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -191,17 +191,15 @@ void TargetNVC0::initOpInfo()
>  {
> unsigned int i, j;
>
> -   static const uint32_t commutative[(OP_LAST + 31) / 32] =
> +   static const operation commutative[] =
> {
> -  // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, 
> SET_XOR,
> -  // SET, SELP, SLCT
> -  0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000
> +  OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN,
> +  OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
> };
>
> -   static const uint32_t shortForm[(OP_LAST + 31) / 32] =
> +   static const operation shortForm[] =
> {
> -  // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN
> -  0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000
> +  OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_AND, OP_OR, OP_XOR, OP_MAX, OP_MIN
> };
>
> static const operation noDest[] =
> @@ -240,15 +238,19 @@ void TargetNVC0::initOpInfo()
>
>opInfo[i].hasDest = 1;
>opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
> -  opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
> +  opInfo[i].commutative = false; /* set below */
>opInfo[i].pseudo = (i < OP_MOV);
>opInfo[i].predicate = !opInfo[i].pseudo;
>opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
> -  opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
> +  opInfo[i].minEncSize = 8; /* set below */
> }
> -   for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)

[Mesa-dev] [PATCH 03/18] nir/linker: use empty block info to assign uniform locations

2018-06-29 Thread Alejandro Piñeiro
This handles uniforms that don't have an explicit location. Under
ARB_gl_spirv those are exceptions, like uniform atomic
counters.
---
 src/compiler/glsl/gl_nir_link_uniforms.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index 388c1ab63fc..77d3eaa5f2b 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -79,6 +79,8 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx,
}
 
/* Reserve locations for rest of the uniforms. */
+   link_util_update_empty_uniform_locations(prog);
+
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
   struct gl_uniform_storage *uniform = &prog->data->UniformStorage[i];
 
@@ -93,22 +95,23 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx,
   if (uniform->remap_location != UNMAPPED_UNIFORM_LOC)
  continue;
 
-  /* How many new entries for this uniform? */
+  /* How many entries for this uniform? */
   const unsigned entries = MAX2(1, uniform->array_elements);
 
-  /* @FIXME: By now, we add un-assigned uniform locations to the end of
-   * the uniform file. We need to keep track of empty locations and use
-   * them.
-   */
-  unsigned chosen_location = prog->NumUniformRemapTable;
-
-  /* resize remap table to fit new entries */
-  prog->UniformRemapTable =
- reralloc(prog,
-  prog->UniformRemapTable,
-  struct gl_uniform_storage *,
-  prog->NumUniformRemapTable + entries);
-  prog->NumUniformRemapTable += entries;
+  unsigned chosen_location =
+ link_util_find_empty_block(prog, &prog->data->UniformStorage[i]);
+
+  if (chosen_location == -1) {
+ chosen_location = prog->NumUniformRemapTable;
+
+ /* resize remap table to fit new entries */
+ prog->UniformRemapTable =
+reralloc(prog,
+ prog->UniformRemapTable,
+ struct gl_uniform_storage *,
+ prog->NumUniformRemapTable + entries);
+ prog->NumUniformRemapTable += entries;
+  }
 
   /* set the base location in remap table for the uniform */
   uniform->remap_location = chosen_location;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/18] nir/linker: handle uniforms without explicit location

2018-06-29 Thread Alejandro Piñeiro
ARB_gl_spirv points out that uniforms in general need an explicit
location. But there are still some cases of uniforms without a location,
like, for example, uniform atomic counters. Those don't have a
location from the OpenGL point of view (they are identified with a
binding), but Mesa internally assigns them a location.

Signed-off-by: Eduardo Lima 
Signed-off-by: Alejandro Piñeiro 
Signed-off-by: Neil Roberts 
---

The @FIXME included in the patch below is solved by the follow-up
patch "nir/linker: use empty block info to assign uniform locations",
so perhaps it makes sense to just squash both patches. I don't have a
strong opinion on that, but I think that it would be easier to review
as split patches.


 src/compiler/glsl/gl_nir_link_uniforms.c | 61 ++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/gl_nir_link_uniforms.c 
b/src/compiler/glsl/gl_nir_link_uniforms.c
index c6961fbb6ca..388c1ab63fc 100644
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -36,6 +36,8 @@
  * normal uniforms as mandatory, and so on).
  */
 
+#define UNMAPPED_UNIFORM_LOC ~0u
+
 static void
 nir_setup_uniform_remap_tables(struct gl_context *ctx,
struct gl_shader_program *prog)
@@ -58,8 +60,59 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx,
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
   struct gl_uniform_storage *uniform = &prog->data->UniformStorage[i];
 
+  if (prog->data->UniformStorage[i].remap_location == UNMAPPED_UNIFORM_LOC)
+ continue;
+
+  /* How many new entries for this uniform? */
+  const unsigned entries = MAX2(1, uniform->array_elements);
+  unsigned num_slots = glsl_get_component_slots(uniform->type);
+
+  uniform->storage = &data[data_pos];
+
+  /* Set remap table entries point to correct gl_uniform_storage. */
+  for (unsigned j = 0; j < entries; j++) {
+ unsigned element_loc = uniform->remap_location + j;
+ prog->UniformRemapTable[element_loc] = uniform;
+
+ data_pos += num_slots;
+  }
+   }
+
+   /* Reserve locations for rest of the uniforms. */
+   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
+  struct gl_uniform_storage *uniform = &prog->data->UniformStorage[i];
+
+  if (uniform->is_shader_storage)
+ continue;
+
+  /* Built-in uniforms should not get any location. */
+  if (uniform->builtin)
+ continue;
+
+  /* Explicit ones have been set already. */
+  if (uniform->remap_location != UNMAPPED_UNIFORM_LOC)
+ continue;
+
   /* How many new entries for this uniform? */
   const unsigned entries = MAX2(1, uniform->array_elements);
+
+  /* @FIXME: By now, we add un-assigned uniform locations to the end of
+   * the uniform file. We need to keep track of empty locations and use
+   * them.
+   */
+  unsigned chosen_location = prog->NumUniformRemapTable;
+
+  /* resize remap table to fit new entries */
+  prog->UniformRemapTable =
+ reralloc(prog,
+  prog->UniformRemapTable,
+  struct gl_uniform_storage *,
+  prog->NumUniformRemapTable + entries);
+  prog->NumUniformRemapTable += entries;
+
+  /* set the base location in remap table for the uniform */
+  uniform->remap_location = chosen_location;
+
   unsigned num_slots = glsl_get_component_slots(uniform->type);
 
   uniform->storage = &data[data_pos];
@@ -302,8 +355,12 @@ nir_link_uniform(struct gl_context *ctx,
   }
   uniform->active_shader_mask |= 1 << stage;
 
-  /* Uniform has an explicit location */
-  uniform->remap_location = location;
+  if (location >= 0) {
+ /* Uniform has an explicit location */
+ uniform->remap_location = location;
+  } else {
+ uniform->remap_location = UNMAPPED_UNIFORM_LOC;
+  }
 
   /* @FIXME: the initialization of the following will be done as we
* implement support for their specific features, like SSBO, atomics,
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/18] spirv/nir: tweak nir type when storage class is SpvStorageClassAtomicCounter

2018-06-29 Thread Alejandro Piñeiro
GLSL types differentiate uint from atomic uint. In SPIR-V the type is
uint, and the variable has a specific storage class. So we need to
tweak the type based on the storage class.

Ideally we would like to get the proper type at vtn_handle_type, but
we don't have the storage class at that moment.

We tweak only the nir type, as it is the one that really requires it.
---

Again, this commit could probably be squashed with the follow-up patch
"nir/spirv: Fix atomic counter (multidimensional-)arrays". Sending as
two different patches to make the review easier.


 src/compiler/spirv/vtn_variables.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index c56d74d683b..a40c30c8a75 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1643,7 +1643,17 @@ vtn_create_variable(struct vtn_builder *b, struct 
vtn_value *val,
   /* For these, we create the variable normally */
   var->var = rzalloc(b->shader, nir_variable);
   var->var->name = ralloc_strdup(var->var, val->name);
-  var->var->type = var->type->type;
+
+  /* We need to tweak the nir type here because at vtn_handle_type we don't
+   * have access to storage_class, which is what tells us that this is
+   * an atomic uint.
+   */
+  if (glsl_get_base_type(var->type->type) == GLSL_TYPE_UINT &&
+  storage_class == SpvStorageClassAtomicCounter) {
+ var->var->type = glsl_atomic_uint_type();
+  } else {
+ var->var->type = var->type->type;
+  }
   var->var->data.mode = nir_mode;
   var->var->data.location = -1;
   var->var->interface_type = NULL;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >