Module: Mesa
Branch: main
Commit: 4f622d62d0dd8967f16ddd6a3f989310c6543c1a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4f622d62d0dd8967f16ddd6a3f989310c6543c1a

Author: Marek Olšák <[email protected]>
Date:   Wed Jul 20 11:23:26 2022 -0400

ac/nir: add ac_nir_lower_resinfo

Emulating image_get_resinfo should be faster than using the hw.

Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17693>

---

 src/amd/common/ac_nir.h               |   2 +
 src/amd/common/ac_nir_lower_resinfo.c | 332 ++++++++++++++++++++++++++++++++++
 src/amd/common/meson.build            |   1 +
 3 files changed, 335 insertions(+)

diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index 601f48ca0ff..9a9b39d0ddc 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -166,6 +166,8 @@ ac_nir_cull_triangle(nir_builder *b,
 bool
 ac_nir_lower_global_access(nir_shader *shader);
 
+bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level); /* ac_nir_lower_resinfo.c */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_lower_resinfo.c b/src/amd/common/ac_nir_lower_resinfo.c
new file mode 100644
index 00000000000..3aa5071cba0
--- /dev/null
+++ b/src/amd/common/ac_nir_lower_resinfo.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright © 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Implement query_size, query_levels, and query_samples by extracting the information from
+ * descriptors. This is expected to be faster than image_resinfo.
+ */
+
+#include "ac_nir.h"
+#include "nir_builder.h"
+#include "amdgfxregs.h"
+
+static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
+{ /* Unsigned bitfield extract from dword `index` of `desc`; the 1 bits of `mask` mark the field. */
+   return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
+} /* Callers in this file pass the complement of a C_* register mask as `mask`. */
+
+static nir_ssa_def *handle_null_desc(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *value)
+{ /* Select 0 instead of `value` when dword 1 of `desc` is 0 (treated here as a null descriptor). */
+   nir_ssa_def *is_null = nir_ieq_imm(b, nir_channel(b, desc, 1), 0);
+   return nir_bcsel(b, is_null, nir_imm_int(b, 0), value);
+}
+
+static nir_ssa_def *query_samples(nir_builder *b, nir_ssa_def *desc, enum glsl_sampler_dim dim)
+{ /* Sample count for the resource in `desc`: constant 1 unless `dim` is GLSL_SAMPLER_DIM_MS. */
+   nir_ssa_def *samples;
+
+   if (dim == GLSL_SAMPLER_DIM_MS) {
+      /* LAST_LEVEL contains log2(num_samples). */
+      samples = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
+      samples = nir_ishl(b, nir_imm_int(b, 1), samples); /* 1 << log2(num_samples) */
+   } else {
+      samples = nir_imm_int(b, 1);
+   }
+
+   return handle_null_desc(b, desc, samples); /* 0 is selected for a null descriptor */
+}
+
+static nir_ssa_def *query_levels(nir_builder *b, nir_ssa_def *desc)
+{ /* Level count from dword 3 of `desc`: LAST_LEVEL - BASE_LEVEL + 1. */
+   nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
+   nir_ssa_def *last_level = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
+
+   nir_ssa_def *levels = nir_iadd_imm(b, nir_isub(b, last_level, base_level), 1);
+
+   return handle_null_desc(b, desc, levels); /* 0 is selected for a null descriptor */
+}
+
+static nir_ssa_def *
+lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
+                 enum glsl_sampler_dim dim, bool is_array, enum amd_gfx_level gfx_level)
+{ /* Compute the size of the resource behind `desc` from descriptor fields; `lod` may be NULL. */
+   if (dim == GLSL_SAMPLER_DIM_BUF) {
+      nir_ssa_def *size = nir_channel(b, desc, 2);
+
+      if (gfx_level == GFX8) {
+         /* On GFX8, the descriptor contains the size in bytes,
+          * but TXQ must return the size in elements.
+          * The stride is always non-zero for resources using TXQ.
+          * Divide the size by the stride.
+          */
+         size = nir_udiv(b, size, get_field(b, desc, 1, ~C_008F04_STRIDE));
+      }
+      return size; /* scalar; buffers return before the null-descriptor check at the end */
+   }
+
+   /* Cube textures return (height, height) instead of (width, height) because it's fewer
+    * instructions.
+    */
+   bool has_width = dim != GLSL_SAMPLER_DIM_CUBE;
+   bool has_height = dim != GLSL_SAMPLER_DIM_1D;
+   bool has_depth = dim == GLSL_SAMPLER_DIM_3D;
+   nir_ssa_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
+   nir_ssa_def *last_array = NULL, *depth = NULL;
+
+   /* Get the width, height, depth, layers. */
+   if (gfx_level >= GFX10) {
+      if (has_width) {
+         nir_ssa_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
+         nir_ssa_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
+         /* Use iadd to get s_lshl2_add_u32 in the end. */
+         width = nir_iadd(b, width_lo, nir_ishl_imm(b, width_hi, 2));
+      }
+      if (has_height)
+         height = get_field(b, desc, 2, ~C_00A008_HEIGHT);
+      if (has_depth)
+         depth = get_field(b, desc, 4, ~C_00A010_DEPTH);
+
+      if (is_array) {
+         last_array = get_field(b, desc, 4, ~C_00A010_DEPTH); /* DEPTH is read as the last layer */
+         base_array = get_field(b, desc, 4, ~C_00A010_BASE_ARRAY);
+      }
+   } else {
+      if (has_width)
+         width = get_field(b, desc, 2, ~C_008F18_WIDTH);
+      if (has_height)
+         height = get_field(b, desc, 2, ~C_008F18_HEIGHT);
+      if (has_depth)
+         depth = get_field(b, desc, 4, ~C_008F20_DEPTH);
+
+      if (is_array) {
+         base_array = get_field(b, desc, 5, ~C_008F24_BASE_ARRAY);
+
+         if (gfx_level == GFX9) {
+            last_array = get_field(b, desc, 4, ~C_008F20_DEPTH); /* GFX9: taken from DEPTH */
+         } else {
+            last_array = get_field(b, desc, 5, ~C_008F24_LAST_ARRAY);
+         }
+      }
+   }
+
+   /* All values are off by 1. */
+   if (has_width)
+      width = nir_iadd_imm(b, width, 1);
+   if (has_height)
+      height = nir_iadd_imm(b, height, 1);
+   if (has_depth)
+      depth = nir_iadd_imm(b, depth, 1);
+
+   if (is_array) {
+      layers = nir_isub(b, last_array, base_array);
+      layers = nir_iadd_imm(b, layers, 1); /* the layer count is not minified below */
+   }
+
+   /* Minify the dimensions according to base_level + lod. */
+   if (dim != GLSL_SAMPLER_DIM_MS && dim != GLSL_SAMPLER_DIM_RECT) {
+      nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
+      nir_ssa_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
+
+      if (has_width)
+         width = nir_ushr(b, width, level);
+      if (has_height)
+         height = nir_ushr(b, height, level);
+      if (has_depth)
+         depth = nir_ushr(b, depth, level);
+
+      /* 1D and square texture can't have 0 size unless the lod is out-of-bounds, which is
+       * undefined. Only non-square targets can have one of the sizes 0 with an in-bounds lod
+       * after minification.
+       */
+      if (has_width && has_height) { /* i.e. dim is neither 1D nor CUBE */
+         if (has_width) /* always true inside this branch */
+            width = nir_umax(b, width, nir_imm_int(b, 1));
+         if (has_height) /* always true inside this branch */
+            height = nir_umax(b, height, nir_imm_int(b, 1));
+         if (has_depth)
+            depth = nir_umax(b, depth, nir_imm_int(b, 1));
+      }
+   }
+
+   nir_ssa_def *result = NULL;
+
+   /* Construct the result: a scalar, vec2 or vec3 depending on dim and is_array. */
+   switch (dim) {
+   case GLSL_SAMPLER_DIM_1D:
+      result = is_array ? nir_vec2(b, width, layers) : width;
+      break;
+   case GLSL_SAMPLER_DIM_CUBE:
+      result = is_array ? nir_vec3(b, height, height, layers) : nir_vec2(b, height, height);
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_MS:
+   case GLSL_SAMPLER_DIM_RECT:
+      result = is_array ? nir_vec3(b, width, height, layers) : nir_vec2(b, width, height);
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+      result = nir_vec3(b, width, height, depth);
+      break;
+   default:
+      unreachable("invalid sampler dim");
+   }
+
+   return handle_null_desc(b, desc, result); /* 0 is selected for a null descriptor */
+}
+
+static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
+{ /* Pass callback: replace one size/levels/samples query with descriptor math; true if replaced. */
+   enum amd_gfx_level gfx_level = *(enum amd_gfx_level*)data; /* `data` points at the gfx level */
+   nir_ssa_def *result = NULL, *dst = NULL;
+
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+      const struct glsl_type *type;
+      enum glsl_sampler_dim dim;
+      bool is_array;
+      nir_ssa_def *desc = NULL;
+
+      dst = &intr->dest.ssa;
+      b->cursor = nir_before_instr(instr); /* new code is emitted in front of the query */
+
+      switch (intr->intrinsic) { /* step 1: load the descriptor for the image being queried */
+      case nir_intrinsic_image_size:
+      case nir_intrinsic_image_samples:
+         dim = nir_intrinsic_image_dim(intr);
+         is_array = nir_intrinsic_image_array(intr);
+         desc = nir_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+                                         32, intr->src[0].ssa); /* 4 dwords for buffers, else 8 */
+         break;
+
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_samples:
+         type = nir_instr_as_deref(intr->src[0].ssa->parent_instr)->type;
+         dim = glsl_get_sampler_dim(type);
+         is_array = glsl_sampler_type_is_array(type);
+         desc = nir_image_deref_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+                                               32, intr->src[0].ssa);
+         break;
+
+      case nir_intrinsic_bindless_image_size:
+      case nir_intrinsic_bindless_image_samples:
+         dim = nir_intrinsic_image_dim(intr);
+         is_array = nir_intrinsic_image_array(intr);
+         desc = nir_bindless_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
+                                                  32, intr->src[0].ssa);
+         break;
+
+      default:
+         return false; /* not an image size/samples query: leave the instruction alone */
+      }
+
+      switch (intr->intrinsic) { /* step 2: compute the queried value from the descriptor */
+      case nir_intrinsic_image_size:
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_bindless_image_size:
+         result = lower_query_size(b, desc, NULL, dim, is_array, gfx_level); /* NULL: no lod added */
+         break;
+
+      case nir_intrinsic_image_samples:
+      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_bindless_image_samples:
+         result = query_samples(b, desc, dim);
+         break;
+
+      default: /* not reached: the first switch already returned for every other intrinsic */
+         assert(!desc);
+         return false;
+      }
+   } else if (instr->type == nir_instr_type_tex) {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      nir_tex_instr *new_tex;
+      nir_ssa_def *desc = NULL;
+      nir_src *lod = NULL;
+
+      dst = &tex->dest.ssa;
+      b->cursor = nir_before_instr(instr);
+
+      switch (tex->op) {
+      case nir_texop_txs:
+      case nir_texop_query_levels:
+      case nir_texop_texture_samples:
+         for (unsigned i = 0; i < tex->num_srcs; i++) { /* find the texture and lod sources */
+            switch (tex->src[i].src_type) {
+            case nir_tex_src_texture_deref:
+            case nir_tex_src_texture_handle:
+               new_tex = nir_tex_instr_create(b->shader, 1); /* descriptor load, one source */
+               new_tex->op = nir_texop_descriptor_amd;
+               new_tex->sampler_dim = tex->sampler_dim;
+               new_tex->is_array = tex->is_array;
+               new_tex->texture_index = tex->texture_index;
+               new_tex->sampler_index = tex->sampler_index;
+               new_tex->dest_type = nir_type_int32;
+               nir_src_copy(&new_tex->src[0].src, &tex->src[i].src);
+               new_tex->src[0].src_type = tex->src[i].src_type;
+               nir_ssa_dest_init(&new_tex->instr, &new_tex->dest,
+                                 nir_tex_instr_dest_size(new_tex), 32, NULL);
+               nir_builder_instr_insert(b, &new_tex->instr);
+               desc = &new_tex->dest.ssa;
+               break;
+
+            case nir_tex_src_lod:
+               lod = &tex->src[i].src;
+               break;
+
+            default:;
+            }
+         } /* NOTE(review): desc is still NULL if no deref/handle source was found - confirm. */
+
+         switch (tex->op) {
+         case nir_texop_txs:
+            result = lower_query_size(b, desc, lod, tex->sampler_dim, tex->is_array,
+                                      gfx_level);
+            break;
+         case nir_texop_query_levels:
+            result = query_levels(b, desc);
+            break;
+         case nir_texop_texture_samples:
+            result = query_samples(b, desc, tex->sampler_dim);
+            break;
+         default:
+            unreachable("shouldn't get here");
+         }
+         break;
+
+      default:
+         return false; /* any other texture op is left untouched */
+      }
+   }
+
+   if (!result) /* also covers instructions that are neither intrinsic nor tex */
+      return false;
+
+   nir_ssa_def_rewrite_uses_after(dst, result, instr); /* redirect uses of the query to `result` */
+   nir_instr_remove(instr);
+   return true;
+}
+
+bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level)
+{ /* Run the lower_resinfo callback on `nir` via nir_shader_instructions_pass; returns its result. */
+   return nir_shader_instructions_pass(nir, lower_resinfo,
+                                       nir_metadata_dominance |
+                                       nir_metadata_block_index,
+                                       &gfx_level); /* read back through `data` in lower_resinfo */
+}
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index 0b511b534a0..1d72ce7f0f1 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -96,6 +96,7 @@ amd_common_files = files(
   'ac_nir_cull.c',
   'ac_nir_lower_esgs_io_to_mem.c',
   'ac_nir_lower_global_access.c',
+  'ac_nir_lower_resinfo.c',
   'ac_nir_lower_taskmesh_io_to_mem.c',
   'ac_nir_lower_tess_io_to_mem.c',
   'ac_nir_lower_ngg.c',

Reply via email to