Mesa (main): radeonsi: convert "gfx11_create_sh_query_result_cs" shader to nir

GitLab Mirror Tue, 02 Jan 2024 03:07:41 -0800

Module: Mesa
Branch: main
Commit: f119f34742c41bff7d77ab69cc171a3ac68e2f97
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f119f34742c41bff7d77ab69cc171a3ac68e2f97


Author: Ganesh Belgur Ramachandra <[email protected]>
Date:   Tue Nov 21 02:55:54 2023 -0600

radeonsi: convert "gfx11_create_sh_query_result_cs" shader to nir

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25972>

---

 src/gallium/drivers/radeonsi/meson.build         |   1 -
 src/gallium/drivers/radeonsi/si_pipe.h           |   2 -
 src/gallium/drivers/radeonsi/si_shaderlib_nir.c  | 252 +++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 228 --------------------
 4 files changed, 252 insertions(+), 231 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/meson.build 
b/src/gallium/drivers/radeonsi/meson.build
index 29afa87c0b6..3c3903044f4 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -74,7 +74,6 @@ files_libradeonsi = files(
   'si_shader_internal.h',
   'si_shader_nir.c',
   'si_shaderlib_nir.c',
-  'si_shaderlib_tgsi.c',
   'si_sqtt.c',
   'si_state.c',
   'si_state.h',
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index eabfe3a1883..af3fd4151d9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1674,8 +1674,6 @@ void *si_clear_render_target_shader(struct si_context 
*sctx, enum pipe_texture_t
 void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
 void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, 
bool is_array);
 void *si_create_query_result_cs(struct si_context *sctx);
-
-/* si_shaderlib_tgsi.c */
 void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
 
 /* gfx11_query.c */
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c 
b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
index 5d65e9caa39..1ab8a0a5a65 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -8,6 +8,7 @@
 #define AC_SURFACE_INCLUDE_NIR
 #include "ac_surface.h"
 #include "si_pipe.h"
+#include "si_query.h"
 
 #include "nir_format_convert.h"
 
@@ -1253,3 +1254,254 @@ void *si_create_query_result_cs(struct si_context *sctx)
 
    return create_shader_state(sctx, b.shader);
 }
+
+/* Create the compute shader that is used to collect the results of gfx10+
+ * shader queries.
+ *
+ * One compute grid with a single thread is launched for every query result
+ * buffer. The thread (optionally) reads a previous summary buffer, then
+ * accumulates data from the query result buffer, and writes the result either
+ * to a summary buffer to be consumed by the next grid invocation or to the
+ * user-supplied buffer.
+ *
+ * Data layout:
+ *
+ * CONST
+ *  0.x = config;
+ *          [0:2] the low 3 bits indicate the mode:
+ *             0: sum up counts
+ *             1: determine result availability and write it as a boolean
+ *             2: SO_OVERFLOW
+ *          3: SO_ANY_OVERFLOW
+ *        the remaining bits form a bitfield:
+ *          8: write result as a 64-bit value
+ *  0.y = offset in bytes to counts or stream for SO_OVERFLOW mode
+ *  0.z = chain bit field:
+ *          1: have previous summary buffer
+ *          2: write next summary buffer
+ *  0.w = result_count
+ */
+void *gfx11_create_sh_query_result_cs(struct si_context *sctx)
+{
+   const nir_shader_compiler_options *options =
+   sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, 
PIPE_SHADER_COMPUTE);
+
+   nir_builder b =
+      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, 
"gfx11_create_sh_query_result_cs");
+   b.shader->info.workgroup_size[0] = 1;
+   b.shader->info.workgroup_size[1] = 1;
+   b.shader->info.workgroup_size[2] = 1;
+   b.shader->info.num_ubos = 1;
+   b.shader->info.num_ssbos = 3;
+   b.shader->num_uniforms = 1;
+
+   nir_def *zero = nir_imm_int(&b, 0);
+   nir_def *one = nir_imm_int(&b, 1);
+   nir_def *two = nir_imm_int(&b, 2);
+   nir_def *four = nir_imm_int(&b, 4);
+   nir_def *minus_one = nir_imm_int(&b, 0xffffffff);
+
+   /* uint32_t acc_result = 0, acc_missing = 0; */
+   nir_function_impl *e = nir_shader_get_entrypoint(b.shader);
+   nir_variable *acc_result = nir_local_variable_create(e, glsl_uint_type(), 
"acc_result");
+   nir_store_var(&b, acc_result, zero, 0x1);
+   nir_variable *acc_missing = nir_local_variable_create(e, glsl_uint_type(), 
"acc_missing");
+   nir_store_var(&b, acc_missing, zero, 0x1);
+
+   /* uint32_t buff_0[4] = load_ubo(0, 0); */
+   nir_def *buff_0 = nir_load_ubo(&b, 4, 32, zero, zero, .range_base = 0, 
.range = 16);
+
+   /* if((chain & 1) {
+    *    uint32_t result[2] = load_ssbo(1, 0);
+    *    acc_result = result[0];
+    *    acc_missing = result[1];
+    * }
+    */
+   nir_def *is_prev_summary_buffer = nir_i2b(&b, nir_iand(&b, nir_channel(&b, 
buff_0, 2), one));
+   nir_if *if_prev_summary_buffer = nir_push_if(&b, is_prev_summary_buffer); {
+      nir_def *result = nir_load_ssbo(&b, 2, 32, one, zero);
+         nir_store_var(&b, acc_result, nir_channel(&b, result, 0), 0x1);
+         nir_store_var(&b, acc_missing, nir_channel(&b, result, 1), 0x1);
+   }
+   nir_pop_if(&b, if_prev_summary_buffer);
+
+   /* uint32_t mode = config & 0b111;
+    * bool is_overflow = mode >= 2;
+    */
+   nir_def *mode = nir_iand_imm(&b, nir_channel(&b, buff_0, 0), 0b111);
+   nir_def *is_overflow = nir_uge(&b, mode, two);
+
+   /* uint32_t result_remaining = (is_overflow && acc_result) ? 0 : 
result_count; */
+   nir_variable *result_remaining = nir_local_variable_create(e, 
glsl_uint_type(), "result_remaining");
+   nir_variable *base_offset = nir_local_variable_create(e, glsl_uint_type(), 
"base_offset");
+   nir_def *state = nir_iand(&b,
+                             nir_isub(&b, zero, nir_b2i32(&b, is_overflow)),
+                             nir_load_var(&b, acc_result));
+   nir_def *value = nir_bcsel(&b, nir_i2b(&b, state), zero, nir_channel(&b, 
buff_0, 3));
+   nir_store_var(&b, result_remaining, value, 0x1);
+
+   /* uint32_t base_offset = 0; */
+   nir_store_var(&b, base_offset, zero, 0x1);
+
+   /* Outer loop begin.
+    *   while (!result_remaining) {
+    *      ...
+    */
+   nir_loop *loop_outer = nir_push_loop(&b); {
+      nir_def *condition = nir_load_var(&b, result_remaining);
+      nir_if *if_not_condition = nir_push_if(&b, nir_ieq(&b, condition, 
zero)); {
+         nir_jump(&b, nir_jump_break);
+      }
+      nir_pop_if(&b, if_not_condition);
+
+      /* result_remaining--; */
+      condition = nir_iadd(&b, condition, minus_one);
+      nir_store_var(&b, result_remaining, condition, 0x1);
+
+      /* uint32_t fence = load_ssbo(0, base_offset + 
sizeof(gfx11_sh_query_buffer_mem.stream)); */
+      nir_def *b_offset = nir_load_var(&b, base_offset);
+      uint64_t buffer_mem_stream_size = sizeof(((struct 
gfx11_sh_query_buffer_mem*)0)->stream);
+      nir_def *fence = nir_load_ssbo(&b, 1, 32, zero,
+                                    nir_iadd_imm(&b, b_offset, 
buffer_mem_stream_size));
+
+      /* if (!fence) {
+       *    acc_missing = ~0u;
+       *    break;
+       * }
+       */
+      nir_def *is_zero = nir_ieq(&b, fence, zero);
+      nir_def *y_value = nir_isub(&b, zero, nir_b2i32(&b, is_zero));
+      nir_store_var(&b, acc_missing, y_value, 0x1);
+      nir_if *if_ssbo_zero = nir_push_if(&b, is_zero); {
+         nir_jump(&b, nir_jump_break);
+      }
+      nir_pop_if(&b, if_ssbo_zero);
+
+      /* stream_offset = base_offset + offset; */
+      nir_def *s_offset = nir_iadd(&b, b_offset, nir_channel(&b, buff_0, 1));
+
+      /* if (!(config & 7)) {
+       *    acc_result += buffer[0]@stream_offset;
+       * }
+       */
+      nir_if *if_sum_up_counts = nir_push_if(&b, nir_ieq(&b, mode, zero)); {
+         nir_def *x_value = nir_load_ssbo(&b, 1, 32, zero, s_offset);
+         x_value = nir_iadd(&b, nir_load_var(&b, acc_result), x_value);
+         nir_store_var(&b, acc_result, x_value, 0x1);
+      }
+      nir_pop_if(&b, if_sum_up_counts);
+
+      /* if (is_overflow) {
+       *    uint32_t count = (config & 1) ? 4 : 1;
+       *    ...
+       */
+      nir_if *if_overflow = nir_push_if(&b, is_overflow); {
+         nir_def *is_result_available = nir_i2b(&b, nir_iand(&b, mode, one));
+         nir_def *initial_count = nir_bcsel(&b, is_result_available, four, 
one);
+
+         nir_variable *count =
+            nir_local_variable_create(e, glsl_uint_type(), "count");
+         nir_store_var(&b, count, initial_count, 0x1);
+
+         nir_variable *stream_offset =
+            nir_local_variable_create(e, glsl_uint_type(), "stream_offset");
+         nir_store_var(&b, stream_offset, s_offset, 0x1);
+
+         /* Inner loop begin.
+          *    do {
+          *       ...
+          */
+         nir_loop *loop_inner = nir_push_loop(&b); {
+            /* uint32_t buffer[4] = load_ssbo(0, stream_offset + 2 * 
sizeof(uint64_t)); */
+            nir_def *stream_offset_value = nir_load_var(&b, stream_offset);
+            nir_def *buffer =
+               nir_load_ssbo(&b, 4, 32, zero,
+                             nir_iadd_imm(&b, stream_offset_value, 2 * 
sizeof(uint64_t)));
+
+            /* if (generated != emitted) {
+             *    acc_result = 1;
+             *    base_offset = 0;
+             *    break;
+             * }
+             */
+            nir_def *generated = nir_channel(&b, buffer, 0);
+            nir_def *emitted = nir_channel(&b, buffer, 2);
+            nir_if *if_not_equal = nir_push_if(&b, nir_ine(&b, generated, 
emitted)); {
+               nir_store_var(&b, acc_result, one, 0x1);
+               nir_store_var(&b, base_offset, zero, 0x1);
+               nir_jump(&b, nir_jump_break);
+            }
+            nir_pop_if(&b, if_not_equal);
+
+            /* stream_offset += sizeof(gfx11_sh_query_buffer_mem.stream[0]); */
+            uint64_t buffer_mem_stream0_size =
+               sizeof(((struct gfx11_sh_query_buffer_mem*)0)->stream[0]);
+            stream_offset_value = nir_iadd_imm(&b, stream_offset_value, 
buffer_mem_stream0_size);
+            nir_store_var(&b, stream_offset, stream_offset_value, 0x1);
+
+            /* } while(count--); */
+            nir_def *loop_count = nir_load_var(&b, count);
+            loop_count = nir_iadd(&b, loop_count, minus_one);
+            nir_store_var(&b, count, loop_count, 0x1);
+
+            nir_if *if_zero = nir_push_if(&b, nir_ieq(&b, loop_count, zero)); {
+               nir_jump(&b, nir_jump_break);
+            }
+            nir_pop_if(&b, if_zero);
+         }
+         nir_pop_loop(&b, loop_inner); /* Inner loop end */
+      }
+      nir_pop_if(&b, if_overflow);
+
+      /* base_offset += sizeof(gfx11_sh_query_buffer_mem); */
+      nir_def *buffer_mem_size = nir_imm_int(&b, sizeof(struct 
gfx11_sh_query_buffer_mem));
+      nir_store_var(&b, base_offset, nir_iadd(&b, nir_load_var(&b, 
base_offset), buffer_mem_size), 0x1);
+   }
+   nir_pop_loop(&b, loop_outer); /* Outer loop end */
+
+   nir_def *acc_result_value = nir_load_var(&b, acc_result);
+   nir_def *y_value = nir_load_var(&b, acc_missing);
+
+   /* if ((chain & 2)) {
+    *    store_ssbo(<acc_result, acc_missing>, 2, 0);
+    *    ...
+    */
+   nir_def *is_write_summary_buffer = nir_i2b(&b, nir_iand(&b, nir_channel(&b, 
buff_0, 2), two));
+   nir_if *if_write_summary_buffer = nir_push_if(&b, is_write_summary_buffer); 
{
+      nir_store_ssbo(&b, nir_vec2(&b, acc_result_value, y_value), two, zero);
+   } nir_push_else(&b, if_write_summary_buffer); {
+
+      /* } else {
+       *    if ((config & 7) == 1) {
+       *       acc_result = acc_missing ? 0 : 1;
+       *       acc_missing = 0;
+       *    }
+       *    ...
+       */
+      nir_def *is_result_available = nir_ieq(&b, mode, one);
+      nir_def *is_zero = nir_ieq(&b, y_value, zero);
+      acc_result_value = nir_bcsel(&b, is_result_available, nir_b2i32(&b, 
is_zero), acc_result_value);
+      nir_def *ny = nir_bcsel(&b, is_result_available, zero, y_value);
+
+      /* if (!acc_missing) {
+       *    store_ssbo(acc_result, 2, 0);
+       *    if (config & 8)) {
+       *       store_ssbo(0, 2, 4)
+       *    }
+       * }
+       */
+      nir_if *if_zero = nir_push_if(&b, nir_ieq(&b, ny, zero)); {
+         nir_store_ssbo(&b, acc_result_value, two, zero);
+
+         nir_def *is_so_any_overflow = nir_i2b(&b, nir_iand_imm(&b, 
nir_channel(&b, buff_0, 0), 8));
+         nir_if *if_so_any_overflow = nir_push_if(&b, is_so_any_overflow); {
+            nir_store_ssbo(&b, zero, two, four);
+         }
+         nir_pop_if(&b, if_so_any_overflow);
+      }
+      nir_pop_if(&b, if_zero);
+   }
+   nir_pop_if(&b, if_write_summary_buffer);
+
+   return create_shader_state(sctx, b.shader);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 
b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
deleted file mode 100644
index 2eab564ca26..00000000000
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * SPDX-License-Identifier: MIT
- */
-
-#include "si_pipe.h"
-#include "tgsi/tgsi_text.h"
-#include "tgsi/tgsi_ureg.h"
-
-/* Create the compute shader that is used to collect the results of gfx10+
- * shader queries.
- *
- * One compute grid with a single thread is launched for every query result
- * buffer. The thread (optionally) reads a previous summary buffer, then
- * accumulates data from the query result buffer, and writes the result either
- * to a summary buffer to be consumed by the next grid invocation or to the
- * user-supplied buffer.
- *
- * Data layout:
- *
- * BUFFER[0] = query result buffer (layout is defined by 
gfx10_sh_query_buffer_mem)
- * BUFFER[1] = previous summary buffer
- * BUFFER[2] = next summary buffer or user-supplied buffer
- *
- * CONST
- *  0.x = config; the low 3 bits indicate the mode:
- *          0: sum up counts
- *          1: determine result availability and write it as a boolean
- *          2: SO_OVERFLOW
- *          3: SO_ANY_OVERFLOW
- *        the remaining bits form a bitfield:
- *          8: write result as a 64-bit value
- *  0.y = offset in bytes to counts or stream for SO_OVERFLOW mode
- *  0.z = chain bit field:
- *          1: have previous summary buffer
- *          2: write next summary buffer
- *  0.w = result_count
- */
-void *gfx11_create_sh_query_result_cs(struct si_context *sctx)
-{
-   /* TEMP[0].x = accumulated result so far
-    * TEMP[0].y = result missing
-    * TEMP[0].z = whether we're in overflow mode
-    */
-   static const char text_tmpl[] =
-         "COMP\n"
-         "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
-         "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
-         "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
-         "DCL BUFFER[0]\n"
-         "DCL BUFFER[1]\n"
-         "DCL BUFFER[2]\n"
-         "DCL CONST[0][0..0]\n"
-         "DCL TEMP[0..5]\n"
-         "IMM[0] UINT32 {0, 7, 256, 4294967295}\n"
-         "IMM[1] UINT32 {1, 2, 4, 8}\n"
-         "IMM[2] UINT32 {16, 32, 64, 128}\n"
-
-         /* acc_result = 0;
-          * acc_missing = 0;
-          */
-         "MOV TEMP[0].xy, IMM[0].xxxx\n"
-
-         /* if (chain & 1) {
-          *    acc_result = buffer[1][0];
-          *    acc_missing = buffer[1][1];
-          * }
-          */
-         "AND TEMP[5], CONST[0][0].zzzz, IMM[1].xxxx\n"
-         "UIF TEMP[5]\n"
-         "LOAD TEMP[0].xy, BUFFER[1], IMM[0].xxxx\n"
-         "ENDIF\n"
-
-         /* is_overflow (TEMP[0].z) = (config & 7) >= 2; */
-         "AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
-         "USGE TEMP[0].z, TEMP[5].xxxx, IMM[1].yyyy\n"
-
-         /* result_remaining (TEMP[1].x) = (is_overflow && acc_result) ? 0 : 
result_count; */
-         "AND TEMP[5].x, TEMP[0].zzzz, TEMP[0].xxxx\n"
-         "UCMP TEMP[1].x, TEMP[5].xxxx, IMM[0].xxxx, CONST[0][0].wwww\n"
-
-         /* base_offset (TEMP[1].y) = 0; */
-         "MOV TEMP[1].y, IMM[0].xxxx\n"
-
-         /* for (;;) {
-          *    if (!result_remaining) {
-          *       break;
-          *    }
-          *    result_remaining--;
-          */
-         "BGNLOOP\n"
-         "  USEQ TEMP[5], TEMP[1].xxxx, IMM[0].xxxx\n"
-         "  UIF TEMP[5]\n"
-         "     BRK\n"
-         "  ENDIF\n"
-         "  UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"
-
-         /*    fence = buffer[0]@(base_offset + 
sizeof(gfx10_sh_query_buffer_mem.stream)); */
-         "  UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n"
-         "  LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
-
-         /*    if (!fence) {
-          *       acc_missing = ~0u;
-          *       break;
-          *    }
-          */
-         "  USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
-         "  UIF TEMP[5]\n"
-         "     MOV TEMP[0].y, TEMP[5].xxxx\n"
-         "     BRK\n"
-         "  ENDIF\n"
-
-         /*    stream_offset (TEMP[2].x) = base_offset + offset; */
-         "  UADD TEMP[2].x, TEMP[1].yyyy, CONST[0][0].yyyy\n"
-
-         /*    if (!(config & 7)) {
-          *       acc_result += buffer[0]@stream_offset;
-          *    }
-          */
-         "  AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
-         "  USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
-         "  UIF TEMP[5]\n"
-         "     LOAD TEMP[5].x, BUFFER[0], TEMP[2].xxxx\n"
-         "     UADD TEMP[0].x, TEMP[0].xxxx, TEMP[5].xxxx\n"
-         "  ENDIF\n"
-
-         /*    if ((config & 7) >= 2) {
-          *       count (TEMP[2].y) = (config & 1) ? 4 : 1;
-          */
-         "  AND TEMP[5].x, CONST[0][0].xxxx, IMM[0].yyyy\n"
-         "  USGE TEMP[5], TEMP[5].xxxx, IMM[1].yyyy\n"
-         "  UIF TEMP[5]\n"
-         "     AND TEMP[5].x, CONST[0][0].xxxx, IMM[1].xxxx\n"
-         "     UCMP TEMP[2].y, TEMP[5].xxxx, IMM[1].zzzz, IMM[1].xxxx\n"
-
-         /*       do {
-          *          generated = buffer[0]@(stream_offset + 2 * 
sizeof(uint64_t));
-          *          emitted = buffer[0]@(stream_offset + 3 * 
sizeof(uint64_t));
-          *          if (generated != emitted) {
-          *             acc_result = 1;
-          *             result_remaining = 0;
-          *             break;
-          *          }
-          *
-          *          stream_offset += 
sizeof(gfx10_sh_query_buffer_mem.stream[0]);
-          *       } while (--count);
-          *    }
-          */
-         "     BGNLOOP\n"
-         "        UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
-         "        LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n"
-         "        USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n"
-         "        UIF TEMP[5]\n"
-         "           MOV TEMP[0].x, IMM[1].xxxx\n"
-         "           MOV TEMP[1].y, IMM[0].xxxx\n"
-         "           BRK\n"
-         "        ENDIF\n"
-
-         "        UADD TEMP[2].y, TEMP[2].yyyy, IMM[0].wwww\n"
-         "        USEQ TEMP[5], TEMP[2].yyyy, IMM[0].xxxx\n"
-         "        UIF TEMP[5]\n"
-         "           BRK\n"
-         "        ENDIF\n"
-         "        UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n"
-         "     ENDLOOP\n"
-         "  ENDIF\n"
-
-         /*    base_offset += sizeof(gfx10_sh_query_buffer_mem);
-          * } // end outer loop
-          */
-         "  UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n"
-         "ENDLOOP\n"
-
-         /* if (chain & 2) {
-          *    buffer[2][0] = acc_result;
-          *    buffer[2][1] = acc_missing;
-          * } else {
-          */
-         "AND TEMP[5], CONST[0][0].zzzz, IMM[1].yyyy\n"
-         "UIF TEMP[5]\n"
-         "  STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0]\n"
-         "ELSE\n"
-
-         /*    if ((config & 7) == 1) {
-          *       acc_result = acc_missing ? 0 : 1;
-          *       acc_missing = 0;
-          *    }
-          */
-         "  AND TEMP[5], CONST[0][0].xxxx, IMM[0].yyyy\n"
-         "  USEQ TEMP[5], TEMP[5].xxxx, IMM[1].xxxx\n"
-         "  UIF TEMP[5]\n"
-         "     UCMP TEMP[0].x, TEMP[0].yyyy, IMM[0].xxxx, IMM[1].xxxx\n"
-         "     MOV TEMP[0].y, IMM[0].xxxx\n"
-         "  ENDIF\n"
-
-         /*    if (!acc_missing) {
-          *       buffer[2][0] = acc_result;
-          *       if (config & 8) {
-          *          buffer[2][1] = 0;
-          *       }
-          *    }
-          * }
-          */
-         "  USEQ TEMP[5], TEMP[0].yyyy, IMM[0].xxxx\n"
-         "  UIF TEMP[5]\n"
-         "     STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
-         "     AND TEMP[5], CONST[0][0].xxxx, IMM[1].wwww\n"
-         "     UIF TEMP[5]\n"
-         "        STORE BUFFER[2].x, IMM[1].zzzz, TEMP[0].yyyy\n"
-         "     ENDIF\n"
-         "  ENDIF\n"
-         "ENDIF\n"
-         "END\n";
-
-   struct tgsi_token tokens[1024];
-   struct pipe_compute_state state = {};
-
-   if (!tgsi_text_translate(text_tmpl, tokens, ARRAY_SIZE(tokens))) {
-      assert(false);
-      return NULL;
-   }
-
-   state.ir_type = PIPE_SHADER_IR_TGSI;
-   state.prog = tokens;
-
-   return sctx->b.create_compute_state(&sctx->b, &state);
-}

Mesa (main): radeonsi: convert "gfx11_create_sh_query_result_cs" shader to nir

Reply via email to