Module: Mesa
Branch: main
Commit: e83ec595df652d6e156bfc45db646ec2af2441f1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e83ec595df652d6e156bfc45db646ec2af2441f1

Author: Samuel Pitoiset <[email protected]>
Date:   Mon Oct 31 13:35:26 2022 +0100

radv: use 32-bit GDS counters for primitives generated query with NGG

There is no need to use 64-bit operations for these counters.

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19410>

---

 src/amd/vulkan/radv_query.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index c12a460f03b..5fd4617d9ba 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -626,9 +626,9 @@ build_pg_query_shader(struct radv_device *device)
     *  if (avail & 0x80000000) {
     *          result = src_data[2] - src_data[0];
     *          if (use_gds) {
-    *                  uint64_t ngg_gds_result = 0;
-    *                  ngg_gds_result += src_data[5] - src_data[4];
-    *                  result += ngg_gds_result;
+    *                  uint32_t ngg_gds_result = 0;
+    *                  ngg_gds_result += src_data[9] - src_data[8];
+    *                  result += (uint64_t)ngg_gds_result;
     *          }
     *          available = true;
     *  }
@@ -702,13 +702,13 @@ build_pg_query_shader(struct radv_device *device)
    nir_push_if(&b, nir_i2b(&b, uses_gds));
    {
       nir_ssa_def *gds_start =
-         nir_load_ssbo(&b, 1, 64, src_buf, nir_iadd(&b, input_base, 
nir_imm_int(&b, 32)), .align_mul = 8);
+         nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, 
nir_imm_int(&b, 32)), .align_mul = 4);
       nir_ssa_def *gds_end =
-         nir_load_ssbo(&b, 1, 64, src_buf, nir_iadd(&b, input_base, 
nir_imm_int(&b, 40)), .align_mul = 8);
+         nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, 
nir_imm_int(&b, 36)), .align_mul = 4);
 
       nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
 
-      nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), 
ngg_gds_result), 0x1);
+      nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), 
nir_u2u64(&b, ngg_gds_result)), 0x1);
    }
    nir_pop_if(&b, NULL);
 
@@ -1121,8 +1121,8 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
       pool->stride = 32;
       if (pool->uses_gds) {
-         /* When the query pool needs GDS, allocate 2x64-bit values for 
begin/end. */
-         pool->stride += 8 * 2;
+         /* When the query pool needs GDS, allocate 2x32-bit values for 
begin/end. */
+         pool->stride += 4 * 2;
       }
       break;
    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
@@ -1385,8 +1385,10 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          primitive_storage_needed = src64[2] - src64[0];
 
          if (pool->uses_gds) {
+            uint32_t const *src32 = (uint32_t const *)src;
+
             /* Accumulate the result that was copied from GDS in case NGG 
shader has been used. */
-            primitive_storage_needed += src64[5] - src64[4];
+            primitive_storage_needed += src32[9] - src32[8];
          }
 
          if (flags & VK_QUERY_RESULT_64_BIT) {
@@ -1963,7 +1965,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
 
       if (pool->uses_gds) {
          /* generated prim counter */
-         gfx10_copy_gds_query(cmd_buffer, 4 + index * 4, va + 40);
+         gfx10_copy_gds_query(cmd_buffer, 4 + index * 4, va + 36);
 
          cmd_buffer->state.active_prims_gen_gds_queries--;
       }

Reply via email to