Module: Mesa
Branch: staging/20.2
Commit: b7c3713706694815321957d9ab1c986f0872dab8
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b7c3713706694815321957d9ab1c986f0872dab8

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Wed Sep 30 13:07:43 2020 +0200

radv: Use atomics to read query results.

The volatile pattern gives me flaky results for 32-bit builds on
ChromeOS Android. This is because on 32-bit the volatile 64-bit
loads gets split into 2 32-bit loads each.

So if we read the lower dword first and then the upper dword, it
can happen that the upper dword is already changed but the lower
dword isn't yet. In particular for occlusion queries this gives
false readings, as the upper dword commonly only constains the
ready bit.

With the GCC atomic intrinsics we get a call to __atomic_load_8
in libatomic.so which does the right thing.

An alternative fix would be to  explicitly split the 32-bit loads
in the right order and do a bunch of retries if things change, though
that gets messy quickly and for 32-bit builds only doesn't feel worth
it that much.

CC: mesa-stable
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6933>
(cherry picked from commit 7568c97df14f8702efcc5691cd8c2fff8f9bff49)

---

 .pick_status.json           |  2 +-
 src/amd/vulkan/radv_query.c | 42 ++++++++++++++++++++++--------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index d86b1d965cd..6cc5966ef96 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -427,7 +427,7 @@
         "description": "radv: Use atomics to read query results.",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 1331028abb0..366841d88c0 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1376,31 +1376,32 @@ VkResult radv_GetQueryPoolResults(
 
                switch (pool->type) {
                case VK_QUERY_TYPE_TIMESTAMP: {
-                       volatile uint64_t const *src64 = (volatile uint64_t 
const *)src;
-                       available = *src64 != TIMESTAMP_NOT_READY;
+                       uint64_t const *src64 = (uint64_t const *)src;
+                       uint64_t value;
 
-                       if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-                               while (*src64 == TIMESTAMP_NOT_READY)
-                                       ;
-                               available = true;
-                       }
+                       do {
+                               value = p_atomic_read(src64);
+                       } while (value == TIMESTAMP_NOT_READY &&
+                                (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+                       available = value != TIMESTAMP_NOT_READY;
 
                        if (!available && !(flags & 
VK_QUERY_RESULT_PARTIAL_BIT))
                                result = VK_NOT_READY;
 
                        if (flags & VK_QUERY_RESULT_64_BIT) {
                                if (available || (flags & 
VK_QUERY_RESULT_PARTIAL_BIT))
-                                       *(uint64_t*)dest = *src64;
+                                       *(uint64_t*)dest = value;
                                dest += 8;
                        } else {
                                if (available || (flags & 
VK_QUERY_RESULT_PARTIAL_BIT))
-                                       *(uint32_t*)dest = *(volatile 
uint32_t*)src;
+                                       *(uint32_t*)dest = (uint32_t)value;
                                dest += 4;
                        }
                        break;
                }
                case VK_QUERY_TYPE_OCCLUSION: {
-                       volatile uint64_t const *src64 = (volatile uint64_t 
const *)src;
+                       uint64_t const *src64 = (uint64_t const *)src;
                        uint32_t db_count = 
device->physical_device->rad_info.num_render_backends;
                        uint32_t enabled_rb_mask = 
device->physical_device->rad_info.enabled_rb_mask;
                        uint64_t sample_count = 0;
@@ -1413,8 +1414,8 @@ VkResult radv_GetQueryPoolResults(
                                        continue;
 
                                do {
-                                       start = src64[2 * i];
-                                       end = src64[2 * i + 1];
+                                       start = p_atomic_read(src64 + 2 * i);
+                                       end = p_atomic_read(src64 + 2 * i + 1);
                                } while ((!(start & (1ull << 63)) || !(end & 
(1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
                                if (!(start & (1ull << 63)) || !(end & (1ull << 
63)))
@@ -1439,16 +1440,17 @@ VkResult radv_GetQueryPoolResults(
                        break;
                }
                case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
-                       if (flags & VK_QUERY_RESULT_WAIT_BIT)
-                               while(!*(volatile uint32_t*)(pool->ptr + 
pool->availability_offset + 4 * query))
-                                       ;
-                       available = *(volatile uint32_t*)(pool->ptr + 
pool->availability_offset + 4 * query);
+                       const uint32_t *avail_ptr = (const uint32_t*)(pool->ptr 
+ pool->availability_offset + 4 * query);
+
+                       do {
+                               available = p_atomic_read(avail_ptr);
+                       } while (!available && (flags & 
VK_QUERY_RESULT_WAIT_BIT));
 
                        if (!available && !(flags & 
VK_QUERY_RESULT_PARTIAL_BIT))
                                result = VK_NOT_READY;
 
-                       const volatile uint64_t *start = (uint64_t*)src;
-                       const volatile uint64_t *stop = (uint64_t*)(src + 
pipelinestat_block_size);
+                       const uint64_t *start = (uint64_t*)src;
+                       const uint64_t *stop = (uint64_t*)(src + 
pipelinestat_block_size);
                        if (flags & VK_QUERY_RESULT_64_BIT) {
                                uint64_t *dst = (uint64_t*)dest;
                                dest += 
util_bitcount(pool->pipeline_stats_mask) * 8;
@@ -1476,7 +1478,7 @@ VkResult radv_GetQueryPoolResults(
                        break;
                }
                case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
-                       volatile uint64_t const *src64 = (volatile uint64_t 
const *)src;
+                       uint64_t const *src64 = (uint64_t const *)src;
                        uint64_t num_primitives_written;
                        uint64_t primitive_storage_needed;
 
@@ -1488,7 +1490,7 @@ VkResult radv_GetQueryPoolResults(
                         */
                        available = 1;
                        for (int j = 0; j < 4; j++) {
-                               if (!(src64[j] & 0x8000000000000000UL))
+                               if (!(p_atomic_read(src64 + j) & 
0x8000000000000000UL))
                                        available = 0;
                        }
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to