This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 9a6b5ca197bc61581239752933c6e3319f4775d1
Author:     Lynne <[email protected]>
AuthorDate: Tue May 26 11:34:40 2026 +0900
Commit:     Lynne <[email protected]>
CommitDate: Tue May 26 17:47:04 2026 +0900

    vulkan/ffv1_enc_rct_search: fix slice dimension iterations
    
    This was a mess: we were using incorrect pixels outside of the image
    boundaries as valid, and the iteration had undefined behaviour since it
    was non-uniform across the workgroup.
    
    Calculate the per-invocation iterations from the slice dimensions instead,
    making all of them identical. Add a valid flag to decide whether to use
    them or not, and fix the synchronization.
    
    Sponsored-by: Sovereign Tech Fund
---
 libavcodec/vulkan/ffv1_enc_rct_search.comp.glsl | 46 ++++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_enc_rct_search.comp.glsl b/libavcodec/vulkan/ffv1_enc_rct_search.comp.glsl
index dc25f50831..4c4330f802 100644
--- a/libavcodec/vulkan/ffv1_enc_rct_search.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_rct_search.comp.glsl
@@ -22,6 +22,7 @@
 
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
 
 #define ENCODE
 #define SB_QUALI
@@ -94,18 +95,25 @@ uint get_dist(ivec3 cur)
 shared uint score_cols[gl_WorkGroupSize.y] = { };
 shared uint score_mode[16] = { };
 
-void process(ivec2 pos)
+/* One scoring step: publish this lane's tx_pix to shared, then read the
+ * neighbours' to compute the prediction error. `valid` selects whether
+ * this lane has a real pixel; invalid lanes write zero into pix_buf so
+ * the cache stays well-defined while still participating in the barrier. */
+void process(ivec2 pos, bool valid, int i)
 {
-    ivec3 pix = load_components(pos);
+    ivec3 pix = valid ? load_components(pos) : ivec3(0);
+    ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);
+    pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] =
+        valid ? tx_pix : ivec3(0);
+    barrier();
 
-    for (int i = 0; i < NUM_CHECKS; i++) {
-        ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);
-        pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = 
tx_pix;
-        memoryBarrierShared();
+    uint dist = 0u;
+    if (valid)
+        dist = get_dist(tx_pix);
 
-        uint dist = get_dist(tx_pix);
-        atomicAdd(score_mode[i], dist);
-    }
+    uint sum = subgroupAdd(dist);
+    if (subgroupElect())
+        atomicAdd(score_mode[i], sum);
 }
 
 void coeff_search(uint slice_idx)
@@ -120,9 +128,23 @@ void coeff_search(uint slice_idx)
     uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
                            gl_NumWorkGroups.y, 0);
 
-    for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += 
gl_WorkGroupSize.y) {
-        for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += 
gl_WorkGroupSize.x) {
-            process(ivec2(x, y));
+    /* Uniform iteration: every lane in the workgroup runs the same number
+     * of tile iterations so that the in-process barrier is always reached
+     * by all lanes. Lanes outside the slice extents pass valid=false. */
+    uint sw = sxe - sxs;
+    uint sh = sye - sys;
+    uint n_xi = (sw + gl_WorkGroupSize.x - 1u) / gl_WorkGroupSize.x;
+    uint n_yi = (sh + gl_WorkGroupSize.y - 1u) / gl_WorkGroupSize.y;
+
+    for (uint yi = 0u; yi < n_yi; yi++) {
+        uint y = sys + yi*gl_WorkGroupSize.y + gl_LocalInvocationID.y;
+        for (uint xi = 0u; xi < n_xi; xi++) {
+            uint x = sxs + xi*gl_WorkGroupSize.x + gl_LocalInvocationID.x;
+            bool valid = (x < sxe) && (y < sye);
+            for (int i = 0; i < NUM_CHECKS; i++) {
+                process(ivec2(x, y), valid, i);
+                barrier();
+            }
         }
     }
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to