This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 58bd5ad6309c9a0aa27b9aad8bad6dc1415bcb36
Author:     Lynne <[email protected]>
AuthorDate: Wed Dec 24 04:10:39 2025 +0100
Commit:     Lynne <[email protected]>
CommitDate: Wed Dec 31 15:00:47 2025 +0100

    vulkan/prores_raw_idct: use the same prores_idct method for loading coeffs
    
    This saves 2 barriers.
    Also implement bank conflict avoidance (block rows are padded to a
    stride of 9 instead of 8).
---
 libavcodec/vulkan/prores_raw_idct.comp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/libavcodec/vulkan/prores_raw_idct.comp b/libavcodec/vulkan/prores_raw_idct.comp
index ffd71d1d73..c9850d17d7 100644
--- a/libavcodec/vulkan/prores_raw_idct.comp
+++ b/libavcodec/vulkan/prores_raw_idct.comp
@@ -63,30 +63,32 @@ void main(void)
     uint8_t qmat_buf[64] = qmat;
 
     [[unroll]]
-    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) {
-        int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[i])[0]);
+    for (uint y = 0; y < 8; y++) {
+        uint block_off = y*8 + ROW_ID;
+        int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[block_off])[0]);
         float vf = float(sign_extend(v, 16)) / 32768.0;
-        vf *= qmat_buf[i] * qscale;
-        blocks[BLOCK_ID][COMP_ID*64 + i] = (vf / (64*4.56)) *
-                                           idct_scale[i];
+        vf *= qmat_buf[block_off] * qscale;
+        blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = (vf / (64*4.56)) *
+                                                      idct_scale[block_off];
     }
 
+    /* Column-wise iDCT */
+    idct8(BLOCK_ID, COMP_ID*72 + ROW_ID, 9);
     barrier();
-    idct8(BLOCK_ID, COMP_ID*64 + ROW_ID*8, 1);
 
-    blocks[BLOCK_ID][COMP_ID*64 + ROW_ID] += 0.5;
+    blocks[BLOCK_ID][COMP_ID*72 + ROW_ID * 9] += 0.5f;
 
+    /* Row-wise iDCT */
+    idct8(BLOCK_ID, COMP_ID*72 + ROW_ID * 9, 1);
     barrier();
-    idct8(BLOCK_ID, COMP_ID*64 + ROW_ID, 8);
 
-    barrier();
     [[unroll]]
-    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) {
-        int v = int(round(blocks[BLOCK_ID][COMP_ID*64 + i]*4095.0));
+    for (uint y = 0; y < 8; y++) {
+        int v = int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID]*4095.0));
         v = clamp(v, 0, 4095);
         v <<= 4;
         imageStore(dst,
-                   offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),
+                   offs + 2*ivec2(BLOCK_ID*8 + ROW_ID, y),
                    ivec4(v));
     }
 }

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to