This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit c0a697a1bc02805a532bf5854478dde4a28c8d83
Author:     Lynne <[email protected]>
AuthorDate: Sun Feb 8 03:58:11 2026 +0100
Commit:     Lynne <[email protected]>
CommitDate: Thu Feb 19 19:42:30 2026 +0100

    vulkan_ffv1: use regular descriptors for slice state
    
    HUGE speedup on AMD, HUGE speedup everywhere.
---
 libavcodec/vulkan/ffv1_dec.comp.glsl              | 30 +++++++++++------
 libavcodec/vulkan/ffv1_dec_reset.comp.glsl        | 40 ++++++++++++++---------
 libavcodec/vulkan/ffv1_dec_rgb.comp.glsl          |  2 +-
 libavcodec/vulkan/ffv1_enc_golomb.comp.glsl       |  1 +
 libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl |  1 +
 libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl   |  1 +
 libavcodec/vulkan/ffv1_vlc.glsl                   |  8 ++++-
 libavcodec/vulkan_ffv1.c                          | 28 +++++++++++++---
 8 files changed, 79 insertions(+), 32 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl
index 1f37c23b2a..720fa14cd2 100644
--- a/libavcodec/vulkan/ffv1_dec.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec.comp.glsl
@@ -33,10 +33,14 @@ layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
 layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
     uint32_t slice_status[];
 };
-layout (set = 1, binding = 3) uniform uimage2D dec[];
+layout (set = 1, binding = 4) uniform uimage2D dec[];
 
 #ifndef GOLOMB
 
+layout (set = 1, binding = 3, scalar) buffer slice_state_buf {
+    uint8_t slice_rc_state[];
+};
+
 #define READ(c, idx) get_rac_noadapt(c, idx)
 int get_isymbol(inout RangeCoder c)
 {
@@ -114,10 +118,9 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
         ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
                             quant_table_idx, extend_lookup[quant_table_idx]);
 
-        uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
-        u8buf cd = u8buf(uint64_t(slice_state) + context_off);
+        uint rc_off = state_off + CONTEXT_SIZE*abs(pr[0]) + gl_LocalInvocationID.x;
 
-        rc_state[gl_LocalInvocationID.x] = cd[gl_LocalInvocationID.x].v;
+        rc_state[gl_LocalInvocationID.x] = slice_rc_state[rc_off];
         rc_dec[gl_LocalInvocationID.x] = false;
         barrier();
 
@@ -128,7 +131,8 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
         barrier();
         uint i = gl_LocalInvocationID.x;
         if (rc_dec[i])
-            cd[i].v = zero_one_state[rc_state[i] + (rc_data[i] ? 256 : 0)];
+            slice_rc_state[rc_off] = zero_one_state[rc_state[i] +
+                                                    (rc_data[i] ? 256 : 0)];
 
         if (gl_LocalInvocationID.x == 0) {
             if (pr[0] < 0)
@@ -139,7 +143,13 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
         }
     }
 }
-#else
+
+#else /* GOLOMB */
+
+layout (set = 1, binding = 3, scalar) buffer slice_state_buf {
+    VlcState slice_vlc_state[];
+};
+
 GetBitContext gb;
 
 void golomb_init(inout SliceContext sc)
@@ -172,8 +182,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
         ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
                             quant_table_idx, extend_lookup[quant_table_idx]);
 
-        uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]);
-        VlcState sb = VlcState(uint64_t(slice_state) + context_off);
+        uint vlc_off = state_off + abs(pr[0]);
 
         if (pr[0] == 0 && run_mode == 0)
             run_mode = 1;
@@ -201,14 +210,14 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
             if (run_count < 0) {
                 run_mode  = 0;
                 run_count = 0;
-                diff = read_vlc_symbol(gb, sb, bits);
+                diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits);
                 if (diff >= 0)
                     diff++;
             } else {
                 diff = 0;
             }
         } else {
-            diff = read_vlc_symbol(gb, sb, bits);
+            diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits);
         }
 
         if (pr[0] < 0)
@@ -298,6 +307,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
                                uvec4(0, 1, 1, 2))*plane_state_size;
 
 #ifdef GOLOMB
+    slice_state_off >>= 3; // division by VLC_STATE_SIZE
     golomb_init(sc);
 #endif
 
diff --git a/libavcodec/vulkan/ffv1_dec_reset.comp.glsl b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl
index e708d03036..1aeb196e1e 100644
--- a/libavcodec/vulkan/ffv1_dec_reset.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl
@@ -26,6 +26,18 @@
 #include "common.glsl"
 #include "ffv1_common.glsl"
 
+#ifdef GOLOMB
+#define PS_SHIFT 3
+layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf {
+    VlcState slice_vlc_state[];
+};
+#else
+#define PS_SHIFT 2
+layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf {
+    uint32_t slice_rc_state[];
+};
+#endif
+
 void main(void)
 {
     const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
@@ -34,30 +46,26 @@ void main(void)
         return;
 
     const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z];
+
     uint contexts = context_count[qidx];
-    uint64_t slice_state_off = uint64_t(slice_state) +
-                               slice_idx*plane_state_size*codec_planes;
+    uint plane_state_len = plane_state_size >> PS_SHIFT;
+    uint offs = slice_idx*plane_state_len*codec_planes +
+                gl_WorkGroupID.z*plane_state_len +
+                gl_LocalInvocationID.x;
 
 #ifdef GOLOMB
-    uint64_t start = slice_state_off +
-                     (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) +
-                      gl_LocalInvocationID.x)*VLC_STATE_SIZE;
     for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {
-        VlcState sb = VlcState(start);
-        sb.drift     =  int16_t(0);
-        sb.error_sum = uint16_t(4);
-        sb.bias      =   int8_t(0);
-        sb.count     =  uint8_t(1);
-        start += gl_WorkGroupSize.x*VLC_STATE_SIZE;
+        slice_vlc_state[offs].drift     =  int16_t(0);
+        slice_vlc_state[offs].error_sum = uint16_t(4);
+        slice_vlc_state[offs].bias      =   int8_t(0);
+        slice_vlc_state[offs].count     =  uint8_t(1);
+        offs += gl_WorkGroupSize.x;
     }
 #else
-    uint64_t start = slice_state_off +
-                     gl_WorkGroupID.z*plane_state_size +
-                     (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
     uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
     for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
-        u32buf(start).v = 0x80808080;
-        start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);
+        slice_rc_state[offs] = 0x80808080;
+        offs += gl_WorkGroupSize.x;
     }
 #endif
 }
diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
index fe0d6957df..72dc31ba15 100644
--- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
@@ -24,7 +24,7 @@
 #extension GL_GOOGLE_include_directive : require
 #extension GL_EXT_shader_image_load_formatted : require
 
-layout (set = 1, binding = 4) writeonly uniform uimage2D dst[];
+layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
 
 #define RGB
 #include "ffv1_dec.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl
index a120564602..459c65d954 100644
--- a/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl
@@ -23,5 +23,6 @@
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
 
+#define VLC_BUFFER
 #define GOLOMB
 #include "ffv1_enc.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl
index 277f88c6c3..23eca0c7ed 100644
--- a/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl
@@ -23,5 +23,6 @@
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
 
+#define VLC_BUFFER
 #define GOLOMB
 #include "ffv1_enc_reset.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl
index 8efffd19e8..c7a3d17fd5 100644
--- a/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl
@@ -23,5 +23,6 @@
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
 
+#define VLC_BUFFER
 #define GOLOMB
 #include "ffv1_enc_rgb.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl
index e1c6cf66de..68353ae9ce 100644
--- a/libavcodec/vulkan/ffv1_vlc.glsl
+++ b/libavcodec/vulkan/ffv1_vlc.glsl
@@ -24,7 +24,13 @@
 #define VULKAN_FFV1_VLC_H
 
 #define VLC_STATE_SIZE 8
-layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState {
+#ifdef VLC_BUFFER
+layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer
+#else
+struct
+#endif
+
+VlcState {
     uint32_t error_sum;
     int16_t  drift;
     int8_t   bias;
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 3b1bce97d1..0e2cda1028 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -404,6 +404,12 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                                     slice_state,
                                     0, fp->slice_data_size*f->slice_count,
                                     VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
+                                    1, 1, 0,
+                                    slice_state,
+                                    f->slice_count*fp->slice_data_size,
+                                    VK_WHOLE_SIZE,
+                                    VK_FORMAT_UNDEFINED);
 
     ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
     ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
@@ -458,16 +464,22 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                                     slice_status,
                                     0, 2*f->slice_count*sizeof(uint32_t),
                                     VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+                                    1, 3, 0,
+                                    slice_state,
+                                    f->slice_count*fp->slice_data_size,
+                                    VK_WHOLE_SIZE,
+                                    VK_FORMAT_UNDEFINED);
 
     ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
                                   decode_dst, decode_dst_view,
-                                  1, 3,
+                                  1, 4,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
     if (is_rgb)
         ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
                                       f->picture.f, vp->view.out,
-                                      1, 4,
+                                      1, 5,
                                       VK_IMAGE_LAYOUT_GENERAL,
                                       VK_NULL_HANDLE);
 
@@ -602,8 +614,12 @@ static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
             .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
         },
+        { /* slice_state_buf */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
     };
-    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0);
+    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0);
 
     if (ac == AC_GOLOMB_RICE)
         RET(ff_vk_shader_link(s, shd,
@@ -660,6 +676,10 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
             .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
         },
+        { /* slice_state_buf */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
         { /* dec */
             .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -671,7 +691,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
             .elems  = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
         },
     };
-    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4 + rgb, 0, 0);
+    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 5 + rgb, 0, 0);
 
     if (ac == AC_GOLOMB_RICE) {
         if (rgb)

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to