This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new 3829f4ba6a vulkan/prores: reduce push constants size
3829f4ba6a is described below
commit 3829f4ba6ae1a1f3f499a585ef46d8ab231eb9bb
Author: averne <[email protected]>
AuthorDate: Tue Dec 23 14:32:49 2025 +0100
Commit: Lynne <[email protected]>
CommitDate: Sat Jan 17 17:33:31 2026 +0000
vulkan/prores: reduce push constants size
The Vulkan spec only mandates 128 bytes of push constant space, and some
platforms don't actually implement more. This moves the quantization
matrices out of the push constants and into the per-frame buffer.
---
libavcodec/vulkan/prores_idct.comp.glsl | 19 ++++++-----------
libavcodec/vulkan/prores_vld.comp.glsl | 3 ---
libavcodec/vulkan_prores.c | 36 +++++++++++++++++++++------------
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/libavcodec/vulkan/prores_idct.comp.glsl b/libavcodec/vulkan/prores_idct.comp.glsl
index ee9eddf19d..800a93db66 100644
--- a/libavcodec/vulkan/prores_idct.comp.glsl
+++ b/libavcodec/vulkan/prores_idct.comp.glsl
@@ -28,7 +28,10 @@ layout (constant_id = 0) const bool interlaced = false;
layout (set = 0, binding = 0) readonly buffer quant_idx_buf {
uint8_t quant_idx[];
};
-layout (set = 0, binding = 1) uniform uimage2D dst[];
+layout (set = 0, binding = 1) readonly buffer qmat_buf {
+ uint8_t qmat[];
+};
+layout (set = 0, binding = 2) uniform uimage2D dst[];
layout (push_constant, scalar) uniform pushConstants {
u8buf slice_data;
@@ -45,9 +48,6 @@ layout (push_constant, scalar) uniform pushConstants {
uint8_t depth;
uint8_t alpha_info;
uint8_t bottom_field;
-
- uint8_t qmat_luma [8*8];
- uint8_t qmat_chroma[8*8];
};
uint get_px(uint tex_idx, ivec2 pos)
@@ -79,21 +79,14 @@ void main(void)
/* Coalesced load of DCT coeffs in shared memory, inverse quantization */
if (act) {
- /**
- * According to the VK spec indexing an array in push constant memory
with
- * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
- * so copy the whole matrix locally.
- */
- uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
-
/* Table 15 */
uint8_t qidx = quant_idx[(gid.y >> 1) * mb_width + (gid.x >> (4 -
chroma_shift))];
- int qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
+ int qscale = qidx > 128 ? (qidx - 96) << 2 : qidx, mat = int(gid.z !=
0) << 6;
[[unroll]] for (uint i = 0; i < 8; ++i) {
uint cidx = (i << 3) + idx;
int c = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) +
i))), 16);
- float v = float(c * qscale * int(qmat[cidx])) * norm;
+ float v = float(c * qscale * int(qmat[mat + cidx])) * norm;
blocks[block][i * 9 + idx] = v * idct_scale[cidx];
}
}
diff --git a/libavcodec/vulkan/prores_vld.comp.glsl b/libavcodec/vulkan/prores_vld.comp.glsl
index a22f7fed77..85b4dbdd61 100644
--- a/libavcodec/vulkan/prores_vld.comp.glsl
+++ b/libavcodec/vulkan/prores_vld.comp.glsl
@@ -48,9 +48,6 @@ layout (push_constant, scalar) uniform pushConstants {
uint8_t depth;
uint8_t alpha_info;
uint8_t bottom_field;
-
- uint8_t qmat_luma [8*8];
- uint8_t qmat_chroma[8*8];
};
/**
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 019637f6ee..c7451c20fd 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -42,8 +42,8 @@ typedef struct ProresVulkanDecodePicture {
uint32_t bitstream_size;
uint32_t slice_num;
- uint32_t slice_offsets_sz, mb_params_sz;
- uint32_t slice_offsets_off, mb_params_off;
+ uint32_t slice_offsets_sz, qmat_sz, mb_params_sz;
+ uint32_t slice_offsets_off, qmat_off, mb_params_off;
} ProresVulkanDecodePicture;
typedef struct ProresVulkanDecodeContext {
@@ -68,9 +68,6 @@ typedef struct ProresVkParameters {
uint8_t depth;
uint8_t alpha_info;
uint8_t bottom_field;
-
- uint8_t qmat_luma [64];
- uint8_t qmat_chroma[64];
} ProresVkParameters;
static int vk_prores_start_frame(AVCodecContext *avctx,
@@ -88,10 +85,13 @@ static int vk_prores_start_frame(AVCodecContext
*avctx,
int err;
pp->slice_offsets_sz = (pr->slice_count + 1) * sizeof(uint32_t);
+ pp->qmat_sz = sizeof(pr->qmat_luma) + sizeof(pr->qmat_chroma);
pp->mb_params_sz = pr->mb_width * pr->mb_height * sizeof(uint8_t);
pp->slice_offsets_off = 0;
- pp->mb_params_off = FFALIGN(pp->slice_offsets_off +
pp->slice_offsets_sz,
+ pp->qmat_off = FFALIGN(pp->slice_offsets_off +
pp->slice_offsets_sz,
+
ctx->s.props.properties.limits.minStorageBufferOffsetAlignment);
+ pp->mb_params_off = FFALIGN(pp->qmat_off + pp->qmat_sz,
ctx->s.props.properties.limits.minStorageBufferOffsetAlignment);
/* Host map the input slices data if supported */
@@ -198,8 +198,10 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
.bottom_field = pr->first_field ^ (pr->frame_type == 1),
};
- memcpy(pd.qmat_luma, pr->qmat_luma, sizeof(pd.qmat_luma ));
- memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));
+ memcpy(metadata->mapped_mem + pp->qmat_off,
+ pr->qmat_luma, sizeof(pr->qmat_luma));
+ memcpy(metadata->mapped_mem + pp->qmat_off + sizeof(pr->qmat_luma),
+ pr->qmat_chroma, sizeof(pr->qmat_chroma));
FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
RET(ff_vk_exec_start(&ctx->s, exec));
@@ -230,7 +232,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_ACCESS_2_TRANSFER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
-
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
@@ -261,7 +262,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
-
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], metadata,
ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
@@ -310,7 +310,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
-
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], metadata,
COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
@@ -331,9 +330,15 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
pp->mb_params_off,
pp->mb_params_sz,
VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &pv->idct,
+ 0, 1, 0,
+ metadata,
+ pp->qmat_off,
+ pp->qmat_sz,
+ VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &pv->idct,
f, vp->view.out,
- 0, 1,
+ 0, 2,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -433,6 +438,11 @@ static int init_idct_shader(AVCodecContext *avctx,
FFVulkanContext *s,
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
+ {
+ .name = "qmat_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -440,7 +450,7 @@ static int init_idct_shader(AVCodecContext *avctx,
FFVulkanContext *s,
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
},
};
- RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
RET(ff_vk_shader_link(s, shd,
ff_prores_idct_comp_spv_data,
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]