This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit f2a55af9a4160f8a204b3bfc852cc35be891a078 Author: Lynne <[email protected]> AuthorDate: Fri Jan 9 11:16:53 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Mon Jan 12 17:28:43 2026 +0100 vulkan_dpx: switch to compile-time SPIR-V generation --- configure | 2 +- libavcodec/vulkan/Makefile | 5 +- libavcodec/vulkan/dpx_copy.comp | 55 -------- libavcodec/vulkan/dpx_copy.comp.glsl | 95 ++++++++++++++ .../{dpx_unpack.comp => dpx_unpack.comp.glsl} | 69 ++++++---- libavcodec/vulkan_dpx.c | 142 ++++++++++----------- 6 files changed, 210 insertions(+), 158 deletions(-) diff --git a/configure b/configure index e893d8aa04..d0f6dd3a9b 100755 --- a/configure +++ b/configure @@ -3316,7 +3316,7 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" -dpx_vulkan_hwaccel_deps="vulkan spirv_library" +dpx_vulkan_hwaccel_deps="vulkan" dpx_vulkan_hwaccel_select="dpx_decoder" ffv1_vulkan_hwaccel_deps="vulkan spirv_library" ffv1_vulkan_hwaccel_select="ffv1_decoder" diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index c0432e75cc..860475d960 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -18,9 +18,8 @@ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/prores_vld.comp.spv.o \ vulkan/prores_idct.comp.spv.o -OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \ - vulkan/dpx_unpack.o \ - vulkan/dpx_copy.o +OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/dpx_unpack.comp.spv.o \ + vulkan/dpx_copy.comp.spv.o VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) diff --git a/libavcodec/vulkan/dpx_copy.comp b/libavcodec/vulkan/dpx_copy.comp deleted file mode 100644 index 4151fcb2b1..0000000000 --- a/libavcodec/vulkan/dpx_copy.comp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2025 Lynne <[email protected]> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -TYPE read_data(uint off) -{ -#ifdef BIG_ENDIAN - return TYPE_REVERSE(data[off]); -#else - return data[off]; -#endif -} - -void main(void) -{ - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - if (!IS_WITHIN(pos, imageSize(dst[0]))) - return; - - uint linesize; - linesize = align(imageSize(dst[0]).x*BITS_PER_COMP*COMPONENTS, 32); - linesize >>= BITS_LOG2; - - uint offs = pos.y*linesize + pos.x*COMPONENTS; -#if NB_IMAGES == 1 - TYPE_VEC val; - for (int i = 0; i < COMPONENTS; i++) - val[i] = read_data(offs + i); - val >>= SHIFT; - imageStore(dst[0], pos, val); -#else - const ivec4 fmt_lut = ivec4(2, 0, 1, 3); - for (int i = 0; i < COMPONENTS; i++) { - TYPE val = read_data(offs + i); - val >>= SHIFT; - imageStore(dst[fmt_lut[i]], pos, TYPE_VEC(val)); - } -#endif -} diff --git a/libavcodec/vulkan/dpx_copy.comp.glsl b/libavcodec/vulkan/dpx_copy.comp.glsl new file mode 100644 index 0000000000..3903d35944 --- /dev/null +++ b/libavcodec/vulkan/dpx_copy.comp.glsl @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2025 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#version 460 +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_nontemporal_keyword : require + +#include "common.comp" + +layout (constant_id = 0) const bool big_endian = false; +layout (constant_id = 1) const int type_bits = 0; + +layout (set = 0, binding = 0) uniform writeonly uimage2D dst[]; +layout (set = 0, binding = 1, scalar) nontemporal readonly buffer data_buf8 { + uint8_t data8[]; +}; +layout (set = 0, binding = 2, scalar) nontemporal readonly buffer data_buf16 { + uint16_t data16[]; +}; +layout (set = 0, binding = 3, scalar) nontemporal readonly buffer data_buf32 { + uint32_t data32[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + int bits_per_comp; + int nb_comp; + int nb_images; + int stride; + int need_align; + int padded_10bit; + int shift; +}; + +#define READ_FN(bits, bytes) \ +uint read_val##bits(uint off) \ +{ \ + if (big_endian) \ + return uint(reverse##bytes(data##bits[off])); \ + return uint(data##bits[off]); \ +} +READ_FN(16, 2) +READ_FN(32, 4) + +uint read_data(uint off) +{ + if (type_bits == 8) + return uint(data8[off]); + else if (type_bits == 16) + return read_val16(off); + return read_val32(off); +} + +void main(void) +{ + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + + uint linesize; + linesize = align(imageSize(dst[0]).x*bits_per_comp*nb_comp, 32); + + uint offs = pos.y*linesize + pos.x*nb_comp*bits_per_comp; + offs /= bits_per_comp; + + if (nb_images == 1) { + uvec4 val; + for (int i = 0; i < nb_comp; i++) + val[i] = read_data(offs + i); + val >>= shift; + imageStore(dst[0], pos, val); + } else { + const ivec4 fmt_lut = ivec4(2, 0, 1, 3); + for (int i = 0; i < nb_comp; i++) { + uint32_t val = read_data(offs + i); + val >>= shift; + imageStore(dst[fmt_lut[i]], pos, uvec4(val)); + } + } +} diff --git a/libavcodec/vulkan/dpx_unpack.comp b/libavcodec/vulkan/dpx_unpack.comp.glsl similarity index 54% rename from libavcodec/vulkan/dpx_unpack.comp rename to libavcodec/vulkan/dpx_unpack.comp.glsl index b5ca2cf509..e722899d04 100644 --- a/libavcodec/vulkan/dpx_unpack.comp +++ b/libavcodec/vulkan/dpx_unpack.comp.glsl @@ -18,17 +18,39 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_nontemporal_keyword : require + +#include "common.comp" + +layout (constant_id = 0) const bool big_endian = false; +layout (constant_id = 1) const bool packed_10bit = false; + +layout (set = 0, binding = 0) uniform writeonly uimage2D dst[]; +layout (set = 0, binding = 1, scalar) nontemporal readonly buffer data_buf { + uint32_t data[]; +}; + +layout (push_constant, scalar) uniform pushConstants { + int bits_per_comp; + int nb_comp; + int nb_images; + int stride; + int need_align; + int padded_10bit; + int shift; +}; + uint32_t read_data(uint off) { -#ifdef BIG_ENDIAN - return reverse4(data[off]); -#else + if (big_endian) + return reverse4(data[off]); return data[off]; -#endif } -#ifdef PACKED_10BIT -i16vec4 parse_packed_in_32(ivec2 pos, int stride) +i16vec4 parse_packed10_in_32(ivec2 pos, int stride) { uint32_t d = read_data(pos.y*stride + pos.x); i16vec4 v; @@ -41,15 +63,15 @@ i16vec4 parse_packed_in_32(ivec2 pos, int stride) v[3] = int16_t(0); return v; } -#else + i16vec4 parse_packed_in_32(ivec2 pos, int stride) { - uint line_size = stride*BITS_PER_COMP*COMPONENTS; + uint line_size = stride*bits_per_comp*nb_comp; line_size += line_size & 31; line_size += need_align << 3; uint line_off = pos.y*line_size; - uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS; + uint pix_off = pos.x*bits_per_comp*nb_comp; uint off = (line_off + pix_off) >> 5; uint bit = pix_off & 0x1f; @@ -61,12 +83,11 @@ i16vec4 parse_packed_in_32(ivec2 pos, int stride) combined >>= bit; return i16vec4(combined, - combined >> (BITS_PER_COMP*1), - combined >> (BITS_PER_COMP*2), - combined >> (BITS_PER_COMP*3)) & - int16_t((1 << BITS_PER_COMP) - 1); + combined >> (bits_per_comp*1), + combined >> (bits_per_comp*2), + combined >> (bits_per_comp*3)) & + int16_t((1 << bits_per_comp) - 1); } -#endif void main(void) { @@ -74,13 +95,17 @@ void main(void) if (!IS_WITHIN(pos, imageSize(dst[0]))) return; - i16vec4 p = parse_packed_in_32(pos, imageSize(dst[0]).x); + i16vec4 p; + if (packed_10bit) + p = parse_packed10_in_32(pos, imageSize(dst[0]).x); + else + p = parse_packed_in_32(pos, imageSize(dst[0]).x); -#if NB_IMAGES == 1 - imageStore(dst[0], pos, p); -#else - const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3); - for (uint i = 0; i < COMPONENTS; i++) - imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i])); -#endif + if (nb_images == 1) { + imageStore(dst[0], pos, p); + } else { + const ivec4 fmt_lut = ivec4(2, 0, 1, 3); + for (uint i = 0; i < nb_comp; i++) + imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i])); + } } diff --git a/libavcodec/vulkan_dpx.c b/libavcodec/vulkan_dpx.c index faad28d624..cf53a0f4df 100644 --- a/libavcodec/vulkan_dpx.c +++ b/libavcodec/vulkan_dpx.c @@ -22,12 +22,13 @@ #include "hwaccel_internal.h" #include "dpx.h" -#include "libavutil/vulkan_spirv.h" #include "libavutil/mem.h" -extern const char *ff_source_common_comp; -extern const char *ff_source_dpx_unpack_comp; -extern const char *ff_source_dpx_copy_comp; +extern const unsigned char ff_dpx_unpack_comp_spv_data[]; +extern const unsigned int ff_dpx_unpack_comp_spv_len; + +extern const unsigned char ff_dpx_copy_comp_spv_data[]; +extern const unsigned int ff_dpx_copy_comp_spv_len; const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc = { .codec_id = AV_CODEC_ID_DPX, @@ -44,9 +45,13 @@ typedef struct DPXVulkanDecodeContext { } DPXVulkanDecodeContext; typedef struct DecodePushData { + int bits_per_comp; + int nb_comp; + int nb_images; int stride; int need_align; int padded_10bit; + int shift; } DecodePushData; static int host_upload_image(AVCodecContext *avctx, @@ -214,6 +219,9 @@ static int vk_dpx_end_frame(AVCodecContext *avctx) DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private; FFVulkanDecodePicture *vp = &pp->vp; + int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) || + avctx->bits_per_raw_sample == 10; + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; VkImageMemoryBarrier2 img_bar[8]; @@ -266,14 +274,31 @@ static int vk_dpx_end_frame(AVCodecContext *avctx) slices_buf, 0, slices_buf->size, VK_FORMAT_UNDEFINED); + if (!unpack) { + ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd, + 0, 2, 0, + slices_buf, + 0, slices_buf->size, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd, + 0, 3, 0, + slices_buf, + 0, slices_buf->size, + VK_FORMAT_UNDEFINED); + } ff_vk_exec_bind_shader(&ctx->s, exec, shd); /* Update push data */ DecodePushData pd = (DecodePushData) { + .bits_per_comp = avctx->bits_per_raw_sample, + .nb_comp = dpx->components, + .nb_images = ff_vk_count_images(vkf), .stride = dpx->stride, .need_align = dpx->need_align, .padded_10bit = !dpx->unpadded_10bit, + .shift = FFALIGN(avctx->bits_per_raw_sample, 8) - + avctx->bits_per_raw_sample, }; ff_vk_shader_update_push_const(&ctx->s, exec, shd, @@ -294,92 +319,65 @@ fail: } static int init_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int bits) + FFVkExecPool *pool, FFVulkanShader *shd, int bits) { int err; DPXDecContext *dpx = avctx->priv_data; - FFVulkanDescriptorSetBinding *desc_set; AVHWFramesContext *dec_frames_ctx; dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(dec_frames_ctx->sw_format); - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - RET(ff_vk_shader_init(s, shd, "dpx", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 512, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, int stride; ); - GLSLC(1, int need_align; ); - GLSLC(1, int padded_10bit; ); - GLSLC(0, }; ); - GLSLC(0, ); - ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData), - VK_SHADER_STAGE_COMPUTE_BIT); int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) || avctx->bits_per_raw_sample == 10; - desc_set = (FFVulkanDescriptorSetBinding []) { + SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t)) + SPEC_LIST_ADD(sl, 0, 32, dpx->endian && bits > 8); /* big endian */ + if (unpack) + SPEC_LIST_ADD(sl, 1, 32, bits == 10); /* packed_10bit */ + else + SPEC_LIST_ADD(sl, 1, 32, FFALIGN(bits, 8)); /* type_bits */ + + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { 512, 1, 1 }, 0); + + ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + const FFVulkanDescriptorSetBinding desc_set[] = { { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_quali = "writeonly", - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), }, { .name = "data_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = (unpack || bits == 32) ? "uint32_t data[];" : - bits == 8 ? "uint8_t data[];" : "uint16_t data[];", + }, + { + .name = "data_buf16", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "data_buf32", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0)); - - if (dpx->endian && bits > 8) - GLSLC(0, #define BIG_ENDIAN ); - GLSLF(0, #define COMPONENTS (%i) ,dpx->components); - GLSLF(0, #define BITS_PER_COMP (%i) ,bits); - GLSLF(0, #define BITS_LOG2 (%i) ,av_log2(bits)); - GLSLF(0, #define NB_IMAGES (%i) ,planes); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2 + (2*!unpack), 0, 0); + + const unsigned char *src = ff_dpx_copy_comp_spv_data; + size_t src_len = ff_dpx_copy_comp_spv_len; if (unpack) { - if (bits == 10) - GLSLC(0, #define PACKED_10BIT ); - GLSLD(ff_source_dpx_unpack_comp); - } else { - GLSLF(0, #define SHIFT (%i) ,FFALIGN(bits, 8) - bits); - GLSLF(0, #define TYPE uint%i_t ,FFALIGN(bits, 8)); - GLSLF(0, #define TYPE_VEC u%ivec4 ,FFALIGN(bits, 8)); - GLSLF(0, #define TYPE_REVERSE(x) (reverse%i(x)), FFALIGN(bits, 8)/8); - GLSLD(ff_source_dpx_copy_comp); + src = ff_dpx_unpack_comp_spv_data; + src_len = ff_dpx_unpack_comp_spv_len; } - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, src, src_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -415,31 +413,21 @@ static int vk_decode_dpx_init(AVCodecContext *avctx) break; } - FFVkSPIRVCompiler *spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - err = ff_vk_decode_init(avctx); if (err < 0) return err; FFVulkanDecodeShared *ctx = dec->shared_ctx; DPXVulkanDecodeContext *dxv = ctx->sd_ctx = av_mallocz(sizeof(*dxv)); - if (!dxv) { - err = AVERROR(ENOMEM); - goto fail; - } + if (!dxv) + return AVERROR(ENOMEM); ctx->sd_ctx_free = &vk_decode_dpx_uninit; RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, - spv, &dxv->shader, avctx->bits_per_raw_sample)); + &dxv->shader, avctx->bits_per_raw_sample)); fail: - spv->uninit(&spv); - return err; } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
