PR #22585 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22585
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22585.patch


>From 853a748e49d1b6540b67008b4e651368d0be845a Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 19 Mar 2026 11:30:57 +0100
Subject: [PATCH 1/2] hwcontext_vulkan: add support for GBRPF16/GBRAPF16

Sponsored-by: Sovereign Tech Fund
---
 libavutil/hwcontext_vulkan.c | 2 ++
 libavutil/vulkan.c           | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 65e2256e2d..5474ab3432 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -435,6 +435,7 @@ static const struct FFVkFormatEntry {
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP12,   VK_IMAGE_ASPECT_COLOR_BIT, 3, 
3, 3, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP14,   VK_IMAGE_ASPECT_COLOR_BIT, 3, 
3, 3, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRP16,   VK_IMAGE_ASPECT_COLOR_BIT, 3, 
3, 3, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
+    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRPF16,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 
3, 3, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32,  VK_IMAGE_ASPECT_COLOR_BIT, 3, 
3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
 
     /* Planar RGB + Alpha */
@@ -443,6 +444,7 @@ static const struct FFVkFormatEntry {
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP12,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  
VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP14,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  
VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP16,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  
VK_FORMAT_R16_UNORM  } },
+    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAPF16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  
VK_FORMAT_R16_UNORM  } },
     { VK_FORMAT_R32_UINT,   AV_PIX_FMT_GBRAP32,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R32_UINT,   VK_FORMAT_R32_UINT,   VK_FORMAT_R32_UINT,   
VK_FORMAT_R32_UINT   } },
     { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, 
VK_FORMAT_R32_SFLOAT } },
 
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ff24b956cd..fcefb82d02 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1559,6 +1559,7 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
         pix_fmt == AV_PIX_FMT_GBRAP   || pix_fmt == AV_PIX_FMT_GBRAP10 ||
         pix_fmt == AV_PIX_FMT_GBRAP12 || pix_fmt == AV_PIX_FMT_GBRAP14 ||
         pix_fmt == AV_PIX_FMT_GBRAP16 || pix_fmt == AV_PIX_FMT_GBRAP32 ||
+        pix_fmt == AV_PIX_FMT_GBRPF16 || pix_fmt == AV_PIX_FMT_GBRAPF16 ||
         pix_fmt == AV_PIX_FMT_GBRPF32 || pix_fmt == AV_PIX_FMT_GBRAPF32 ||
         pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
         pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
@@ -1577,10 +1578,12 @@ void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int 
lut[4], int inv)
     case AV_PIX_FMT_GBRAP12:
     case AV_PIX_FMT_GBRAP14:
     case AV_PIX_FMT_GBRAP16:
+    case AV_PIX_FMT_GBRAPF16:
     case AV_PIX_FMT_GBRP10:
     case AV_PIX_FMT_GBRP12:
     case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRPF16:
     case AV_PIX_FMT_GBRPF32:
     case AV_PIX_FMT_GBRAP32:
     case AV_PIX_FMT_GBRAPF32:
@@ -1706,10 +1709,12 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat 
pix_fmt,
     case AV_PIX_FMT_GBRAP12:
     case AV_PIX_FMT_GBRAP14:
     case AV_PIX_FMT_GBRAP16:
+    case AV_PIX_FMT_GBRAPF16:
     case AV_PIX_FMT_GBRP10:
     case AV_PIX_FMT_GBRP12:
     case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRPF16:
     case AV_PIX_FMT_YUV420P10:
     case AV_PIX_FMT_YUV420P12:
     case AV_PIX_FMT_YUV420P16:
-- 
2.52.0


>From 9c94716b5805ffd4643229634dde733073b3e482 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Mon, 23 Mar 2026 09:14:42 +0100
Subject: [PATCH 2/2] ffv1enc_vulkan: implement floating-point encoding

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1enc_vulkan.c                   | 170 +++++++++++++++++-
 libavcodec/vulkan/Makefile                    |   4 +-
 libavcodec/vulkan/ffv1_common.glsl            |   1 +
 libavcodec/vulkan/ffv1_enc.comp.glsl          |  15 ++
 libavcodec/vulkan/ffv1_enc_remap.comp.glsl    |  67 +++++++
 .../vulkan/ffv1_enc_rgb_float.comp.glsl       |  31 ++++
 libavcodec/vulkan/ffv1_enc_setup.comp.glsl    |  69 +++++--
 libavcodec/vulkan/rangecoder.glsl             |   5 +-
 8 files changed, 341 insertions(+), 21 deletions(-)
 create mode 100644 libavcodec/vulkan/ffv1_enc_remap.comp.glsl
 create mode 100644 libavcodec/vulkan/ffv1_enc_rgb_float.comp.glsl

diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 0e891f0888..2bb65c2ce5 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -70,8 +70,9 @@ typedef struct VulkanEncodeFFv1Context {
     int async_depth;
     size_t max_heap_size;
 
-    FFVulkanShader setup;
     FFVulkanShader rct_search;
+    FFVulkanShader remap;
+    FFVulkanShader setup;
     FFVulkanShader reset;
     FFVulkanShader enc;
 
@@ -85,6 +86,9 @@ typedef struct VulkanEncodeFFv1Context {
     AVBufferPool *slice_data_pool;
     AVBufferRef *keyframe_slice_data_ref;
 
+    /* Remap data pool */
+    AVBufferPool *remap_data_pool;
+
     /* Output data buffer */
     AVBufferPool *out_data_pool;
 
@@ -131,6 +135,12 @@ extern const unsigned int 
ff_ffv1_enc_rgb_golomb_comp_spv_len;
 extern const unsigned char ff_ffv1_enc_rct_search_comp_spv_data[];
 extern const unsigned int ff_ffv1_enc_rct_search_comp_spv_len;
 
+extern const unsigned char ff_ffv1_enc_remap_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_remap_comp_spv_len;
+
+extern const unsigned char ff_ffv1_enc_rgb_float_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_rgb_float_comp_spv_len;
+
 static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec,
                           AVFrame *enc_in, VkImageView *enc_in_views,
                           FFVkBuffer *slice_data_buf, uint32_t slice_data_size,
@@ -162,6 +172,37 @@ static int run_rct_search(AVCodecContext *avctx, 
FFVkExecContext *exec,
     return 0;
 }
 
+static int run_remap(AVCodecContext *avctx, FFVkExecContext *exec,
+                     AVFrame *enc_in, VkImageView *enc_in_views,
+                     FFVkBuffer *fltmap_buf, uint32_t fltmap_size,
+                     FFv1ShaderParams *pd)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
+    FFVulkanFunctions *vk = &fv->s.vkfn;
+    /* Records the remap pass into exec; always returns 0.
+     * Descriptors: set 1, binding 1 = source planes, binding 2 = fltmap */
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->remap,
+                                  enc_in, enc_in_views,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->remap,
+                                    1, 2, 0,
+                                    fltmap_buf,
+                                    0, fltmap_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->remap);
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->remap,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(FFv1ShaderParams), pd);
+    /* One workgroup per slice: the shader derives slice_idx from its ID */
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    return 0;
+}
+
 static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
                                            FFVkExecContext *exec,
                                            const AVFrame *pict)
@@ -180,6 +221,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
     uint32_t slice_state_size;
     uint32_t slice_data_size;
 
+    /* Remap data */
+    AVBufferRef *remap_data_ref = NULL;
+    FFVkBuffer *remap_data_buf = NULL;
+    uint32_t remap_data_size = 0;
+
     /* Output data */
     size_t maxsize;
     FFVkBuffer *out_data_buf;
@@ -232,6 +278,19 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
     }
     slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
 
+    if (f->remap_mode) {
+        /* Byte size of one slice's table: uint fltmap[4][65536] in GLSL */
+        const AVPixFmtDescriptor *desc;
+        desc = av_pix_fmt_desc_get(fv->s.frames->sw_format);
+        remap_data_size = 4*(1 << desc->comp[0].depth)*sizeof(uint32_t);
+        RET(ff_vk_get_pooled_buffer(&fv->s, &fv->remap_data_pool,
+                                    &remap_data_ref,
+                                    VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                                    NULL, remap_data_size*f->slice_count,
+                                    VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
+        remap_data_buf = (FFVkBuffer *)remap_data_ref->data;
+    }
+
     /* Output buffer size */
     maxsize = ff_ffv1_encode_buffer_size(avctx);
     maxsize = FFMIN(maxsize, fv->s.props_11.maxMemoryAllocationSize);
@@ -314,6 +373,10 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
 
     ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
     ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
+    if (f->remap_mode) {
+        ff_vk_exec_add_dep_buf(&fv->s, exec, &remap_data_ref, 1, 0);
+        remap_data_ref = NULL;
+    }
 
     RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src,
                                  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
@@ -323,8 +386,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
                                      VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                      VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
 
-    /* Run RCT search if needed */
-    if (fv->optimize_rct) {
+    if (fv->optimize_rct || f->remap_mode) {
         /* Prepare the frame for reading */
         ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar,
                             VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
@@ -339,9 +401,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
             .imageMemoryBarrierCount = nb_img_bar,
         });
         nb_img_bar = 0;
+    }
 
-        RET(run_rct_search(avctx, exec,
-                           src, src_views,
+    /* Run RCT search if needed */
+    if (fv->optimize_rct) {
+        RET(run_rct_search(avctx, exec, src, src_views,
                            slice_data_buf, slice_data_size, &pd));
 
         /* Make sure the writes are visible to the setup shader */
@@ -349,6 +413,20 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
                           COMPUTE_SHADER_BIT, SHADER_READ_BIT, 
SHADER_WRITE_BIT,
                           COMPUTE_SHADER_BIT, SHADER_READ_BIT, 
SHADER_WRITE_BIT,
                           0, slice_data_size*f->slice_count);
+    }
+
+    if (f->remap_mode) {
+        RET(run_remap(avctx, exec, src, src_views,
+                      remap_data_buf, remap_data_size, &pd));
+
+        /* Make sure the writes are visible to the setup shader */
+        ff_vk_buf_barrier(buf_bar[nb_buf_bar++], remap_data_buf,
+                          COMPUTE_SHADER_BIT, SHADER_READ_BIT, 
SHADER_WRITE_BIT,
+                          COMPUTE_SHADER_BIT, SHADER_READ_BIT, 
SHADER_WRITE_BIT,
+                          0, remap_data_size*f->slice_count);
+    }
+
+    if (fv->optimize_rct || f->remap_mode) {
         vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
             .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
             .pBufferMemoryBarriers = buf_bar,
@@ -497,6 +575,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
                                       1, 4,
                                       VK_IMAGE_LAYOUT_GENERAL,
                                       VK_NULL_HANDLE);
+    if (f->remap_mode)
+        ff_vk_shader_update_desc_buffer(&fv->s, exec,
+                                        &fv->enc, 1, 5, 0,
+                                        remap_data_buf,
+                                        0, remap_data_size*f->slice_count,
+                                        VK_FORMAT_UNDEFINED);
 
     ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
     ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
@@ -812,6 +896,53 @@ fail:
     return err;
 }
 
+static int init_remap_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFVulkanShader *shd = &fv->remap;
+    /* Builds fv->remap: push constants, two descriptor sets, SPIR-V link */
+    ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
+                      (uint32_t []) { 32, 32, 1 }, 0);
+
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    const FFVulkanDescriptorSetBinding desc_set_const[] = {
+        { /* rangecoder_buf */
+            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set_const, 1, 1, 0);
+
+    const FFVulkanDescriptorSetBinding desc_set[] = {
+        { /* slice_data_buf */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        { /* src: one storage image per plane of sw_format */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .elems  = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+        },
+        { /* fltmap: bound at set 1, binding 2 by run_remap() */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0);
+
+    RET(ff_vk_shader_link(&fv->s, shd,
+                          ff_ffv1_enc_remap_comp_spv_data,
+                          ff_ffv1_enc_remap_comp_spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+    /* NOTE(review): err is presumably assigned by RET() - macro not shown */
+fail:
+    return err;
+}
+
 static int init_setup_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
 {
     int err;
@@ -837,8 +968,12 @@ static int init_setup_shader(AVCodecContext *avctx, 
VkSpecializationInfo *sl)
             .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
         },
+        { /* fltmap */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
     };
-    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0);
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0);
 
     RET(ff_vk_shader_link(&fv->s, shd,
                           ff_ffv1_enc_setup_comp_spv_data,
@@ -903,6 +1038,7 @@ static int init_encode_shader(AVCodecContext *avctx, 
VkSpecializationInfo *sl)
 {
     int err;
     VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
     FFVulkanShader *shd = &fv->enc;
 
     uint32_t wg_x = fv->ctx.ac != AC_GOLOMB_RICE ? CONTEXT_SIZE : 1;
@@ -950,10 +1086,19 @@ static int init_encode_shader(AVCodecContext *avctx, 
VkSpecializationInfo *sl)
             .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
         },
+        { /* fltmap */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
     };
-    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 4 + fv->is_rgb, 0, 
0);
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set,
+                                    4 + fv->is_rgb + !!f->remap_mode, 0, 0);
 
-    if (fv->ctx.ac == AC_GOLOMB_RICE) {
+    if (f->remap_mode) {
+        ff_vk_shader_link(&fv->s, shd,
+                          ff_ffv1_enc_rgb_float_comp_spv_data,
+                          ff_ffv1_enc_rgb_float_comp_spv_len, "main");
+    } else if (fv->ctx.ac == AC_GOLOMB_RICE) {
         if (fv->is_rgb)
             ff_vk_shader_link(&fv->s, shd,
                               ff_ffv1_enc_rgb_golomb_comp_spv_data,
@@ -1154,13 +1299,14 @@ static av_cold int 
vulkan_encode_ffv1_init(AVCodecContext *avctx)
                        !fv->force_pcm && fv->optimize_rct;
 
     /* Init shader specialization consts */
-    SPEC_LIST_CREATE(sl, 18, 18*sizeof(uint32_t))
+    SPEC_LIST_CREATE(sl, 19, 19*sizeof(uint32_t))
     SPEC_LIST_ADD(sl,  0, 32, RGB_LINECACHE);
     SPEC_LIST_ADD(sl,  1, 32, f->ec);
     ff_ffv1_vk_set_common_sl(avctx, f, sl, fv->s.frames->sw_format);
     SPEC_LIST_ADD(sl, 15, 32, fv->force_pcm);
     SPEC_LIST_ADD(sl, 16, 32, fv->optimize_rct);
     SPEC_LIST_ADD(sl, 17, 32, f->context_model);
+    SPEC_LIST_ADD(sl, 18, 32, f->remap_mode); /* GLSL constant_id = 18 */
 
     if (fv->optimize_rct) {
         err = init_rct_search_shader(avctx, sl);
@@ -1168,6 +1314,12 @@ static av_cold int 
vulkan_encode_ffv1_init(AVCodecContext *avctx)
             return err;
     }
 
+    if (f->remap_mode) {
+        err = init_remap_shader(avctx, sl);
+        if (err < 0)
+            return err;
+    }
+
     /* Init setup shader */
     err = init_setup_shader(avctx, sl);
     if (err < 0)
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index cd21a53f88..774c425dcf 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -8,7 +8,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += 
vulkan/ffv1_enc_setup.comp.spv.o \
                                       vulkan/ffv1_enc_golomb.comp.spv.o \
                                       vulkan/ffv1_enc_rgb.comp.spv.o \
                                       vulkan/ffv1_enc_rgb_golomb.comp.spv.o \
-                                      vulkan/ffv1_enc_rct_search.comp.spv.o
+                                      vulkan/ffv1_enc_rct_search.comp.spv.o \
+                                      vulkan/ffv1_enc_remap.comp.spv.o \
+                                      vulkan/ffv1_enc_rgb_float.comp.spv.o
 
 OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
                                       vulkan/ffv1_dec_reset.comp.spv.o \
diff --git a/libavcodec/vulkan/ffv1_common.glsl 
b/libavcodec/vulkan/ffv1_common.glsl
index 8719276691..bec1e6da1f 100644
--- a/libavcodec/vulkan/ffv1_common.glsl
+++ b/libavcodec/vulkan/ffv1_common.glsl
@@ -55,6 +55,7 @@ const ivec2 chroma_shift = ivec2(chroma_shift_x, 
chroma_shift_y);
 layout (constant_id = 15) const bool force_pcm = false;
 layout (constant_id = 16) const bool rct_search = false;
 layout (constant_id = 17) const uint context_model = 0;
+layout (constant_id = 18) const uint remap_mode = 0;
 
 layout (push_constant, scalar) uniform pushConstants {
     u8buf slice_data;
diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl 
b/libavcodec/vulkan/ffv1_enc.comp.glsl
index 7a212fd6bd..071e14444c 100644
--- a/libavcodec/vulkan/ffv1_enc.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc.comp.glsl
@@ -34,7 +34,14 @@ layout (set = 0, binding = 2, scalar) uniform crc_ieee_buf {
 layout (set = 1, binding = 1, scalar) writeonly buffer slice_results_buf {
     uint32_t slice_results[];
 };
+#ifdef FLOAT
+layout (set = 1, binding = 3) uniform image2D src[];
+layout (set = 1, binding = 5) buffer FLTBufs {
+    uint fltmap[][4][65536];
+};
+#else
 layout (set = 1, binding = 3) uniform uimage2D src[];
+#endif
 
 #ifndef GOLOMB
 
@@ -222,11 +229,19 @@ const uvec4 rgb_plane_order = { 1, 2, 0, 3 };
 
 ivec4 load_components(ivec2 pos)
 {
+#ifdef FLOAT
+    ivec4 pix = ivec4(floatBitsToInt(imageLoad(src[0], pos)[0]));
+    if (planar_rgb) {
+        for (int i = 1; i < (3 + int(transparency)); i++)
+            pix[i] = int(floatBitsToInt(imageLoad(src[i], pos)[0]));
+    }
+#else
     ivec4 pix = ivec4(imageLoad(src[0], pos));
     if (planar_rgb) {
         for (int i = 1; i < (3 + int(transparency)); i++)
             pix[i] = int(imageLoad(src[i], pos)[0]);
     }
+#endif
 
     return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
                  pix[fmt_lut[2]], pix[fmt_lut[3]]);
diff --git a/libavcodec/vulkan/ffv1_enc_remap.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_remap.comp.glsl
new file mode 100644
index 0000000000..de8710c07c
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_remap.comp.glsl
@@ -0,0 +1,67 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+
+#define SB_QUALI readonly
+#include "common.glsl"
+#include "ffv1_common.glsl"
+
+layout (set = 1, binding = 1) uniform image2D src[];
+
+layout (set = 1, binding = 2) buffer FLTBufs {
+    uint fltmap[][4][65536];
+};
+
+/* Flag, in fltmap[slice_idx][p], every value found in plane p of the slice */
+void load_fltmap(uint slice_idx, uint p)
+{
+    uvec2 img_size = imageSize(src[0]);
+    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
+                           gl_NumWorkGroups.x, 0);
+    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
+                           gl_NumWorkGroups.x, 0);
+    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
+                           gl_NumWorkGroups.y, 0);
+    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
+                           gl_NumWorkGroups.y, 0);
+    const uint nb_inv = gl_WorkGroupSize.x*gl_WorkGroupSize.y; /* loop stride */
+    for (uint i = gl_LocalInvocationIndex; i < 65536; i += nb_inv)
+        fltmap[slice_idx][p][i] = 0u;
+    memoryBarrierBuffer();
+    barrier(); /* every entry is cleared before any invocation sets a flag */
+    for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {
+        for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {
+            int pix_idx = floatBitsToInt(imageLoad(src[p], ivec2(x, y))[0]);
+            atomicOr(fltmap[slice_idx][p][pix_idx], 1u); /* presence, not count */
+        }
+    }
+}
+
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + 
gl_WorkGroupID.x;
+    /* Workgroups are numbered row-major; each one fills one slice's table */
+    for (int i = 0; i < color_planes; i++)
+        load_fltmap(slice_idx, i);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_rgb_float.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_rgb_float.comp.glsl
new file mode 100644
index 0000000000..c66440601a
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rgb_float.comp.glsl
@@ -0,0 +1,31 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 4) uniform uimage2D tmp;
+
+#define FLOAT
+#define RGB
+#include "ffv1_enc.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
index 62e774ef86..1009565121 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
@@ -23,10 +23,15 @@
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
 
+#define NB_CONTEXTS 6
 #define FULL_RENORM
 #include "common.glsl"
 #include "ffv1_common.glsl"
 
+layout (set = 1, binding = 1) buffer FLTBufs { /* init_setup_shader() slot */
+    uint fltmap[][4][65536];
+};
+
 void init_slice(inout SliceContext sc, uint slice_idx)
 {
     /* Set coordinates */
@@ -51,26 +56,67 @@ void init_slice(inout SliceContext sc, uint slice_idx)
     rac_init(slice_idx*slice_size_max, slice_size_max);
 }
 
-void put_usymbol(uint v)
+void put_usymbol(const uint ctx, uint v)
 {
     bool is_nil = (v == 0);
-    put_rac(rc_state[0], is_nil);
+    put_rac(rc_state[ctx*CONTEXT_SIZE], is_nil);
     if (is_nil)
         return;
 
     const int e = findMSB(v);
 
     for (int i = 0; i <= e; i++)
-        put_rac(rc_state[1 + min(i, 9)], i < e);
+        put_rac(rc_state[ctx*CONTEXT_SIZE + 1 + min(i, 9)], i < e);
 
     for (int i = e - 1; i >= 0; i--)
-        put_rac(rc_state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
+        put_rac(rc_state[ctx*CONTEXT_SIZE + 22 + min(i, 9)],
+                bool(bitfieldExtract(v, i, 1)));
 }
 
 shared uint hdr_sym[4 + 4 + 3];
 const int nb_hdr_sym = 4 + codec_planes + 3;
 
-void write_slice_header(inout SliceContext sc)
+/* Range-code each plane's presence table and rewrite it as a rank table */
+void encode_histogram_remap(uint slice_idx)
+{
+    const int flip = remap_mode == 2 ? 0x7FFF : 0;
+
+    for (int p = 0; p < color_planes; p++) {
+        uint j = 0;  /* number of present values visited so far */
+        uint lu = 0; /* flag of the current run; selects the state bank */
+        int run = 0;
+
+        for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++)
+            rc_state[i] = uint8_t(128);
+
+        put_usymbol(0, 0);
+
+        for (int i = 0; i < CONTEXT_SIZE; i++)
+            rc_state[i] = uint8_t(128);
+
+        for (int i = 0; i < rct_offset; i++) {
+            int ri = i ^ (((i & 0x8000) != 0) ? 0 : flip);
+            uint u = uint(fltmap[slice_idx][p][ri] != 0u); /* clamp to 0/1 */
+
+            fltmap[slice_idx][p][ri] = j;
+            j += u;
+
+            if (lu == u) {
+                run++;
+            } else {
+                put_usymbol(lu, run);
+                if (run == 0)
+                    lu = u;
+                run = 0;
+            }
+        }
+
+        if (run != 0)
+            put_usymbol(lu, run);
+    }
+}
+
+void write_slice_header(uint slice_idx, in SliceContext sc)
 {
     [[unroll]]
     for (int i = 0; i < CONTEXT_SIZE; i++)
@@ -90,15 +136,18 @@ void write_slice_header(inout SliceContext sc)
     hdr_sym[nb_hdr_sym - 1] = sar.y;
 
     for (int i = 0; i < nb_hdr_sym; i++)
-        put_usymbol(hdr_sym[i]);
+        put_usymbol(0, hdr_sym[i]);
 
     if (version >= 4) {
         put_rac(rc_state[0], force_pcm);
-        put_usymbol(uint(force_pcm));
+        put_usymbol(0, uint(force_pcm));
         if (!force_pcm && colorspace == 1) {
-            put_usymbol(sc.slice_rct_coef.g);
-            put_usymbol(sc.slice_rct_coef.r);
+            put_usymbol(0, sc.slice_rct_coef.g);
+            put_usymbol(0, sc.slice_rct_coef.r);
         }
+
+        if (remap_mode != 0)
+            encode_histogram_remap(slice_idx);
     }
 }
 
@@ -116,7 +165,7 @@ void main(void)
     if (slice_idx == 0)
         write_frame_header(slice_ctx[slice_idx]);
 
-    write_slice_header(slice_ctx[slice_idx]);
+    write_slice_header(slice_idx, slice_ctx[slice_idx]);
 
     slice_ctx[slice_idx].c = rc;
 }
diff --git a/libavcodec/vulkan/rangecoder.glsl 
b/libavcodec/vulkan/rangecoder.glsl
index f86f632ee3..c3b2dbbd36 100644
--- a/libavcodec/vulkan/rangecoder.glsl
+++ b/libavcodec/vulkan/rangecoder.glsl
@@ -23,6 +23,9 @@
 #ifndef VULKAN_RANGECODER_H
 #define VULKAN_RANGECODER_H
 
+#ifndef NB_CONTEXTS
+#define NB_CONTEXTS 1
+#endif
 #define CONTEXT_SIZE 32
 #define MAX_OVERREAD 2
 
@@ -47,7 +50,7 @@ struct RangeCoder {
 };
 
 shared RangeCoder rc;
-shared uint8_t rc_state[CONTEXT_SIZE];
+shared uint8_t rc_state[NB_CONTEXTS*CONTEXT_SIZE];
 shared bool rc_data[CONTEXT_SIZE];
 
 void rac_init(uint bs_start, uint bs_len)
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to