This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 4c19f82cc02ab6512686c160893d5f8b2af42477
Author:     Niklas Haas <[email protected]>
AuthorDate: Wed Apr 15 20:02:36 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Apr 16 20:59:39 2026 +0000

    swscale/ops_dispatch: compute minimum needed tail size
    
    Not only does this take into account extreme edge cases where the plane
    padding can significantly exceed the actual width/stride, but it also
    correctly takes into account the filter offsets when scaling, which the
    previous code completely ignored.
    
    Simpler, more robust, and more correct. Valgrind now passes for 100% of
    format conversions for me, with and without scaling.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_dispatch.c | 65 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index a0d3da6cc3..a86fc060e2 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -153,6 +153,23 @@ static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
     }
 }
 
+static size_t safe_bytes_pad(int linesize, int plane_pad)
+{
+    av_assert1(linesize);
+    int64_t safe_bytes = FFABS((int64_t) linesize) - plane_pad;
+    return FFMAX(safe_bytes, 0);
+}
+
+static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
+                                 ptrdiff_t safe_offset,
+                                 const int32_t *offset_bytes)
+{
+    size_t safe_blocks = num_blocks;
+    while (safe_blocks && offset_bytes[safe_blocks * block_size - 1] > safe_offset)
+        safe_blocks--;
+    return safe_blocks;
+}
+
 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
                          const SwsPass *pass)
 {
@@ -174,20 +191,31 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
     p->memcpy_last  = false;
     p->memcpy_out   = false;
 
+    size_t safe_blocks = num_blocks;
     for (int i = 0; i < p->planes_in; i++) {
         int idx        = p->idx_in[i];
         int chroma     = idx == 1 || idx == 2;
         int sub_x      = chroma ? indesc->log2_chroma_w : 0;
         int sub_y      = chroma ? indesc->log2_chroma_h : 0;
-        size_t plane_w    = AV_CEIL_RSHIFT(aligned_w, sub_x);
-        size_t plane_size = pixel_bytes(plane_w, p->pixel_bits_in, AV_ROUND_UP);
-        size_t total_size = plane_size + comp->over_read;
-        size_t loop_size  = num_blocks * exec->block_size_in;
-        if (in->linesize[idx] >= 0) {
-            p->memcpy_last |= total_size > in->linesize[idx];
+        size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
+        size_t safe_blocks_in;
+        if (exec->in_offset_x) {
+            size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
+                                             AV_ROUND_UP);
+            safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
+                                                safe_bytes - filter_size,
+                                                exec->in_offset_x);
         } else {
-            p->memcpy_first |= total_size > -in->linesize[idx];
+            safe_blocks_in = safe_bytes / exec->block_size_in;
         }
+
+        if (safe_blocks_in < num_blocks) {
+            p->memcpy_first |= in->linesize[idx] < 0;
+            p->memcpy_last  |= in->linesize[idx] > 0;
+            safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
+        }
+
+        size_t loop_size   = num_blocks * exec->block_size_in;
         exec->in[i]        = in->data[idx];
         exec->in_stride[i] = in->linesize[idx];
         exec->in_bump[i]   = in->linesize[idx] - loop_size;
@@ -200,10 +228,14 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
         int chroma     = idx == 1 || idx == 2;
         int sub_x      = chroma ? outdesc->log2_chroma_w : 0;
         int sub_y      = chroma ? outdesc->log2_chroma_h : 0;
-        size_t plane_w    = AV_CEIL_RSHIFT(aligned_w, sub_x);
-        size_t plane_size = pixel_bytes(plane_w, p->pixel_bits_out, AV_ROUND_UP);
-        size_t loop_size  = num_blocks * exec->block_size_out;
-        p->memcpy_out |= plane_size + comp->over_write > FFABS(out->linesize[idx]);
+        size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
+        size_t safe_blocks_out = safe_bytes / exec->block_size_out;
+        if (safe_blocks_out < num_blocks) {
+            p->memcpy_out = true;
+            safe_blocks   = FFMIN(safe_blocks, safe_blocks_out);
+        }
+
+        size_t loop_size    = num_blocks * exec->block_size_out;
         exec->out[i]        = out->data[idx];
         exec->out_stride[i] = out->linesize[idx];
         exec->out_bump[i]   = out->linesize[idx] - loop_size;
@@ -212,8 +244,10 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
     }
 
     const bool memcpy_in = p->memcpy_first || p->memcpy_last;
-    if (!memcpy_in && !p->memcpy_out)
+    if (!memcpy_in && !p->memcpy_out) {
+        av_assert0(safe_blocks == num_blocks);
         return 0;
+    }
 
     /* Set-up tail section parameters and buffers */
     SwsOpExec *tail = &p->exec_tail;
@@ -221,13 +255,11 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
     size_t alloc_size = 0;
     *tail = *exec;
 
-    p->tail_blocks = 1;
-    av_assert0(num_blocks >= p->tail_blocks);
-
-    const size_t safe_width = (num_blocks - p->tail_blocks) * block_size;
+    const size_t safe_width = safe_blocks * block_size;
     const size_t tail_size  = pass->width - safe_width;
     p->tail_off_out  = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
     p->tail_size_out = pixel_bytes(tail_size,  p->pixel_bits_out, AV_ROUND_UP);
+    p->tail_blocks   = num_blocks - safe_blocks;
 
     if (exec->in_offset_x) {
         p->tail_off_in  = exec->in_offset_x[safe_width];
@@ -395,6 +427,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
     }
 
     /* Dispatch kernel over tail */
+    av_assert1(tail_blocks > 0);
     comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
 
     for (int i = 0; memcpy_out && i < p->planes_out; i++) {

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to