PR #23173 opened by Niklas Haas (haasn) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23173 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23173.patch
Normally, over-reading garbage data from the padding region is fine, even when horizontally filtering; because they are explicitly multiplied by 0 in the latter case. However, 0 * NaN = 0 * Infinity = NaN; and x + NaN = NaN, so this corrupts the entire filtered pixel. I tried unsuccessfully to solve this is a number of different ways (e.g. adjusting the filter offsets and weights backwards to avoid reading into the padding bytes at all), but those attempts all came with other edge cases (e.g. if the image itself is smaller than the number of aligned filter taps). I think the only truly robust solution here is to just force the copy into the tail buffer. As an aside, the tail buffer is always zero-initialized, and will never contain garbage data inside the padding bytes, because we don't copy any extra pixels. >From 200592d1883560bd27d52717be39f9e80a3e791e Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Wed, 20 May 2026 18:13:32 +0200 Subject: [PATCH 1/2] swscale/ops_dispatch: directly communicate extra filter taps over_read is arguably a clumsy way of solving this; fundamentally this field is about communicating that the filter requires access to additional taps beyond what is available in the filter itself. This shouldn't actually change anything, as the accounting is just moved from `over_read` to `filter_read_h`, but makes the code a bit simpler and more transparent. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_chain.h | 1 + libswscale/ops_dispatch.c | 9 +++++---- libswscale/ops_dispatch.h | 1 + libswscale/x86/ops.c | 5 ++--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h index dcd9a9fc74..08e29e8fb1 100644 --- a/libswscale/ops_chain.h +++ b/libswscale/ops_chain.h @@ -114,6 +114,7 @@ typedef struct SwsImplResult { void (*free)(SwsOpPriv *priv); /* free function for `priv` */ int over_read; /* implementation over-reads input by this many bytes */ int over_write; /* implementation over-writes output by this many bytes */ + int extra_taps; /* extra filter taps needed for read */ } SwsImplResult; typedef struct SwsOpEntry { diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index f477c7839a..ab56bacaf1 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -45,7 +45,7 @@ typedef struct SwsOpPass { int idx_in[4]; int idx_out[4]; int *offsets_y; - int filter_size; + int filter_taps_h; bool memcpy_first; bool memcpy_last; bool memcpy_out; @@ -207,7 +207,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read); size_t safe_blocks_in; if (exec->in_offset_x) { - size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in, + size_t filter_size = pixel_bytes(p->filter_taps_h, p->pixel_bits_in, AV_ROUND_UP); safe_blocks_in = safe_blocks_offset(num_blocks, block_size, safe_bytes - filter_size, @@ -271,7 +271,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, if (exec->in_offset_x) { p->tail_off_in = exec->in_offset_x[safe_width]; p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in; - p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP); + p->tail_size_in += pixel_bytes(p->filter_taps_h, p->pixel_bits_in, AV_ROUND_UP); } else { p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN); p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP); @@ -524,6 +524,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, const SwsFilterWeights *filter = read->rw.kernel; if (read->rw.filter == SWS_OP_FILTER_V) { p->offsets_y = av_refstruct_ref(filter->offsets); + av_assert0(!comp->extra_taps); /* Compute relative pointer bumps for each output line */ int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump)); @@ -564,7 +565,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, for (int x = filter->dst_size; x < pixels; x++) offset[x] = offset[filter->dst_size - 1]; p->exec_base.block_size_in = 0; /* ptr does not advance */ - p->filter_size = filter->filter_size; + p->filter_taps_h = filter->filter_size + comp->extra_taps; } ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height, diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h index be771da9a9..0ceb92b0c6 100644 --- a/libswscale/ops_dispatch.h +++ b/libswscale/ops_dispatch.h @@ -119,6 +119,7 @@ typedef struct SwsCompiledOp { int block_size; /* number of pixels processed per iteration */ int over_read; /* implementation over-reads input by this many bytes */ int over_write; /* implementation over-writes output by this many bytes */ + int extra_taps; /* extra filter taps needed for read */ /* Arbitrary private data */ void *priv; diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 20369652cc..e5dd17cbd0 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -423,7 +423,7 @@ static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out) out->priv.ptr = weights.ptr; out->priv.uptr[1] = aligned_size; out->free = ff_op_priv_free; - out->over_read = (aligned_size - filter_size) * pixel_size; + out->extra_taps = aligned_size - filter_size; return 0; } @@ -455,7 +455,6 @@ static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out) { const SwsOp *op = params->op; const SwsFilterWeights *filter = op->rw.kernel; - const int pixel_size = ff_sws_pixel_type_size(op->type); const int sizeof_weights = hscale_sizeof_weight(op); const int block_size = params->table->block_size; const int taps_align = 16 / sizeof_weights; /* taps per iteration (XMM) */ @@ -506,7 +505,7 @@ static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out) out->priv.ptr = weights.ptr; out->priv.uptr[1] = aligned_size * sizeof_weights; out->free = ff_op_priv_free; - out->over_read = (aligned_size - filter_size) * pixel_size; + out->extra_taps = aligned_size - filter_size; return 0; } -- 2.52.0 >From e0a1501a34dabd87a941d387238f8151c4ba3f39 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Wed, 20 May 2026 17:53:56 +0200 Subject: [PATCH 2/2] swscale/ops_dispatch: prevent float over-read when horizontal filtering The code made the fundamental assumption that over-read into the padding bytes is okay to do; because the most that can happen is that those pixel values end up corrupted, which doesn't affect any adjacent pixels. However, this is not true for SWS_OP_FILTER_H, because this operation fundamentally mixes together horizontal pixels. Normally, this was fine, because the filter weights for those pixels are set to 0, and 0 * x = 0. However, that is not true for floating point inputs, which can contain Infinity; and 0 * Infinity = NaN, thus corrupting the entire pixel. Solve it by specifically preventing over-read when it would be unsafe. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_dispatch.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index ab56bacaf1..1f2d512469 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -182,6 +182,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, { const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format); const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format); + const bool float_in = indesc->flags & AV_PIX_FMT_FLAG_FLOAT; SwsOpPass *p = pass->priv; SwsOpExec *exec = &p->exec_base; @@ -204,8 +205,17 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, int chroma = idx == 1 || idx == 2; int sub_x = chroma ? indesc->log2_chroma_w : 0; int sub_y = chroma ? indesc->log2_chroma_h : 0; - size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read); + size_t safe_bytes; size_t safe_blocks_in; + + if (p->filter_taps_h && float_in) { + /* Floating point inputs may contain NaN in the padding */ + safe_bytes = pixel_bytes(pass->width, p->pixel_bits_in, AV_ROUND_DOWN); + } else { + /* In all other cases, we can safely over-read into the stride */ + safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read); + } + if (exec->in_offset_x) { size_t filter_size = pixel_bytes(p->filter_taps_h, p->pixel_bits_in, AV_ROUND_UP); -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
