This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit e729f49645b6f60e1f45efffee14ff8645a050f7
Author: Niklas Haas <[email protected]>
AuthorDate: Tue Jan 13 17:20:55 2026 +0100
Commit: Niklas Haas <[email protected]>
CommitDate: Thu Feb 19 19:44:46 2026 +0000

    swscale/ops_backend: allocate block storage up-front

    Instead of in each read() function. Not only is this slightly faster,
    due to promoting more tail calls, but it also allows us to have
    operation chains that don't start with a read.

    Also simplifies the implementations.

    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_backend.c | 16 +++++++++-------
 libswscale/ops_backend.h | 21 ++++++---------------
 libswscale/ops_tmpl_int.c | 10 +---------
 3 files changed, 16 insertions(+), 31 deletions(-)

diff --git a/libswscale/ops_backend.c b/libswscale/ops_backend.c
index 248a591fd2..a503139016 100644
--- a/libswscale/ops_backend.c
+++ b/libswscale/ops_backend.c
@@ -53,18 +53,20 @@ static void process(const SwsOpExec *exec, const void *priv,
 {
     const SwsOpChain *chain = priv;
     const SwsOpImpl *impl = chain->impl;
-    SwsOpIter iter;
+    u32block_t x, y, z, w; /* allocate enough space for any intermediate */
 
-    for (iter.y = y_start; iter.y < y_end; iter.y++) {
+    SwsOpIter iterdata;
+    SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
+
+    for (iter->y = y_start; iter->y < y_end; iter->y++) {
         for (int i = 0; i < 4; i++) {
-            iter.in[i] = exec->in[i] + (iter.y - y_start) * exec->in_stride[i];
-            iter.out[i] = exec->out[i] + (iter.y - y_start) * exec->out_stride[i];
+            iter->in[i] = exec->in[i] + (iter->y - y_start) * exec->in_stride[i];
+            iter->out[i] = exec->out[i] + (iter->y - y_start) * exec->out_stride[i];
         }
 
         for (int block = bx_start; block < bx_end; block++) {
-            iter.x = block * SWS_BLOCK_SIZE;
-            ((void (*)(SwsOpIter *, const SwsOpImpl *)) impl->cont)
-                (&iter, &impl[1]);
+            iter->x = block * SWS_BLOCK_SIZE;
+            CONTINUE(u32block_t, x, y, z, w);
         }
     }
 }
diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h
index 4a1794af8a..b1616f6b02 100644
--- a/libswscale/ops_backend.h
+++ b/libswscale/ops_backend.h
@@ -78,13 +78,9 @@ typedef struct SwsOpIter {
                                           __VA_ARGS__)
 
 #define DECL_READ(NAME, ...) \
-    static av_always_inline void fn(NAME)(SwsOpIter *restrict iter, \
-                                          const SwsOpImpl *restrict impl, \
-                                          const pixel_t *restrict in0, \
-                                          const pixel_t *restrict in1, \
-                                          const pixel_t *restrict in2, \
-                                          const pixel_t *restrict in3, \
-                                          __VA_ARGS__)
+    DECL_FUNC(NAME, const pixel_t *restrict in0, const pixel_t *restrict in1, \
+              const pixel_t *restrict in2, const pixel_t *restrict in3, \
+              __VA_ARGS__)
 
 #define DECL_WRITE(NAME, ...) \
     DECL_FUNC(NAME, pixel_t *restrict out0, pixel_t *restrict out1, \
@@ -96,10 +92,9 @@ typedef struct SwsOpIter {
     fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__)
 
 #define CALL_READ(FUNC, ...) \
-    fn(FUNC)(iter, impl, (const pixel_t *) iter->in[0], \
-                         (const pixel_t *) iter->in[1], \
-                         (const pixel_t *) iter->in[2], \
-                         (const pixel_t *) iter->in[3], __VA_ARGS__)
+    CALL(FUNC, (const pixel_t *) iter->in[0], (const pixel_t *) iter->in[1], \
+         (const pixel_t *) iter->in[2], (const pixel_t *) iter->in[3], \
+         __VA_ARGS__)
 
 #define CALL_WRITE(FUNC, ...) \
     CALL(FUNC, (pixel_t *) iter->out[0], (pixel_t *) iter->out[1], \
@@ -112,10 +107,6 @@ typedef struct SwsOpIter {
                                   block_t x, block_t y, \
                                   block_t z, block_t w)
 
-#define DECL_IMPL_READ(NAME) \
-    static SWS_FUNC void fn(NAME)(SwsOpIter *restrict iter, \
-                                  const SwsOpImpl *restrict impl)
-
 /* Helper macro to call into the next continuation with a given type */
 #define CONTINUE(TYPE, ...) \
     ((void (*)(SwsOpIter *, const SwsOpImpl *, \
diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c
index 84596e2763..d9870faf34 100644
--- a/libswscale/ops_tmpl_int.c
+++ b/libswscale/ops_tmpl_int.c
@@ -58,8 +58,6 @@
 
 DECL_READ(read_planar, const int elems)
 {
-    block_t x, y, z, w;
-
     SWS_LOOP
     for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
         x[i] = in0[i];
@@ -76,8 +74,6 @@ DECL_READ(read_planar, const int elems)
 
 DECL_READ(read_packed, const int elems)
 {
-    block_t x, y, z, w;
-
     SWS_LOOP
     for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
         x[i] = in0[elems * i + 0];
@@ -121,7 +117,7 @@ DECL_WRITE(write_packed, const int elems)
 }
 
 #define WRAP_READ(FUNC, ELEMS, FRAC, PACKED) \
-DECL_IMPL_READ(FUNC##ELEMS) \
+DECL_IMPL(FUNC##ELEMS) \
 { \
     CALL_READ(FUNC, ELEMS); \
     for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \
@@ -173,8 +169,6 @@ WRAP_WRITE(write_packed, 4, 0, true)
 #if BIT_DEPTH == 8
 DECL_READ(read_nibbles, const int elems)
 {
-    block_t x, y, z, w;
-
     SWS_LOOP
     for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) {
         const pixel_t val = ((const pixel_t *) in0)[i >> 1];
@@ -187,8 +181,6 @@ DECL_READ(read_nibbles, const int elems)
 
 DECL_READ(read_bits, const int elems)
 {
-    block_t x, y, z, w;
-
     SWS_LOOP
     for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) {
         const pixel_t val = ((const pixel_t *) in0)[i >> 3];

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
