This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit b756d83e242b236a38663a34997919a5d0c3785f Author: Lynne <[email protected]> AuthorDate: Tue Feb 10 00:04:43 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Thu Feb 19 19:42:31 2026 +0100 vulkan_ffv1: use local RangeCoder struct, refactor overread checking --- libavcodec/vulkan/ffv1_dec.comp.glsl | 68 +++++++++++++++++------------- libavcodec/vulkan/ffv1_dec_setup.comp.glsl | 42 +++++++++--------- libavcodec/vulkan/rangecoder.glsl | 66 +++++++++++++---------------- libavcodec/vulkan_ffv1.c | 13 +++--- 4 files changed, 95 insertions(+), 94 deletions(-) diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl index 720fa14cd2..2c089dd64d 100644 --- a/libavcodec/vulkan/ffv1_dec.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -41,46 +41,49 @@ layout (set = 1, binding = 3, scalar) buffer slice_state_buf { uint8_t slice_rc_state[]; }; -#define READ(c, idx) get_rac_noadapt(c, idx) -int get_isymbol(inout RangeCoder c) +#define READ(idx) get_rac_noadapt(idx) +int get_isymbol(void) { - if (READ(c, 0)) + if (READ(0)) return 0; - uint e = 1; + int e = 1; for (; e < 11; e++) { - if (!READ(c, e)) + if (!READ(e)) break; } int a = 1; - uint i = e; + int i = e; if (bits > 8 && e == 11) { do { rc_state[10] = zero_one_state[rc_state[10] + 256]; e++; - } while (READ(c, 10)); + } while (READ(10)); e--; i = e - 1; - a += a + int(READ(c, 31)); + a <<= 1; + a |= int(READ(31)); for (; i >= 11; i--) { rc_state[31] = zero_one_state[rc_state[31] + (rc_data[31] ? 256 : 0)]; - a += a + int(READ(c, 31)); + a <<= 1; + a |= int(READ(31)); } } - i += 20; - for (; i >= 22; i--) - a += a + int(READ(c, i)); + a <<= i - 1; + i -= 2; + for (; i >= 0; i--) + a |= int(READ(i + 22)) << i; - return READ(c, min(e + 10, 21)) ? -a : a; + return READ(min(e + 10, 21)) ? -a : a; } -void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p) +void decode_line_pcm(ivec2 sp, int w, int y, int p) { if (gl_LocalInvocationID.x > 0) return; @@ -97,13 +100,13 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p) [[unroll]] for (uint i = (rct_offset >> 1); i > 0; i >>= 1) - v |= get_rac_equi(sc.c) ? i : 0; + v |= get_rac_equi() ? i : 0; imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); } } -void decode_line(inout SliceContext sc, ivec2 sp, int w, +void decode_line(ivec2 sp, int w, int y, int p, uint state_off, uint8_t quant_table_idx, int run_index) { @@ -126,7 +129,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, int diff; if (gl_LocalInvocationID.x == 0) - diff = get_isymbol(sc.c); + diff = get_isymbol(); barrier(); uint i = gl_LocalInvocationID.x; @@ -155,14 +158,14 @@ GetBitContext gb; void golomb_init(inout SliceContext sc) { if (version == 3 && micro_version > 1 || version > 3) - get_rac_internal(sc.c, sc.c.range * 129 >> 8); + get_rac_internal(rc.range * 129 >> 8); - uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; - init_get_bits(gb, u8buf(sc.c.bytestream_start + ac_byte_count), - int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count)); + uint64_t ac_byte_count = rc.bytestream - rc.bytestream_start - 1; + init_get_bits(gb, u8buf(rc.bytestream_start + ac_byte_count), + int(rc.bytestream_end - rc.bytestream_start - ac_byte_count)); } -void decode_line(inout SliceContext sc, ivec2 sp, int w, +void decode_line(ivec2 sp, int w, int y, int p, uint state_off, uint8_t quant_table_idx, inout int run_index) { @@ -284,7 +287,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) #ifdef RGB for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) - decode_line_pcm(sc, sp, w, y, p); + decode_line_pcm(sp, w, y, p); writeout_rgb(sc, sp, w, y, false); } @@ -295,7 +298,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) h = ceil_rshift(h, chroma_shift.y); for (int y = 0; y < h; y++) - decode_line_pcm(sc, sp, w, y, p); + decode_line_pcm(sp, w, y, p); } #endif return; @@ -315,7 +318,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) int run_index = 0; for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) - decode_line(sc, sp, w, y, p, + decode_line(sp, w, y, p, slice_state_off[p], quant_table_idx[p], run_index); writeout_rgb(sc, sp, w, y, true); @@ -328,7 +331,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) int run_index = 0; for (int y = 0; y < h; y++) - decode_line(sc, sp, w, y, p, + decode_line(sp, w, y, p, slice_state_off[p], quant_table_idx[p], run_index); } #endif @@ -337,9 +340,16 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) void main(void) { uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + + rc = slice_ctx[slice_idx].c; + decode_slice(slice_ctx[slice_idx], slice_idx); - uint32_t status = corrupt ? uint32_t(corrupt) : overread; - if (status != 0) - slice_status[2*slice_idx + 1] = status; + if (gl_LocalInvocationID.x > 0) + return; + + uint overread = 0; + if (rc.bytestream >= (rc.bytestream_end + MAX_OVERREAD)) + overread = uint(rc.bytestream - rc.bytestream_end); + slice_status[2*slice_idx + 1] = overread; } diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl index cebff23517..56abad0971 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl @@ -37,28 +37,22 @@ layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf { uint32_t slice_status[]; }; -shared uint8_t setup_state[CONTEXT_SIZE]; shared uint hdr_sym[4 + 4 + 3]; const int nb_hdr_sym = 4 + codec_planes + 3; -uint get_usymbol(inout RangeCoder c) +uint get_usymbol(void) { - if (get_rac_direct(c, setup_state[0])) + if (get_rac_direct(rc_state[0])) return 0; int e = 0; - while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10 + while (get_rac_direct(rc_state[1 + min(e, 9)])) // 1..10 e++; - if (e > 31) { - corrupt = true; - return 0; - } - } uint a = 1; for (int i = e - 1; i >= 0; i--) { a <<= 1; - a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)])); // 22..31 + a |= uint(get_rac_direct(rc_state[22 + min(i, 9)])); // 22..31 } return a; @@ -68,10 +62,10 @@ bool decode_slice_header(inout SliceContext sc) { [[unroll]] for (int i = 0; i < CONTEXT_SIZE; i++) - setup_state[i] = uint8_t(128); + rc_state[i] = uint8_t(128); for (int i = 0; i < nb_hdr_sym; i++) - hdr_sym[i] = get_usymbol(sc.c); + hdr_sym[i] = get_usymbol(); uint sx = hdr_sym[0]; uint sy = hdr_sym[1]; @@ -79,10 +73,8 @@ bool decode_slice_header(inout SliceContext sc) uint sh = hdr_sym[3] + 1; if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 || - sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) || - corrupt) { + sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh)) return true; - } /* Set coordinates */ uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x); @@ -103,11 +95,11 @@ bool decode_slice_header(inout SliceContext sc) } if (version >= 4) { - sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]); - sc.slice_coding_mode = get_usymbol(sc.c); + sc.slice_reset_contexts = get_rac_direct(rc_state[0]); + sc.slice_coding_mode = get_usymbol(); if (sc.slice_coding_mode != 1 && colorspace == 1) { - sc.slice_rct_coef.x = int(get_usymbol(sc.c)); - sc.slice_rct_coef.y = int(get_usymbol(sc.c)); + sc.slice_rct_coef.x = int(get_usymbol()); + sc.slice_rct_coef.y = int(get_usymbol()); if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) return true; } @@ -123,14 +115,15 @@ void main(void) u8buf bs = u8buf(slice_data + slice_offsets[slice_idx].x); uint32_t slice_size = slice_offsets[slice_idx].y; - rac_init_dec(slice_ctx[slice_idx].c, - bs, slice_size); + rac_init_dec(bs, slice_size); if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) - get_rac_equi(slice_ctx[slice_idx].c); + get_rac_equi(); decode_slice_header(slice_ctx[slice_idx]); + slice_ctx[slice_idx].c = rc; + if (has_crc) { uint32_t crc = crcref; for (int i = 0; i < slice_size; i++) @@ -139,5 +132,8 @@ void main(void) slice_status[2*slice_idx + 0] = crc; } - slice_status[2*slice_idx + 1] = corrupt ? uint32_t(corrupt) : overread; + uint overread = 0; + if (rc.bytestream >= (rc.bytestream_end + MAX_OVERREAD)) + overread = uint(rc.bytestream - rc.bytestream_end); + slice_status[2*slice_idx + 1] = overread; } diff --git a/libavcodec/vulkan/rangecoder.glsl b/libavcodec/vulkan/rangecoder.glsl index 545f13d463..95fa6bba29 100644 --- a/libavcodec/vulkan/rangecoder.glsl +++ b/libavcodec/vulkan/rangecoder.glsl @@ -24,6 +24,7 @@ #define VULKAN_RANGECODER_H #define CONTEXT_SIZE 32 +#define MAX_OVERREAD 2 layout (set = 0, binding = 0, scalar) readonly buffer rangecoder_buf { uint8_t zero_one_state[512]; @@ -40,6 +41,7 @@ struct RangeCoder { uint8_t outstanding_byte; }; +shared RangeCoder rc; shared uint8_t rc_state[CONTEXT_SIZE]; shared bool rc_data[CONTEXT_SIZE]; shared bool rc_dec[CONTEXT_SIZE]; @@ -189,74 +191,64 @@ uint rac_terminate(inout RangeCoder c) return uint(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); } -/* Decoder */ -uint overread = 0; -bool corrupt = false; - -void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size) +void rac_init_dec(u8buf data, uint buf_size) { - overread = 0; - corrupt = false; - /* Skip priming bytes */ - rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); + rac_init(rc, OFFBUF(u8buf, data, 2), buf_size - 2); u8vec2 prime = u8vec2buf(data).v; /* Switch endianness of the priming bytes */ - r.low = pack16(prime.yx); + rc.low = pack16(prime.yx); - if (r.low >= 0xFF00) { - r.low = 0xFF00; - r.bytestream_end = uint64_t(data) + 2; + if (rc.low >= 0xFF00) { + rc.low = 0xFF00; + rc.bytestream_end = uint64_t(data) + 2; } } -void refill(inout RangeCoder c) +void refill(void) { - c.range <<= 8; - c.low <<= 8; - if (expectEXT(c.bytestream < c.bytestream_end, false)) { - c.low |= u8buf(c.bytestream).v; - c.bytestream++; - } else { - overread++; - } + rc.range <<= 8; + rc.low <<= 8; + if (expectEXT(rc.bytestream < rc.bytestream_end, true)) + rc.low |= u8buf(rc.bytestream).v; + rc.bytestream++; } -bool get_rac_internal(inout RangeCoder c, const uint range1) +bool get_rac_internal(const uint range1) { - uint ranged = c.range - range1; - bool bit = c.low >= ranged; - c.low -= bit ? ranged : 0; - c.range = (bit ? 0 : ranged) + (bit ? range1 : 0); + uint ranged = rc.range - range1; + bool bit = rc.low >= ranged; + rc.low -= bit ? ranged : 0; + rc.range = (bit ? 0 : ranged) + (bit ? range1 : 0); - if (expectEXT(c.range < 0x100, false)) - refill(c); + if (expectEXT(rc.range < 0x100, false)) + refill(); return bit; } -bool get_rac_direct(inout RangeCoder c, inout uint8_t state) +bool get_rac_direct(inout uint8_t state) { - bool bit = get_rac_internal(c, c.range * state >> 8); + bool bit = get_rac_internal(rc.range * state >> 8); state = zero_one_state[state + (bit ? 256 : 0)]; return bit; } -bool get_rac_noadapt(inout RangeCoder c, uint idx) +bool get_rac_noadapt(uint idx) { rc_dec[idx] = true; - return (rc_data[idx] = get_rac_internal(c, c.range * rc_state[idx] >> 8)); + return (rc_data[idx] = get_rac_internal(rc.range * rc_state[idx] >> 8)); } -bool get_rac(inout RangeCoder c, uint64_t state) +bool get_rac(uint64_t state) { - return get_rac_direct(c, u8buf(state).v); + return get_rac_direct(u8buf(state).v); } -bool get_rac_equi(inout RangeCoder c) +bool get_rac_equi(void) { - return get_rac_internal(c, c.range >> 1); + return get_rac_internal(rc.range >> 1); } #endif /* VULKAN_RANGECODER_H */ diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 260c92836d..8f5cab61d0 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -887,18 +887,21 @@ static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) int slice_error_cnt = 0; int crc_mismatch_cnt = 0; + uint32_t max_overread = 0; for (int i = 0; i < fp->slice_num; i++) { uint32_t crc_res = 0; + uint8_t *ssp = slice_status->mapped_mem + 2*i*sizeof(uint32_t); if (fp->crc_checked) - crc_res = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 0); - uint32_t status = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 4); - slice_error_cnt += !!status; + crc_res = AV_RN32(ssp + 0); + uint32_t overread = AV_RN32(ssp + 4); + max_overread = FFMAX(overread, max_overread); + slice_error_cnt += !!overread; crc_mismatch_cnt += !!crc_res; } if (slice_error_cnt || crc_mismatch_cnt) - av_log(dev_ctx, AV_LOG_ERROR, "Decode status: %i slices errored, " + av_log(dev_ctx, AV_LOG_ERROR, "Decode status: %i slices overread (%i bytes max), " "%i CRCs mismatched\n", - slice_error_cnt, crc_mismatch_cnt); + slice_error_cnt, max_overread, crc_mismatch_cnt); av_buffer_unref(&fp->slice_state); av_buffer_unref(&fp->slice_offset_buf); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
