This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit d8cb567171e3f3d87414f01eeeb3c3cafff0f173 Author: Lynne <[email protected]> AuthorDate: Fri May 15 02:46:11 2026 +0900 Commit: Lynne <[email protected]> CommitDate: Sun May 17 12:02:52 2026 +0900 prores_raw: fix tile alignment issues Reverse engineered the decoder a bit more. All tiles are always 16x1. The issue is that at the edges, tiles don't have the same width. Instead, the first tile that would extend past the edge is half the normal width, and each tile after that is half the width of the one before it. --- libavcodec/prores_raw.c | 67 +++++++++++++++------------ libavcodec/prores_raw.h | 3 +- libavcodec/vulkan/prores_raw_decode.comp.glsl | 9 +--- libavcodec/vulkan/prores_raw_idct.comp.glsl | 13 +++--- libavcodec/vulkan_prores_raw.c | 5 +- 5 files changed, 49 insertions(+), 48 deletions(-) diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c index c1c05fd959..375a4f2c99 100644 --- a/libavcodec/prores_raw.c +++ b/libavcodec/prores_raw.c @@ -20,6 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/mem_internal.h" #include "libavutil/mem.h" @@ -131,11 +132,10 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile, uint16_t *dst = (uint16_t *)(frame->data[0] + tile->y*frame->linesize[0] + 2*tile->x); int idx; - const int w = FFMIN(s->tw, avctx->width - tile->x) / 2; - const int nb_blocks = w / 8; - const int log2_nb_blocks = 31 - ff_clz(nb_blocks); - const int block_mask = (1 << log2_nb_blocks) - 1; - const int nb_codes = 64 * nb_blocks; + const int log2_nb_blocks = tile->log2_nb_blocks; + const int nb_blocks = 1 << log2_nb_blocks; + const int block_mask = nb_blocks - 1; + const int nb_codes = 64 * nb_blocks; LOCAL_ALIGNED_32(int16_t, block, [64*16]); @@ -426,15 +426,13 @@ static int decode_frame(AVCodecContext *avctx, ff_permute_scantable(s->qmat, s->prodsp.idct_permutation, qmat); - s->nb_tw = (w + 15) >> 4; + int tw16 = (w + 
15) >> 4; + s->nb_tw = (tw16 >> align) + av_popcount(~(-1 * (1 << align)) & tw16); s->nb_th = (h + 15) >> 4; - s->nb_tw = (s->nb_tw >> align) + av_popcount(~(-1 * (1 << align)) & s->nb_tw); s->nb_tiles = s->nb_tw * s->nb_th; av_log(avctx, AV_LOG_DEBUG, "%dx%d | nb_tiles: %d\n", s->nb_tw, s->nb_th, s->nb_tiles); - s->tw = s->version == 0 ? 128 : 256; s->th = 16; - av_log(avctx, AV_LOG_DEBUG, "tile_size: %dx%d\n", s->tw, s->th); av_fast_mallocz(&s->tiles, &s->tiles_size, s->nb_tiles * sizeof(*s->tiles)); if (!s->tiles) @@ -443,29 +441,38 @@ static int decode_frame(AVCodecContext *avctx, if (bytestream2_get_bytes_left(&gb) < s->nb_tiles * 2) return AVERROR_INVALIDDATA; - /* Read tile data offsets */ + /* First tile that extends past the right edge gets halved in width, + * next one gets quartered, and so on */ int offset = bytestream2_tell(&gb) + s->nb_tiles * 2; - for (int n = 0; n < s->nb_tiles; n++) { - TileContext *tile = &s->tiles[n]; - - int size = bytestream2_get_be16(&gb); - if (offset >= avpkt->size) - return AVERROR_INVALIDDATA; - if (size >= avpkt->size) - return AVERROR_INVALIDDATA; - if (offset > avpkt->size - size) - return AVERROR_INVALIDDATA; - - bytestream2_init(&tile->gb, avpkt->data + offset, size); - - tile->y = (n / s->nb_tw) * s->th; - tile->x = (n % s->nb_tw) * s->tw; - - if (avctx->width - tile->x < 16) - return AVERROR_PATCHWELCOME; - - offset += size; + int n = 0; + for (int ty = 0; ty < s->nb_th; ty++) { + unsigned tx = 0; + int rem = tw16; + for (int e = align; rem > 0; e--) { + int unit = 1 << e; + while (unit <= rem) { + TileContext *tile = &s->tiles[n++]; + int size = bytestream2_get_be16(&gb); + + if (offset >= avpkt->size) + return AVERROR_INVALIDDATA; + if (size >= avpkt->size) + return AVERROR_INVALIDDATA; + if (offset > avpkt->size - size) + return AVERROR_INVALIDDATA; + + bytestream2_init(&tile->gb, avpkt->data + offset, size); + tile->x = tx * 16; + tile->y = ty * s->th; + tile->log2_nb_blocks = e; + offset += size; + + tx += 
unit; + rem -= unit; + } + } } + av_assert1(n == s->nb_tiles); ret = ff_thread_get_buffer(avctx, frame, 0); if (ret < 0) diff --git a/libavcodec/prores_raw.h b/libavcodec/prores_raw.h index 3ac8068dd5..23b55661e4 100644 --- a/libavcodec/prores_raw.h +++ b/libavcodec/prores_raw.h @@ -33,6 +33,7 @@ typedef struct TileContext { GetByteContext gb; unsigned x, y; + int log2_nb_blocks; } TileContext; typedef struct ProResRAWContext { @@ -42,7 +43,7 @@ typedef struct ProResRAWContext { TileContext *tiles; unsigned int tiles_size; int nb_tiles; - int tw, th; + int th; int nb_tw, nb_th; enum AVPixelFormat pix_fmt; diff --git a/libavcodec/vulkan/prores_raw_decode.comp.glsl b/libavcodec/vulkan/prores_raw_decode.comp.glsl index c1ab920e27..92859d59d0 100644 --- a/libavcodec/vulkan/prores_raw_decode.comp.glsl +++ b/libavcodec/vulkan/prores_raw_decode.comp.glsl @@ -30,6 +30,7 @@ struct TileData { ivec2 pos; uint offset; uint size; + uint log2_nb_blocks; }; layout (set = 0, binding = 0, r16ui) uniform writeonly uimage2D dst; @@ -39,7 +40,6 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf { layout (push_constant, scalar) uniform pushConstants { u8buf pkt_data; - ivec2 tile_size; }; #define COMP_ID (gl_LocalInvocationID.y) @@ -215,10 +215,6 @@ void main(void) const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; TileData td = tile_data[tile_idx]; - int width = imageSize(dst).x; - if (expectEXT(td.pos.x >= width, false)) - return; - uint64_t pkt_offset = uint64_t(pkt_data) + td.offset; u8vec2buf hdr_data = u8vec2buf(pkt_offset); int header_len = hdr_data[0].v.x >> 3; @@ -232,8 +228,7 @@ void main(void) return; const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1); - const int w = min(tile_size.x, width - td.pos.x) >> 1; - const int nb_blocks = w >> 3; + const int nb_blocks = 1 << td.log2_nb_blocks; const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3], size[2], diff --git a/libavcodec/vulkan/prores_raw_idct.comp.glsl 
b/libavcodec/vulkan/prores_raw_idct.comp.glsl index 15af6d5a3f..3393ea3402 100644 --- a/libavcodec/vulkan/prores_raw_idct.comp.glsl +++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl @@ -30,6 +30,7 @@ struct TileData { ivec2 pos; uint offset; uint size; + uint log2_nb_blocks; }; layout (set = 0, binding = 0, r16ui) uniform uimage2D dst; @@ -39,7 +40,6 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf { layout (push_constant, scalar) uniform pushConstants { u8buf pkt_data; - ivec2 tile_size; uint8_t qmat[64]; }; @@ -73,17 +73,12 @@ void main(void) const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; TileData td = tile_data[tile_idx]; - int width = imageSize(dst).x; - if (expectEXT(td.pos.x >= width, false)) - return; - uint64_t pkt_offset = uint64_t(pkt_data) + td.offset; u8vec2buf hdr_data = u8vec2buf(pkt_offset); int qscale = pack16(hdr_data[0].v.yx); const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1); - const uint w = min(tile_size.x, width - td.pos.x) >> 1; - const uint nb_blocks = w >> 3; + const uint nb_blocks = 1 << td.log2_nb_blocks; /* Copy push-constant qmat into shared memory for fast non-uniform access */ if (gl_LocalInvocationIndex < 64) @@ -110,6 +105,10 @@ void main(void) idct8(BLOCK_ID, COMP_ID*72 + ROW_ID * 9, 1); barrier(); + /* Border tile check */ + if (BLOCK_ID >= nb_blocks) + return; + [[unroll]] for (uint y = 0; y < 8; y++) { int v = int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID]*4095.0)); diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c index 392b74a863..953b67d592 100644 --- a/libavcodec/vulkan_prores_raw.c +++ b/libavcodec/vulkan_prores_raw.c @@ -51,7 +51,6 @@ typedef struct ProResRAWVulkanDecodeContext { typedef struct DecodePushData { VkDeviceAddress pkt_data; - int32_t tile_size[2]; uint8_t qmat[64]; } DecodePushData; @@ -59,6 +58,7 @@ typedef struct TileData { int32_t pos[2]; uint32_t offset; uint32_t size; + uint32_t log2_nb_blocks; } TileData; 
static int vk_prores_raw_start_frame(AVCodecContext *avctx, @@ -118,6 +118,7 @@ static int vk_prores_raw_decode_slice(AVCodecContext *avctx, td[pp->nb_tiles].pos[0] = prr->tiles[pp->nb_tiles].x; td[pp->nb_tiles].pos[1] = prr->tiles[pp->nb_tiles].y; td[pp->nb_tiles].size = size; + td[pp->nb_tiles].log2_nb_blocks = prr->tiles[pp->nb_tiles].log2_nb_blocks; if (vp->slices_buf && slices_buf->host_ref) { td[pp->nb_tiles].offset = data - slices_buf->mapped_mem; @@ -229,8 +230,6 @@ static int vk_prores_raw_end_frame(AVCodecContext *avctx) /* Update push data */ DecodePushData pd_decode = (DecodePushData) { .pkt_data = slices_buf->address, - .tile_size[0] = prr->tw, - .tile_size[1] = prr->th, }; memcpy(pd_decode.qmat, prr->qmat, 64); ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
