[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD
From: Kieran Kunhya Signed-off-by: Michael Niedermayer --- libavcodec/v210enc.c | 78 + libavcodec/v210enc.h | 31 libavcodec/x86/Makefile |2 ++ libavcodec/x86/v210enc.asm| 75 +++ libavcodec/x86/v210enc_init.c | 31 5 files changed, 194 insertions(+), 23 deletions(-) create mode 100644 libavcodec/v210enc.h create mode 100644 libavcodec/x86/v210enc.asm create mode 100644 libavcodec/x86/v210enc_init.c diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index 1e53bdb..f4fc1fe 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -24,9 +24,37 @@ #include "avcodec.h" #include "bytestream.h" #include "internal.h" +#include "v210enc.h" + +#define CLIP(v) av_clip(v, 4, 1019) + +#define WRITE_PIXELS(a, b, c) \ +do {\ +val = CLIP(*a++); \ +val |= (CLIP(*b++) << 10) | \ + (CLIP(*c++) << 20); \ +AV_WL32(dst, val); \ +dst += 4; \ +} while (0) + +static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u, + const uint16_t *v, uint8_t *dst, ptrdiff_t width) +{ +uint32_t val; +int i; + +for( i = 0; i < width-5; i += 6 ){ +WRITE_PIXELS(u, y, v); +WRITE_PIXELS(y, u, y); +WRITE_PIXELS(v, y, u); +WRITE_PIXELS(y, v, y); +} +} static av_cold int encode_init(AVCodecContext *avctx) { +V210EncContext *s = avctx->priv_data; + if (avctx->width & 1) { av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n"); return AVERROR(EINVAL); @@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx) avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; +s->pack_line= v210_planar_pack_c; + +if (HAVE_MMX) +v210enc_x86_init(s); + return 0; } static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet) { +V210EncContext *s = avctx->priv_data; + int aligned_width = ((avctx->width + 47) / 48) * 48; int stride = aligned_width * 8 / 3; int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4; @@ -55,47 +90,43 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint16_t *y = (const uint16_t*)pic->data[0]; const 
uint16_t *u = (const uint16_t*)pic->data[1]; const uint16_t *v = (const uint16_t*)pic->data[2]; -PutByteContext p; +uint8_t *dst; if ((ret = ff_alloc_packet2(avctx, pkt, avctx->height * stride)) < 0) return ret; -bytestream2_init_writer(&p, pkt->data, pkt->size); - -#define CLIP(v) av_clip(v, 4, 1019) - -#define WRITE_PIXELS(a, b, c) \ -do {\ -val = CLIP(*a++); \ -val |= (CLIP(*b++) << 10) | \ - (CLIP(*c++) << 20); \ -bytestream2_put_le32u(&p, val); \ -} while (0) +dst = pkt->data; for (h = 0; h < avctx->height; h++) { uint32_t val; -for (w = 0; w < avctx->width - 5; w += 6) { -WRITE_PIXELS(u, y, v); -WRITE_PIXELS(y, u, y); -WRITE_PIXELS(v, y, u); -WRITE_PIXELS(y, v, y); -} +w = (avctx->width / 6) * 6; +s->pack_line(y, u, v, dst, w); + +y += w; +u += w >> 1; +v += w >> 1; +dst += (w / 6) * 16; if (w < avctx->width - 1) { WRITE_PIXELS(u, y, v); val = CLIP(*y++); -if (w == avctx->width - 2) -bytestream2_put_le32u(&p, val); +if (w == avctx->width - 2) { +AV_WL32(dst, val); +dst += 4; +} if (w < avctx->width - 3) { val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; val = CLIP(*v++) | (CLIP(*y++) << 10); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; } } -bytestream2_set_buffer(&p, 0, line_padding); +memset(dst, 0, line_padding); +dst += line_padding; y += pic->linesize[0] / 2 - avctx->width; u += pic->linesize[1] / 2 - avctx->width / 2; @@ -119,6 +150,7 @@ AVCodec ff_v210_encoder = { .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), .type = AVMEDIA_TYPE_VIDEO, .id = AV_CODEC_ID_V210, +.priv_data_size = sizeof(V210EncContext), .init = encode_init, .encode2= encode_frame, .close = encode_close, diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h new file m
Re: [FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD
Kieran Kunhya <...@obe.tv> writes:

> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -47,6 +47,7 @@
>                                            x86/rv40dsp_init.o
>  OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
>  OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
> +OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o

This does not apply here, please update to current git head.

Carl Eugen
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD
--- libavcodec/v210enc.c | 78 ++- libavcodec/v210enc.h | 31 + libavcodec/x86/Makefile | 2 ++ libavcodec/x86/v210enc.asm| 76 + libavcodec/x86/v210enc_init.c | 31 + 5 files changed, 195 insertions(+), 23 deletions(-) create mode 100644 libavcodec/v210enc.h create mode 100644 libavcodec/x86/v210enc.asm create mode 100644 libavcodec/x86/v210enc_init.c diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index ef0d6ab..4a6bdfc 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -24,9 +24,37 @@ #include "avcodec.h" #include "bytestream.h" #include "internal.h" +#include "v210enc.h" + +#define CLIP(v) av_clip(v, 4, 1019) + +#define WRITE_PIXELS(a, b, c) \ +do {\ +val = CLIP(*a++); \ +val |= (CLIP(*b++) << 10) | \ + (CLIP(*c++) << 20); \ +AV_WL32(dst, val); \ +dst += 4; \ +} while (0) + +static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u, + const uint16_t *v, uint8_t *dst, ptrdiff_t width) +{ +uint32_t val; +int i; + +for( i = 0; i < width-5; i += 6 ){ +WRITE_PIXELS(u, y, v); +WRITE_PIXELS(y, u, y); +WRITE_PIXELS(v, y, u); +WRITE_PIXELS(y, v, y); +} +} static av_cold int encode_init(AVCodecContext *avctx) { +V210EncContext *s = avctx->priv_data; + if (avctx->width & 1) { av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n"); return AVERROR(EINVAL); @@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx) avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; +s->pack_line= v210_planar_pack_c; + +if (HAVE_MMX) +v210enc_x86_init(s); + return 0; } static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet) { +V210EncContext *s = avctx->priv_data; + int aligned_width = ((avctx->width + 47) / 48) * 48; int stride = aligned_width * 8 / 3; int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4; @@ -55,49 +90,45 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint16_t *y = (const uint16_t*)pic->data[0]; const uint16_t *u = (const uint16_t*)pic->data[1]; const 
uint16_t *v = (const uint16_t*)pic->data[2]; -PutByteContext p; +uint8_t *dst; if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) { av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n"); return ret; } -bytestream2_init_writer(&p, pkt->data, pkt->size); - -#define CLIP(v) av_clip(v, 4, 1019) - -#define WRITE_PIXELS(a, b, c) \ -do {\ -val = CLIP(*a++); \ -val |= (CLIP(*b++) << 10) | \ - (CLIP(*c++) << 20); \ -bytestream2_put_le32u(&p, val); \ -} while (0) +dst = pkt->data; for (h = 0; h < avctx->height; h++) { uint32_t val; -for (w = 0; w < avctx->width - 5; w += 6) { -WRITE_PIXELS(u, y, v); -WRITE_PIXELS(y, u, y); -WRITE_PIXELS(v, y, u); -WRITE_PIXELS(y, v, y); -} +w = (avctx->width / 6) * 6; +s->pack_line(y, u, v, dst, w); + +y += w; +u += w >> 1; +v += w >> 1; +dst += (w / 6) * 16; if (w < avctx->width - 1) { WRITE_PIXELS(u, y, v); val = CLIP(*y++); -if (w == avctx->width - 2) -bytestream2_put_le32u(&p, val); +if (w == avctx->width - 2) { +AV_WL32(dst, val); +dst += 4; +} } if (w < avctx->width - 3) { val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; val = CLIP(*v++) | (CLIP(*y++) << 10); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; } -bytestream2_set_buffer(&p, 0, line_padding); +memset(dst, 0, line_padding); +dst += line_padding; y += pic->linesize[0] / 2 - avctx->width; u += pic->linesize[1] / 2 - avctx->width / 2; @@ -121,6 +152,7 @@ AVCodec ff_v210_encoder = { .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), .type = AVMEDIA_TYPE_VIDEO, .id = AV_CODEC_ID_V210, +.priv_data_size = sizeof(V210EncContext), .init = encode_init, .encode2= encode_frame, .close = encode_close, diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h new file mode 100644 index 000..b8b
[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD
--- libavcodec/v210enc.c| 78 ++--- libavcodec/v210enc.h| 31 libavcodec/x86/Makefile | 2 ++ 3 files changed, 88 insertions(+), 23 deletions(-) create mode 100644 libavcodec/v210enc.h diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index ef0d6ab..4a6bdfc 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -24,9 +24,37 @@ #include "avcodec.h" #include "bytestream.h" #include "internal.h" +#include "v210enc.h" + +#define CLIP(v) av_clip(v, 4, 1019) + +#define WRITE_PIXELS(a, b, c) \ +do {\ +val = CLIP(*a++); \ +val |= (CLIP(*b++) << 10) | \ + (CLIP(*c++) << 20); \ +AV_WL32(dst, val); \ +dst += 4; \ +} while (0) + +static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u, + const uint16_t *v, uint8_t *dst, ptrdiff_t width) +{ +uint32_t val; +int i; + +for( i = 0; i < width-5; i += 6 ){ +WRITE_PIXELS(u, y, v); +WRITE_PIXELS(y, u, y); +WRITE_PIXELS(v, y, u); +WRITE_PIXELS(y, v, y); +} +} static av_cold int encode_init(AVCodecContext *avctx) { +V210EncContext *s = avctx->priv_data; + if (avctx->width & 1) { av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n"); return AVERROR(EINVAL); @@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx) avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; +s->pack_line= v210_planar_pack_c; + +if (HAVE_MMX) +v210enc_x86_init(s); + return 0; } static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet) { +V210EncContext *s = avctx->priv_data; + int aligned_width = ((avctx->width + 47) / 48) * 48; int stride = aligned_width * 8 / 3; int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4; @@ -55,49 +90,45 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint16_t *y = (const uint16_t*)pic->data[0]; const uint16_t *u = (const uint16_t*)pic->data[1]; const uint16_t *v = (const uint16_t*)pic->data[2]; -PutByteContext p; +uint8_t *dst; if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) { av_log(avctx, AV_LOG_ERROR, 
"Error getting output packet.\n"); return ret; } -bytestream2_init_writer(&p, pkt->data, pkt->size); - -#define CLIP(v) av_clip(v, 4, 1019) - -#define WRITE_PIXELS(a, b, c) \ -do {\ -val = CLIP(*a++); \ -val |= (CLIP(*b++) << 10) | \ - (CLIP(*c++) << 20); \ -bytestream2_put_le32u(&p, val); \ -} while (0) +dst = pkt->data; for (h = 0; h < avctx->height; h++) { uint32_t val; -for (w = 0; w < avctx->width - 5; w += 6) { -WRITE_PIXELS(u, y, v); -WRITE_PIXELS(y, u, y); -WRITE_PIXELS(v, y, u); -WRITE_PIXELS(y, v, y); -} +w = (avctx->width / 6) * 6; +s->pack_line(y, u, v, dst, w); + +y += w; +u += w >> 1; +v += w >> 1; +dst += (w / 6) * 16; if (w < avctx->width - 1) { WRITE_PIXELS(u, y, v); val = CLIP(*y++); -if (w == avctx->width - 2) -bytestream2_put_le32u(&p, val); +if (w == avctx->width - 2) { +AV_WL32(dst, val); +dst += 4; +} } if (w < avctx->width - 3) { val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; val = CLIP(*v++) | (CLIP(*y++) << 10); -bytestream2_put_le32u(&p, val); +AV_WL32(dst, val); +dst += 4; } -bytestream2_set_buffer(&p, 0, line_padding); +memset(dst, 0, line_padding); +dst += line_padding; y += pic->linesize[0] / 2 - avctx->width; u += pic->linesize[1] / 2 - avctx->width / 2; @@ -121,6 +152,7 @@ AVCodec ff_v210_encoder = { .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), .type = AVMEDIA_TYPE_VIDEO, .id = AV_CODEC_ID_V210, +.priv_data_size = sizeof(V210EncContext), .init = encode_init, .encode2= encode_frame, .close = encode_close, diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h new file mode 100644 index 000..b8b6143 --- /dev/null +++ b/libavcodec/v210enc.h @@ -0,0 +1,31 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as