[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD

2014-11-22 Thread Michael Niedermayer
From: Kieran Kunhya 

Signed-off-by: Michael Niedermayer 
---
 libavcodec/v210enc.c  |   78 +
 libavcodec/v210enc.h  |   31 
 libavcodec/x86/Makefile   |2 ++
 libavcodec/x86/v210enc.asm|   75 +++
 libavcodec/x86/v210enc_init.c |   31 
 5 files changed, 194 insertions(+), 23 deletions(-)
 create mode 100644 libavcodec/v210enc.h
 create mode 100644 libavcodec/x86/v210enc.asm
 create mode 100644 libavcodec/x86/v210enc_init.c

diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index 1e53bdb..f4fc1fe 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -24,9 +24,37 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "v210enc.h"
+
+#define CLIP(v) av_clip(v, 4, 1019)
+
+#define WRITE_PIXELS(a, b, c)   \
+do {\
+val =   CLIP(*a++); \
+val |= (CLIP(*b++) << 10) | \
+   (CLIP(*c++) << 20);  \
+AV_WL32(dst, val);  \
+dst += 4;   \
+} while (0)
+
+static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u,
+   const uint16_t *v, uint8_t *dst, ptrdiff_t width)
+{
+uint32_t val;
+int i;
+
+for( i = 0; i < width-5; i += 6 ){
+WRITE_PIXELS(u, y, v);
+WRITE_PIXELS(y, u, y);
+WRITE_PIXELS(v, y, u);
+WRITE_PIXELS(y, v, y);
+}
+}
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
+V210EncContext *s = avctx->priv_data;
+
 if (avctx->width & 1) {
 av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
 return AVERROR(EINVAL);
@@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
+s->pack_line= v210_planar_pack_c;
+
+if (HAVE_MMX)
+v210enc_x86_init(s);
+
 return 0;
 }
 
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const AVFrame *pic, int *got_packet)
 {
+V210EncContext *s = avctx->priv_data;
+
 int aligned_width = ((avctx->width + 47) / 48) * 48;
 int stride = aligned_width * 8 / 3;
 int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
@@ -55,47 +90,43 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const uint16_t *y = (const uint16_t*)pic->data[0];
 const uint16_t *u = (const uint16_t*)pic->data[1];
 const uint16_t *v = (const uint16_t*)pic->data[2];
-PutByteContext p;
+uint8_t *dst;
 
 if ((ret = ff_alloc_packet2(avctx, pkt, avctx->height * stride)) < 0)
 return ret;
 
-bytestream2_init_writer(&p, pkt->data, pkt->size);
-
-#define CLIP(v) av_clip(v, 4, 1019)
-
-#define WRITE_PIXELS(a, b, c)   \
-do {\
-val =   CLIP(*a++); \
-val |= (CLIP(*b++) << 10) | \
-   (CLIP(*c++) << 20);  \
-bytestream2_put_le32u(&p, val); \
-} while (0)
+dst = pkt->data;
 
 for (h = 0; h < avctx->height; h++) {
 uint32_t val;
-for (w = 0; w < avctx->width - 5; w += 6) {
-WRITE_PIXELS(u, y, v);
-WRITE_PIXELS(y, u, y);
-WRITE_PIXELS(v, y, u);
-WRITE_PIXELS(y, v, y);
-}
+w = (avctx->width / 6) * 6;
+s->pack_line(y, u, v, dst, w);
+
+y += w;
+u += w >> 1;
+v += w >> 1;
+dst += (w / 6) * 16;
 if (w < avctx->width - 1) {
 WRITE_PIXELS(u, y, v);
 
 val = CLIP(*y++);
-if (w == avctx->width - 2)
-bytestream2_put_le32u(&p, val);
+if (w == avctx->width - 2) {
+AV_WL32(dst, val);
+dst += 4;
+}
 if (w < avctx->width - 3) {
 val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 
 val = CLIP(*v++) | (CLIP(*y++) << 10);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 }
 }
 
-bytestream2_set_buffer(&p, 0, line_padding);
+memset(dst, 0, line_padding);
+dst += line_padding;
 
 y += pic->linesize[0] / 2 - avctx->width;
 u += pic->linesize[1] / 2 - avctx->width / 2;
@@ -119,6 +150,7 @@ AVCodec ff_v210_encoder = {
 .long_name  = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
 .type   = AVMEDIA_TYPE_VIDEO,
 .id = AV_CODEC_ID_V210,
+.priv_data_size = sizeof(V210EncContext),
 .init   = encode_init,
 .encode2= encode_frame,
 .close  = encode_close,
diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h
new file m

Re: [FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD

2014-11-22 Thread Carl Eugen Hoyos
Kieran Kunhya (obe.tv) writes:

> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -47,6 +47,7 @@
>x86/rv40dsp_init.o
>  OBJS-$(CONFIG_SVQ1_ENCODER)+= x86/svq1enc.o
>  OBJS-$(CONFIG_TRUEHD_DECODER)  += x86/mlpdsp.o
> +OBJS-$(CONFIG_V210_ENCODER)+= x86/v210enc_init.o

This does not apply here, please update to current 
git head.

Carl Eugen

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD

2014-11-22 Thread Kieran Kunhya
---
 libavcodec/v210enc.c  | 78 ++-
 libavcodec/v210enc.h  | 31 +
 libavcodec/x86/Makefile   |  2 ++
 libavcodec/x86/v210enc.asm| 76 +
 libavcodec/x86/v210enc_init.c | 31 +
 5 files changed, 195 insertions(+), 23 deletions(-)
 create mode 100644 libavcodec/v210enc.h
 create mode 100644 libavcodec/x86/v210enc.asm
 create mode 100644 libavcodec/x86/v210enc_init.c

diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index ef0d6ab..4a6bdfc 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -24,9 +24,37 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "v210enc.h"
+
+#define CLIP(v) av_clip(v, 4, 1019)
+
+#define WRITE_PIXELS(a, b, c)   \
+do {\
+val =   CLIP(*a++); \
+val |= (CLIP(*b++) << 10) | \
+   (CLIP(*c++) << 20);  \
+AV_WL32(dst, val);  \
+dst += 4;   \
+} while (0)
+
+static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u,
+   const uint16_t *v, uint8_t *dst, ptrdiff_t width)
+{
+uint32_t val;
+int i;
+
+for( i = 0; i < width-5; i += 6 ){
+WRITE_PIXELS(u, y, v);
+WRITE_PIXELS(y, u, y);
+WRITE_PIXELS(v, y, u);
+WRITE_PIXELS(y, v, y);
+}
+}
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
+V210EncContext *s = avctx->priv_data;
+
 if (avctx->width & 1) {
 av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
 return AVERROR(EINVAL);
@@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
+s->pack_line= v210_planar_pack_c;
+
+if (HAVE_MMX)
+v210enc_x86_init(s);
+
 return 0;
 }
 
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const AVFrame *pic, int *got_packet)
 {
+V210EncContext *s = avctx->priv_data;
+
 int aligned_width = ((avctx->width + 47) / 48) * 48;
 int stride = aligned_width * 8 / 3;
 int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
@@ -55,49 +90,45 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const uint16_t *y = (const uint16_t*)pic->data[0];
 const uint16_t *u = (const uint16_t*)pic->data[1];
 const uint16_t *v = (const uint16_t*)pic->data[2];
-PutByteContext p;
+uint8_t *dst;
 
 if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) {
 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 return ret;
 }
 
-bytestream2_init_writer(&p, pkt->data, pkt->size);
-
-#define CLIP(v) av_clip(v, 4, 1019)
-
-#define WRITE_PIXELS(a, b, c)   \
-do {\
-val =   CLIP(*a++); \
-val |= (CLIP(*b++) << 10) | \
-   (CLIP(*c++) << 20);  \
-bytestream2_put_le32u(&p, val); \
-} while (0)
+dst = pkt->data;
 
 for (h = 0; h < avctx->height; h++) {
 uint32_t val;
-for (w = 0; w < avctx->width - 5; w += 6) {
-WRITE_PIXELS(u, y, v);
-WRITE_PIXELS(y, u, y);
-WRITE_PIXELS(v, y, u);
-WRITE_PIXELS(y, v, y);
-}
+w = (avctx->width / 6) * 6;
+s->pack_line(y, u, v, dst, w);
+
+y += w;
+u += w >> 1;
+v += w >> 1;
+dst += (w / 6) * 16;
 if (w < avctx->width - 1) {
 WRITE_PIXELS(u, y, v);
 
 val = CLIP(*y++);
-if (w == avctx->width - 2)
-bytestream2_put_le32u(&p, val);
+if (w == avctx->width - 2) {
+AV_WL32(dst, val);
+dst += 4;
+}
 }
 if (w < avctx->width - 3) {
 val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 
 val = CLIP(*v++) | (CLIP(*y++) << 10);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 }
 
-bytestream2_set_buffer(&p, 0, line_padding);
+memset(dst, 0, line_padding);
+dst += line_padding;
 
 y += pic->linesize[0] / 2 - avctx->width;
 u += pic->linesize[1] / 2 - avctx->width / 2;
@@ -121,6 +152,7 @@ AVCodec ff_v210_encoder = {
 .long_name  = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
 .type   = AVMEDIA_TYPE_VIDEO,
 .id = AV_CODEC_ID_V210,
+.priv_data_size = sizeof(V210EncContext),
 .init   = encode_init,
 .encode2= encode_frame,
 .close  = encode_close,
diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h
new file mode 100644
index 000..b8b

[FFmpeg-devel] [PATCH] v210enc: Add x86 SIMD

2014-11-22 Thread Kieran Kunhya
---
 libavcodec/v210enc.c| 78 ++---
 libavcodec/v210enc.h| 31 
 libavcodec/x86/Makefile |  2 ++
 3 files changed, 88 insertions(+), 23 deletions(-)
 create mode 100644 libavcodec/v210enc.h

diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index ef0d6ab..4a6bdfc 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -24,9 +24,37 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "v210enc.h"
+
+#define CLIP(v) av_clip(v, 4, 1019)
+
+#define WRITE_PIXELS(a, b, c)   \
+do {\
+val =   CLIP(*a++); \
+val |= (CLIP(*b++) << 10) | \
+   (CLIP(*c++) << 20);  \
+AV_WL32(dst, val);  \
+dst += 4;   \
+} while (0)
+
+static void v210_planar_pack_c(const uint16_t *y, const uint16_t *u,
+   const uint16_t *v, uint8_t *dst, ptrdiff_t width)
+{
+uint32_t val;
+int i;
+
+for( i = 0; i < width-5; i += 6 ){
+WRITE_PIXELS(u, y, v);
+WRITE_PIXELS(y, u, y);
+WRITE_PIXELS(v, y, u);
+WRITE_PIXELS(y, v, y);
+}
+}
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
+V210EncContext *s = avctx->priv_data;
+
 if (avctx->width & 1) {
 av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
 return AVERROR(EINVAL);
@@ -42,12 +70,19 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
+s->pack_line= v210_planar_pack_c;
+
+if (HAVE_MMX)
+v210enc_x86_init(s);
+
 return 0;
 }
 
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const AVFrame *pic, int *got_packet)
 {
+V210EncContext *s = avctx->priv_data;
+
 int aligned_width = ((avctx->width + 47) / 48) * 48;
 int stride = aligned_width * 8 / 3;
 int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
@@ -55,49 +90,45 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 const uint16_t *y = (const uint16_t*)pic->data[0];
 const uint16_t *u = (const uint16_t*)pic->data[1];
 const uint16_t *v = (const uint16_t*)pic->data[2];
-PutByteContext p;
+uint8_t *dst;
 
 if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) {
 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 return ret;
 }
 
-bytestream2_init_writer(&p, pkt->data, pkt->size);
-
-#define CLIP(v) av_clip(v, 4, 1019)
-
-#define WRITE_PIXELS(a, b, c)   \
-do {\
-val =   CLIP(*a++); \
-val |= (CLIP(*b++) << 10) | \
-   (CLIP(*c++) << 20);  \
-bytestream2_put_le32u(&p, val); \
-} while (0)
+dst = pkt->data;
 
 for (h = 0; h < avctx->height; h++) {
 uint32_t val;
-for (w = 0; w < avctx->width - 5; w += 6) {
-WRITE_PIXELS(u, y, v);
-WRITE_PIXELS(y, u, y);
-WRITE_PIXELS(v, y, u);
-WRITE_PIXELS(y, v, y);
-}
+w = (avctx->width / 6) * 6;
+s->pack_line(y, u, v, dst, w);
+
+y += w;
+u += w >> 1;
+v += w >> 1;
+dst += (w / 6) * 16;
 if (w < avctx->width - 1) {
 WRITE_PIXELS(u, y, v);
 
 val = CLIP(*y++);
-if (w == avctx->width - 2)
-bytestream2_put_le32u(&p, val);
+if (w == avctx->width - 2) {
+AV_WL32(dst, val);
+dst += 4;
+}
 }
 if (w < avctx->width - 3) {
 val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 
 val = CLIP(*v++) | (CLIP(*y++) << 10);
-bytestream2_put_le32u(&p, val);
+AV_WL32(dst, val);
+dst += 4;
 }
 
-bytestream2_set_buffer(&p, 0, line_padding);
+memset(dst, 0, line_padding);
+dst += line_padding;
 
 y += pic->linesize[0] / 2 - avctx->width;
 u += pic->linesize[1] / 2 - avctx->width / 2;
@@ -121,6 +152,7 @@ AVCodec ff_v210_encoder = {
 .long_name  = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
 .type   = AVMEDIA_TYPE_VIDEO,
 .id = AV_CODEC_ID_V210,
+.priv_data_size = sizeof(V210EncContext),
 .init   = encode_init,
 .encode2= encode_frame,
 .close  = encode_close,
diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h
new file mode 100644
index 000..b8b6143
--- /dev/null
+++ b/libavcodec/v210enc.h
@@ -0,0 +1,31 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as