On 6/26/2017 7:15 PM, Paul B Mahol wrote: > Signed-off-by: Paul B Mahol <one...@gmail.com> > --- > libavcodec/Makefile | 2 +- > libavcodec/utvideo.h | 2 + > libavcodec/utvideodec.c | 53 +++----------------- > libavcodec/utvideodsp.c | 82 +++++++++++++++++++++++++++++++ > libavcodec/utvideodsp.h | 39 +++++++++++++++ > libavcodec/x86/Makefile | 2 + > libavcodec/x86/utvideodsp.asm | 101 > +++++++++++++++++++++++++++++++++++++++ > libavcodec/x86/utvideodsp_init.c | 43 +++++++++++++++++ > 8 files changed, 277 insertions(+), 47 deletions(-) > create mode 100644 libavcodec/utvideodsp.c > create mode 100644 libavcodec/utvideodsp.h > create mode 100644 libavcodec/x86/utvideodsp.asm > create mode 100644 libavcodec/x86/utvideodsp_init.c > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index f0cba88..b440a00 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o > ttaencdsp.o ttadata.o > OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o > OBJS-$(CONFIG_TXD_DECODER) += txd.o > OBJS-$(CONFIG_ULTI_DECODER) += ulti.o > -OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o > +OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o > OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o > OBJS-$(CONFIG_V210_DECODER) += v210dec.o > OBJS-$(CONFIG_V210_ENCODER) += v210enc.o > diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h > index 9559c83..a811785 100644 > --- a/libavcodec/utvideo.h > +++ b/libavcodec/utvideo.h > @@ -30,6 +30,7 @@ > #include "libavutil/common.h" > #include "avcodec.h" > #include "bswapdsp.h" > +#include "utvideodsp.h" > #include "lossless_videodsp.h" > #include "lossless_videoencdsp.h" > > @@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5]; > typedef struct UtvideoContext { > const AVClass *class; > AVCodecContext *avctx; > + UTVideoDSPContext utdsp; > BswapDSPContext bdsp; > LLVidDSPContext llviddsp; > LLVidEncDSPContext llvidencdsp; > diff --git 
a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c > index 0c6f89e..44841aa 100644 > --- a/libavcodec/utvideodec.c > +++ b/libavcodec/utvideodec.c > @@ -333,50 +333,6 @@ fail: > return AVERROR_INVALIDDATA; > } > > -static void restore_rgb_planes(AVFrame *frame, int width, int height) > -{ > - uint8_t *src_r = (uint8_t *)frame->data[2]; > - uint8_t *src_g = (uint8_t *)frame->data[0]; > - uint8_t *src_b = (uint8_t *)frame->data[1]; > - uint8_t r, g, b; > - int i, j; > - > - for (j = 0; j < height; j++) { > - for (i = 0; i < width; i++) { > - r = src_r[i]; > - g = src_g[i]; > - b = src_b[i]; > - src_r[i] = r + g - 0x80; > - src_b[i] = b + g - 0x80; > - } > - src_r += frame->linesize[2]; > - src_g += frame->linesize[0]; > - src_b += frame->linesize[1]; > - } > -} > - > -static void restore_rgb_planes10(AVFrame *frame, int width, int height) > -{ > - uint16_t *src_r = (uint16_t *)frame->data[2]; > - uint16_t *src_g = (uint16_t *)frame->data[0]; > - uint16_t *src_b = (uint16_t *)frame->data[1]; > - int r, g, b; > - int i, j; > - > - for (j = 0; j < height; j++) { > - for (i = 0; i < width; i++) { > - r = src_r[i]; > - g = src_g[i]; > - b = src_b[i]; > - src_r[i] = (r + g - 0x200) & 0x3FF; > - src_b[i] = (b + g - 0x200) & 0x3FF; > - } > - src_r += frame->linesize[2] / 2; > - src_g += frame->linesize[0] / 2; > - src_b += frame->linesize[1] / 2; > - } > -} > - > #undef A > #undef B > #undef C > @@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void > *data, int *got_frame, > } > } > } > - restore_rgb_planes(frame.f, avctx->width, avctx->height); > + c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], > frame.f->data[1], > + frame.f->linesize[2], > frame.f->linesize[0], frame.f->linesize[1], > + avctx->width, avctx->height); > break; > case AV_PIX_FMT_GBRAP10: > case AV_PIX_FMT_GBRP10: > @@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void > *data, int *got_frame, > if (ret) > return ret; > } > - restore_rgb_planes10(frame.f, 
avctx->width, avctx->height); > + c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], > (uint16_t *)frame.f->data[0], (uint16_t *)frame.f->data[1], > + frame.f->linesize[2] / 2, > frame.f->linesize[0] / 2, frame.f->linesize[1] / 2, > + avctx->width, avctx->height); > break; > case AV_PIX_FMT_YUV420P: > for (i = 0; i < 3; i++) { > @@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx) > > c->avctx = avctx; > > + ff_utvideodsp_init(&c->utdsp); > ff_bswapdsp_init(&c->bdsp); > ff_llviddsp_init(&c->llviddsp); > > diff --git a/libavcodec/utvideodsp.c b/libavcodec/utvideodsp.c > new file mode 100644 > index 0000000..0831a6b > --- /dev/null > +++ b/libavcodec/utvideodsp.c > @@ -0,0 +1,82 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include <stdint.h> > + > +#include "config.h" > +#include "libavutil/attributes.h" > +#include "utvideodsp.h" > + > +static void restore_rgb_planes_c(uint8_t *src_r, > + uint8_t *src_g, > + uint8_t *src_b, > + ptrdiff_t linesize_r, > + ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, > + int width, int height) > +{ > + uint8_t r, g, b; > + int i, j; > + > + for (j = 0; j < height; j++) { > + for (i = 0; i < width; i++) { > + r = src_r[i]; > + g = src_g[i]; > + b = src_b[i]; > + src_r[i] = r + g - 0x80; > + src_b[i] = b + g - 0x80; > + } > + src_r += linesize_r; > + src_g += linesize_g; > + src_b += linesize_b; > + } > +} > + > +static void restore_rgb_planes10_c(uint16_t *src_r, > + uint16_t *src_g, > + uint16_t *src_b, > + ptrdiff_t linesize_r, > + ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, > + int width, int height) > +{ > + int r, g, b; > + int i, j; > + > + for (j = 0; j < height; j++) { > + for (i = 0; i < width; i++) { > + r = src_r[i]; > + g = src_g[i]; > + b = src_b[i]; > + src_r[i] = (r + g - 0x200) & 0x3FF; > + src_b[i] = (b + g - 0x200) & 0x3FF; > + } > + src_r += linesize_r; > + src_g += linesize_g; > + src_b += linesize_b; > + } > +} > + > +av_cold void ff_utvideodsp_init(UTVideoDSPContext *c) > +{ > + c->restore_rgb_planes = restore_rgb_planes_c; > + c->restore_rgb_planes10 = restore_rgb_planes10_c; > + > + if (ARCH_X86) > + ff_utvideodsp_init_x86(c); > +} > diff --git a/libavcodec/utvideodsp.h b/libavcodec/utvideodsp.h > new file mode 100644 > index 0000000..a3d2550 > --- /dev/null > +++ b/libavcodec/utvideodsp.h > @@ -0,0 +1,39 @@ > +/* > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCODEC_UTVIDEODSP_H > +#define AVCODEC_UTVIDEODSP_H > + > +#include <stdint.h> > +#include <stddef.h> > +#include "libavutil/pixfmt.h" > +#include "config.h" > + > +typedef struct UTVideoDSPContext { > + void (*restore_rgb_planes)(uint8_t *src_r, uint8_t *src_g, uint8_t > *src_b, > + ptrdiff_t linesize_r, ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, int width, int height); > + void (*restore_rgb_planes10)(uint16_t *src_r, uint16_t *src_g, uint16_t > *src_b, > + ptrdiff_t linesize_r, ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, int width, int > height); > +} UTVideoDSPContext; > + > +void ff_utvideodsp_init(UTVideoDSPContext *c); > +void ff_utvideodsp_init_x86(UTVideoDSPContext *c); > + > +#endif /* AVCODEC_UTVIDEODSP_H */ > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile > index b86700b..0dbc465 100644 > --- a/libavcodec/x86/Makefile > +++ b/libavcodec/x86/Makefile > @@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o > OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o > OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o > OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o > +OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o > 
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o > OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o > OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o > @@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o > X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o > X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o > X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o > +X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o > X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o > X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o > X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o > diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm > new file mode 100644 > index 0000000..2e96f8b > --- /dev/null > +++ b/libavcodec/x86/utvideodsp.asm > @@ -0,0 +1,101 @@ > +;****************************************************************************** > +;* SIMD-optimized UTVideo functions > +;* Copyright (c) 2017 Paul B Mahol > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm"
Wrap everything below this line in an %if ARCH_X86_64 ... %endif block. Nothing in this file will assemble on x86_32 targets, because both functions ask cglobal for nine general-purpose registers, which x86_32 does not have. > + > +SECTION_RODATA > + > +pb_128: times 16 db 128 > +pw_512: times 8 dw 512 > +pw_1023: times 8 dw 1023 > + > +SECTION .text > + > +INIT_XMM sse2 > + > +; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b, > +; ptrdiff_t linesize_r, ptrdiff_t linesize_g, > ptrdiff_t linesize_b, > +; int width, int height) > +cglobal restore_rgb_planes, 8,9,4, src_r, src_g, src_b, linesize_r, > linesize_g, linesize_b, w, h, x > + movsxdifnidn wq, wd > + add src_rq, wq > + add src_gq, wq > + add src_bq, wq > + neg wq > + mova m3, [pb_128] > +.nextrow: > + mov xq, wq > + > + .loop: > + mova m0, [src_rq + xq] > + mova m1, [src_gq + xq] > + mova m2, [src_bq + xq] > + psubb m1, m3 > + paddb m0, m1 > + paddb m2, m1 > + mova [src_rq+xq], m0 > + mova [src_bq+xq], m2 > + add xq, mmsize > + jl .loop > + > + add src_rq, linesize_rq > + add src_gq, linesize_gq > + add src_bq, linesize_bq > + sub hq, 1 > + jg .nextrow > + REP_RET > + > +cglobal restore_rgb_planes10, 8,9,5, src_r, src_g, src_b, linesize_r, > linesize_g, linesize_b, w, h, x > + movsxd wq, wd There is no need for this movsxd line. As I said in my previous reply, using wd instead of wq in the shl below already clears the high 32 bits of w on its own, because in 64-bit mode a write to a 32-bit register zero-extends into the full 64-bit register. 
> + shl wd, 1 > + shl linesize_rq, 1 > + shl linesize_gq, 1 > + shl linesize_bq, 1 > + add src_rq, wq > + add src_gq, wq > + add src_bq, wq > + mova m3, [pw_512] > + mova m4, [pw_1023] > + neg wq > +.nextrow: > + mov xq, wq > + > + .loop: > + mova m0, [src_rq + xq] > + mova m1, [src_gq + xq] > + mova m2, [src_bq + xq] > + paddw m0, m1 > + paddw m2, m1 > + psubw m0, m3 > + psubw m2, m3 > + pand m0, m4 > + pand m2, m4 > + mova [src_rq+xq], m0 > + mova [src_bq+xq], m2 > + add xq, mmsize > + jl .loop > + > + add src_rq, linesize_rq > + add src_gq, linesize_gq > + add src_bq, linesize_bq > + sub hq, 1 > + jg .nextrow > + REP_RET > diff --git a/libavcodec/x86/utvideodsp_init.c > b/libavcodec/x86/utvideodsp_init.c > new file mode 100644 > index 0000000..d415692 > --- /dev/null > +++ b/libavcodec/x86/utvideodsp_init.c > @@ -0,0 +1,43 @@ > +/* > + * Copyright (c) 2017 Paul B Mahol > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "config.h" > +#include "libavutil/attributes.h" > +#include "libavutil/cpu.h" > +#include "libavutil/x86/asm.h" > +#include "libavutil/x86/cpu.h" > +#include "libavcodec/utvideodsp.h" > + > +void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t > *src_b, > + ptrdiff_t linesize_r, ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, int width, int height); > +void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t > *src_b, > + ptrdiff_t linesize_r, ptrdiff_t linesize_g, > + ptrdiff_t linesize_b, int width, int > height); > + > +av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c) > +{ > + int cpu_flags = av_get_cpu_flags(); > + > + if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { > + c->restore_rgb_planes = ff_restore_rgb_planes_sse2; > + c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2; > + } > +} > Should be good if the output is bit-exact with the C version. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel