ffmpeg | branch: master | Shivraj Patil <shivraj.pa...@imgtec.com> | Mon Jun 29 21:15:13 2015 +0530| [2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5] | committer: Michael Niedermayer
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions in new file mpegvideoencdsp_msa.c Signed-off-by: Shivraj Patil <shivraj.pa...@imgtec.com> Signed-off-by: Michael Niedermayer <michae...@gmx.at> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5 --- libavcodec/mips/Makefile | 2 + libavcodec/mips/mpegvideoencdsp_init_mips.c | 40 +++++++++++++++++ libavcodec/mips/mpegvideoencdsp_msa.c | 62 +++++++++++++++++++++++++++ libavcodec/mpegvideoencdsp.c | 2 + libavcodec/mpegvideoencdsp.h | 2 + libavutil/mips/generic_macros_msa.h | 34 +++++++++++++++ 6 files changed, 142 insertions(+) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 277ac2a..59c1f79 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -30,6 +30,7 @@ OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o +OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \ mips/hevc_mc_uni_msa.o \ mips/hevc_mc_uniw_msa.o \ @@ -49,5 +50,6 @@ MSA-OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o MSA-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o +MSA-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/mpegvideoencdsp_init_mips.c b/libavcodec/mips/mpegvideoencdsp_init_mips.c new file mode 100644 index 0000000..9bfe94e --- /dev/null +++ b/libavcodec/mips/mpegvideoencdsp_init_mips.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale 
(manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/bit_depth_template.c" +#include "h263dsp_mips.h" + +#if HAVE_MSA /* MSA-specific setup, compiled only when HAVE_MSA is set: if BIT_DEPTH == 8 it stores ff_pix_sum_msa in c->pix_sum, otherwise it leaves c untouched. avctx is accepted but not read in the body. */ +static av_cold void mpegvideoencdsp_init_msa(MpegvideoEncDSPContext *c, + AVCodecContext *avctx) +{ +#if BIT_DEPTH == 8 + c->pix_sum = ff_pix_sum_msa; +#endif +} +#endif // #if HAVE_MSA + /* MIPS hook called from ff_mpegvideoencdsp_init() when ARCH_MIPS is set: forwards c and avctx to mpegvideoencdsp_init_msa() if HAVE_MSA is set, and does nothing otherwise. */ +av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c, + AVCodecContext *avctx) +{ +#if HAVE_MSA + mpegvideoencdsp_init_msa(c, avctx); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/mpegvideoencdsp_msa.c b/libavcodec/mips/mpegvideoencdsp_msa.c new file mode 100644 index 0000000..46473da --- /dev/null +++ b/libavcodec/mips/mpegvideoencdsp_msa.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h263dsp_mips.h" +#include "libavutil/mips/generic_macros_msa.h" + /* Accumulates a total over 16 byte vectors read from src. LD_UB8 is called twice with the same stride, with src advanced by 8 * stride in between, filling in0..in15. Each HADD_UB4_UB call writes its results back over its four input vectors; HADD_UH_U32 of each of the 16 vectors is then added into a uint32_t total, which is returned as int32_t. NOTE(review): presumably the vectors are the 16 rows of a 16x16 pixel block and stride is the row pitch in bytes - confirm against the LD_UB8 and HADD_* macro definitions, which are outside this file. */ +static int32_t sum_u8src_16width_msa(uint8_t *src, int32_t stride) +{ + uint32_t sum = 0; + v16u8 in0, in1, in2, in3, in4, in5, in6, in7; + v16u8 in8, in9, in10, in11, in12, in13, in14, in15; + + LD_UB8(src, stride, in0, in1, in2, in3, in4, in5, in6, in7); + src += (8 * stride); /* second LD_UB8 starts 8 * stride bytes further on */ + LD_UB8(src, stride, in8, in9, in10, in11, in12, in13, in14, in15); + + HADD_UB4_UB(in0, in1, in2, in3, in0, in1, in2, in3); + HADD_UB4_UB(in4, in5, in6, in7, in4, in5, in6, in7); + HADD_UB4_UB(in8, in9, in10, in11, in8, in9, in10, in11); + HADD_UB4_UB(in12, in13, in14, in15, in12, in13, in14, in15); + /* first term assigns, the remaining fifteen accumulate */ + sum = HADD_UH_U32(in0); + sum += HADD_UH_U32(in1); + sum += HADD_UH_U32(in2); + sum += HADD_UH_U32(in3); + sum += HADD_UH_U32(in4); + sum += HADD_UH_U32(in5); + sum += HADD_UH_U32(in6); + sum += HADD_UH_U32(in7); + sum += HADD_UH_U32(in8); + sum += HADD_UH_U32(in9); + sum += HADD_UH_U32(in10); + sum += HADD_UH_U32(in11); + sum += HADD_UH_U32(in12); + sum += HADD_UH_U32(in13); + sum += HADD_UH_U32(in14); + sum += HADD_UH_U32(in15); + + return sum; +} + /* Non-static entry point that the MIPS init code stores in c->pix_sum: passes pix and line_size straight to sum_u8src_16width_msa() and returns its result. */ +int ff_pix_sum_msa(uint8_t *pix, int line_size) +{ + return sum_u8src_16width_msa(pix, line_size); +} diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index 860c2d8..d6caf4a 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -176,4 +176,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, ff_mpegvideoencdsp_init_ppc(c, 
avctx); if (ARCH_X86) ff_mpegvideoencdsp_init_x86(c, avctx); + if (ARCH_MIPS) + ff_mpegvideoencdsp_init_mips(c, avctx); } diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index e12f4c6..33f0282 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -52,5 +52,7 @@ void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, AVCodecContext *avctx); void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, AVCodecContext *avctx); +void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); #endif /* AVCODEC_MPEGVIDEOENCDSP_H */ diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index 3f44ec9..b1e62b6 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -1262,6 +1262,15 @@ } #define HADD_UB3_UH(...) HADD_UB3(v8u16, __VA_ARGS__) /* HADD_UB4: expands to two HADD_UB2 invocations, (in0, in1) -> (out0, out1) and (in2, in3) -> (out2, out3), passing RTYPE through to both. HADD_UB4_UB, HADD_UB4_UH and HADD_UB4_SH fix RTYPE to v16u8, v8u16 and v8i16 respectively. */ +#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ +{ \ + HADD_UB2(RTYPE, in0, in1, out0, out1); \ + HADD_UB2(RTYPE, in2, in3, out2, out3); \ +} +#define HADD_UB4_UB(...) HADD_UB4(v16u8, __VA_ARGS__) +#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__) +#define HADD_UB4_SH(...) HADD_UB4(v8i16, __VA_ARGS__) + /* Description : Horizontal subtraction of unsigned byte vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 @@ -1771,6 +1780,15 @@ #define SPLATI_H2_SB(...) SPLATI_H2(v16i8, __VA_ARGS__) #define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__) +#define SPLATI_H3(RTYPE, in, idx0, idx1, idx2, \ + out0, out1, out2) \ +{ \ + SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \ + out2 = (RTYPE) __msa_splati_h((v8i16) in, idx2); \ +} +#define SPLATI_H3_SB(...) SPLATI_H3(v16i8, __VA_ARGS__) +#define SPLATI_H3_SH(...) 
SPLATI_H3(v8i16, __VA_ARGS__) + #define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \ out0, out1, out2, out3) \ { \ @@ -2823,4 +2841,20 @@ tmp_m = __msa_pckev_b((v16i8) in1, (v16i8) in0); \ ST_SB(tmp_m, (pdst)); \ } + +/* Description : Horizontal 2 tap filter kernel code + Arguments : Inputs - in0, in1, mask, coeff, shift + Return Type - v8u16 + Details : Calls __msa_vshf_b on mask, in1 and in0, then __msa_dotp_u_h on that result and coeff, then __msa_srari_h with shift, then __msa_sat_u_h with shift, and evaluates to the final v8u16 value (tmp1_m) +*/ +#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \ +( { \ + v16i8 tmp0_m; \ + v8u16 tmp1_m; \ + \ + tmp0_m = __msa_vshf_b((v16i8) mask, (v16i8) in1, (v16i8) in0); \ + tmp1_m = __msa_dotp_u_h((v16u8) tmp0_m, (v16u8) coeff); \ + tmp1_m = (v8u16) __msa_srari_h((v8i16) tmp1_m, shift); \ + tmp1_m = __msa_sat_u_h(tmp1_m, shift); \ + \ + tmp1_m; \ +} ) #endif /* AVUTIL_MIPS_GENERIC_MACROS_MSA_H */ _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog