Re: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions
On Fri, Apr 24, 2015 at 12:30:41PM +, Nedeljko Babic wrote: > LGTM applied, thanks [..] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB It's not that you shouldn't use gotos, but rather that you should write readable code, and code with gotos often, but not always, is less readable signature.asc Description: Digital signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions
LGTM Thanks, Nedeljko Od: ffmpeg-devel-boun...@ffmpeg.org [ffmpeg-devel-boun...@ffmpeg.org] u ime korisnika Shivraj Patil Poslato: 22. april 2015 11:04 Za: ffmpeg-devel@ffmpeg.org Cc: Shivraj Patil Tema: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions From: Shivraj Patil Incorporated review comment. Removed "__" from volatile. Signed-off-by: Shivraj Patil --- libavcodec/mips/hevcdsp_init_mips.c | 19 + libavcodec/mips/hevcdsp_mips.h | 20 + libavcodec/mips/hevcdsp_msa.c | 1098 +++ libavutil/mips/generic_macros_msa.h | 133 + 4 files changed, 1270 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 05ed81f..4fec336 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, const int bit_depth) { if (8 == bit_depth) { +c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa; +c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa; +c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa; +c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa; +c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa; +c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa; +c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa; +c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa; +c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa; + c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa; c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa; c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa; @@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa; c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa; c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa; + 
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa; +c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa; +c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa; +c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa; +c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa; +c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa; +c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; +c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 13cdb5b..4f7f273 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t *dst, \ intptr_t my, \ int width) +MC(pel, pixels, 4); +MC(pel, pixels, 6); +MC(pel, pixels, 8); +MC(pel, pixels, 12); +MC(pel, pixels, 16); +MC(pel, pixels, 24); +MC(pel, pixels, 32); +MC(pel, pixels, 48); +MC(pel, pixels, 64); + MC(qpel, h, 4); MC(qpel, h, 8); MC(qpel, h, 12); @@ -46,4 +56,14 @@ MC(qpel, v, 24); MC(qpel, v, 32); MC(qpel, v, 48); MC(qpel, v, 64); + +MC(qpel, hv, 4); +MC(qpel, hv, 8); +MC(qpel, hv, 12); +MC(qpel, hv, 16); +MC(qpel, hv, 24); +MC(qpel, hv, 32); +MC(qpel, hv, 48); +MC(qpel, hv, 64); + #undef MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index 88e97d6..fcc344b 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -21,6 +21,18 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" +#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\ + filt0, filt1, filt2, filt3)\ +( { \ +v4i32 out;\ + \ +out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\ +out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3)); \ +out; \ +} ) + #define 
HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, \
[FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions
From: Shivraj Patil Incorporated review comment. Removed "__" from volatile. Signed-off-by: Shivraj Patil --- libavcodec/mips/hevcdsp_init_mips.c | 19 + libavcodec/mips/hevcdsp_mips.h | 20 + libavcodec/mips/hevcdsp_msa.c | 1098 +++ libavutil/mips/generic_macros_msa.h | 133 + 4 files changed, 1270 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 05ed81f..4fec336 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, const int bit_depth) { if (8 == bit_depth) { +c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa; +c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa; +c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa; +c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa; +c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa; +c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa; +c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa; +c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa; +c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa; + c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa; c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa; c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa; @@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa; c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa; c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa; + +c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa; +c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa; +c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa; +c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa; +c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa; 
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa; +c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; +c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 13cdb5b..4f7f273 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t *dst, \ intptr_t my, \ int width) +MC(pel, pixels, 4); +MC(pel, pixels, 6); +MC(pel, pixels, 8); +MC(pel, pixels, 12); +MC(pel, pixels, 16); +MC(pel, pixels, 24); +MC(pel, pixels, 32); +MC(pel, pixels, 48); +MC(pel, pixels, 64); + MC(qpel, h, 4); MC(qpel, h, 8); MC(qpel, h, 12); @@ -46,4 +56,14 @@ MC(qpel, v, 24); MC(qpel, v, 32); MC(qpel, v, 48); MC(qpel, v, 64); + +MC(qpel, hv, 4); +MC(qpel, hv, 8); +MC(qpel, hv, 12); +MC(qpel, hv, 16); +MC(qpel, hv, 24); +MC(qpel, hv, 32); +MC(qpel, hv, 48); +MC(qpel, hv, 64); + #undef MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index 88e97d6..fcc344b 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -21,6 +21,18 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" +#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\ + filt0, filt1, filt2, filt3)\ +( { \ +v4i32 out;\ + \ +out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\ +out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3)); \ +out; \ +} ) + #define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, \ filt0, filt1, filt2, filt3, \ var_in) \ @@ -34,6 +46,603 @@ out; \ } ) +static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride, + int16_t * __restrict dst, int32_t dst_stride, + int32_t height) +{ +v16i8 zero = {
[FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions
From: Shivraj Patil Signed-off-by: Shivraj Patil --- libavcodec/mips/hevcdsp_init_mips.c | 19 + libavcodec/mips/hevcdsp_mips.h | 20 + libavcodec/mips/hevcdsp_msa.c | 1098 +++ libavutil/mips/generic_macros_msa.h | 133 + 4 files changed, 1270 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 05ed81f..4fec336 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, const int bit_depth) { if (8 == bit_depth) { +c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa; +c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa; +c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa; +c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa; +c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa; +c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa; +c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa; +c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa; +c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa; + c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa; c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa; c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa; @@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa; c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa; c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa; + +c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa; +c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa; +c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa; +c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa; +c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa; +c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa; 
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; +c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 13cdb5b..4f7f273 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t *dst, \ intptr_t my, \ int width) +MC(pel, pixels, 4); +MC(pel, pixels, 6); +MC(pel, pixels, 8); +MC(pel, pixels, 12); +MC(pel, pixels, 16); +MC(pel, pixels, 24); +MC(pel, pixels, 32); +MC(pel, pixels, 48); +MC(pel, pixels, 64); + MC(qpel, h, 4); MC(qpel, h, 8); MC(qpel, h, 12); @@ -46,4 +56,14 @@ MC(qpel, v, 24); MC(qpel, v, 32); MC(qpel, v, 48); MC(qpel, v, 64); + +MC(qpel, hv, 4); +MC(qpel, hv, 8); +MC(qpel, hv, 12); +MC(qpel, hv, 16); +MC(qpel, hv, 24); +MC(qpel, hv, 32); +MC(qpel, hv, 48); +MC(qpel, hv, 64); + #undef MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index 88e97d6..fcc344b 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -21,6 +21,18 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" +#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\ + filt0, filt1, filt2, filt3)\ +( { \ +v4i32 out;\ + \ +out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\ +out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3)); \ +out; \ +} ) + #define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, \ filt0, filt1, filt2, filt3, \ var_in) \ @@ -34,6 +46,603 @@ out; \ } ) +static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride, + int16_t * __restrict dst, int32_t dst_stride, + int32_t height) +{ +v16i8 zero = { 0 }; + +if (2 == height) { +uint64_t out0, out