Re: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions

2015-04-24 Thread Michael Niedermayer
On Fri, Apr 24, 2015 at 12:30:41PM +0000, Nedeljko Babic wrote:
> LGTM

applied

thanks

[..]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

It's not that you shouldn't use gotos, but rather that you should write
readable code, and code with gotos often, but not always, is less readable.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions

2015-04-24 Thread Nedeljko Babic
LGTM

Thanks,
Nedeljko

Od: ffmpeg-devel-boun...@ffmpeg.org [ffmpeg-devel-boun...@ffmpeg.org] u ime 
korisnika Shivraj Patil
Poslato: 22. april 2015 11:04
Za: ffmpeg-devel@ffmpeg.org
Cc: Shivraj Patil
Tema: [FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations 
for HEVC copy and hv mc functions

From: Shivraj Patil 

Incorporated review comment.
Removed "__" from volatile.

Signed-off-by: Shivraj Patil 
---
 libavcodec/mips/hevcdsp_init_mips.c |   19 +
 libavcodec/mips/hevcdsp_mips.h  |   20 +
 libavcodec/mips/hevcdsp_msa.c   | 1098 +++
 libavutil/mips/generic_macros_msa.h |  133 +
 4 files changed, 1270 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 05ed81f..4fec336 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
   const int bit_depth)
 {
 if (8 == bit_depth) {
+c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa;
+c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa;
+c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa;
+c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa;
+c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa;
+c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa;
+c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa;
+c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa;
+c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa;
+
 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa;
 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa;
 c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa;
@@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa;
 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa;
 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa;
+
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 13cdb5b..4f7f273 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t 
*dst,  \
  intptr_t my,   \
  int width)

+MC(pel, pixels, 4);
+MC(pel, pixels, 6);
+MC(pel, pixels, 8);
+MC(pel, pixels, 12);
+MC(pel, pixels, 16);
+MC(pel, pixels, 24);
+MC(pel, pixels, 32);
+MC(pel, pixels, 48);
+MC(pel, pixels, 64);
+
 MC(qpel, h, 4);
 MC(qpel, h, 8);
 MC(qpel, h, 12);
@@ -46,4 +56,14 @@ MC(qpel, v, 24);
 MC(qpel, v, 32);
 MC(qpel, v, 48);
 MC(qpel, v, 64);
+
+MC(qpel, hv, 4);
+MC(qpel, hv, 8);
+MC(qpel, hv, 12);
+MC(qpel, hv, 16);
+MC(qpel, hv, 24);
+MC(qpel, hv, 32);
+MC(qpel, hv, 48);
+MC(qpel, hv, 64);
+
 #undef MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 88e97d6..fcc344b 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -21,6 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"

+#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\
+   filt0, filt1, filt2, filt3)\
+( {   \
+v4i32 out;\
+  \
+out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\
+out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3));  \
+out;  \
+} )
+
 #define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, 
\
  

[FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions

2015-04-22 Thread shivraj.patil
From: Shivraj Patil 

Incorporated review comment.
Removed "__" from volatile.

Signed-off-by: Shivraj Patil 
---
 libavcodec/mips/hevcdsp_init_mips.c |   19 +
 libavcodec/mips/hevcdsp_mips.h  |   20 +
 libavcodec/mips/hevcdsp_msa.c   | 1098 +++
 libavutil/mips/generic_macros_msa.h |  133 +
 4 files changed, 1270 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 05ed81f..4fec336 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
   const int bit_depth)
 {
 if (8 == bit_depth) {
+c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa;
+c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa;
+c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa;
+c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa;
+c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa;
+c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa;
+c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa;
+c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa;
+c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa;
+
 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa;
 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa;
 c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa;
@@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa;
 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa;
 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa;
+
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 13cdb5b..4f7f273 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t 
*dst,  \
  intptr_t my,   \
  int width)
 
+MC(pel, pixels, 4);
+MC(pel, pixels, 6);
+MC(pel, pixels, 8);
+MC(pel, pixels, 12);
+MC(pel, pixels, 16);
+MC(pel, pixels, 24);
+MC(pel, pixels, 32);
+MC(pel, pixels, 48);
+MC(pel, pixels, 64);
+
 MC(qpel, h, 4);
 MC(qpel, h, 8);
 MC(qpel, h, 12);
@@ -46,4 +56,14 @@ MC(qpel, v, 24);
 MC(qpel, v, 32);
 MC(qpel, v, 48);
 MC(qpel, v, 64);
+
+MC(qpel, hv, 4);
+MC(qpel, hv, 8);
+MC(qpel, hv, 12);
+MC(qpel, hv, 16);
+MC(qpel, hv, 24);
+MC(qpel, hv, 32);
+MC(qpel, hv, 48);
+MC(qpel, hv, 64);
+
 #undef MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 88e97d6..fcc344b 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -21,6 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
 
+#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\
+   filt0, filt1, filt2, filt3)\
+( {   \
+v4i32 out;\
+  \
+out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\
+out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3));  \
+out;  \
+} )
+
 #define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, 
\
filt0, filt1, filt2, filt3, 
\
var_in) 
\
@@ -34,6 +46,603 @@
 out;   
\
 } )
 
+static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride,
+ int16_t * __restrict dst, int32_t dst_stride,
+ int32_t height)
+{
+v16i8 zero = { 

[FFmpeg-devel] [PATCH] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions

2015-04-19 Thread shivraj.patil
From: Shivraj Patil 

Signed-off-by: Shivraj Patil 
---
 libavcodec/mips/hevcdsp_init_mips.c |   19 +
 libavcodec/mips/hevcdsp_mips.h  |   20 +
 libavcodec/mips/hevcdsp_msa.c   | 1098 +++
 libavutil/mips/generic_macros_msa.h |  133 +
 4 files changed, 1270 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 05ed81f..4fec336 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
   const int bit_depth)
 {
 if (8 == bit_depth) {
+c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa;
+c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa;
+c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa;
+c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa;
+c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa;
+c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa;
+c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa;
+c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa;
+c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa;
+
 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa;
 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa;
 c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa;
@@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa;
 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa;
 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa;
+
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 13cdb5b..4f7f273 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t 
*dst,  \
  intptr_t my,   \
  int width)
 
+MC(pel, pixels, 4);
+MC(pel, pixels, 6);
+MC(pel, pixels, 8);
+MC(pel, pixels, 12);
+MC(pel, pixels, 16);
+MC(pel, pixels, 24);
+MC(pel, pixels, 32);
+MC(pel, pixels, 48);
+MC(pel, pixels, 64);
+
 MC(qpel, h, 4);
 MC(qpel, h, 8);
 MC(qpel, h, 12);
@@ -46,4 +56,14 @@ MC(qpel, v, 24);
 MC(qpel, v, 32);
 MC(qpel, v, 48);
 MC(qpel, v, 64);
+
+MC(qpel, hv, 4);
+MC(qpel, hv, 8);
+MC(qpel, hv, 12);
+MC(qpel, hv, 16);
+MC(qpel, hv, 24);
+MC(qpel, hv, 32);
+MC(qpel, hv, 48);
+MC(qpel, hv, 64);
+
 #undef MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 88e97d6..fcc344b 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -21,6 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
 
+#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\
+   filt0, filt1, filt2, filt3)\
+( {   \
+v4i32 out;\
+  \
+out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\
+out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3));  \
+out;  \
+} )
+
 #define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, 
\
filt0, filt1, filt2, filt3, 
\
var_in) 
\
@@ -34,6 +46,603 @@
 out;   
\
 } )
 
+static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride,
+ int16_t * __restrict dst, int32_t dst_stride,
+ int32_t height)
+{
+v16i8 zero = { 0 };
+
+if (2 == height) {
+uint64_t out0, out