[FFmpeg-cvslog] Disable MSA optimization for big endian arch

2017-06-02 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jun  1 
14:25:19 2017 +0530| [6f35c21659f7802a5533dea04b24958502886d7a] | committer: 
Michael Niedermayer

Disable MSA optimization for big endian arch

The current upstreamed code has been written and tested for Little Endian 
systems.
We plan to add Big Endian support in the near future; until then, MSA 
optimizations need to be disabled on big endian builds to avoid their use and the resulting failures.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6f35c21659f7802a5533dea04b24958502886d7a
---

 configure | 4 
 1 file changed, 4 insertions(+)

diff --git a/configure b/configure
index 72060ef0e9..4ec8f21814 100755
--- a/configure
+++ b/configure
@@ -5392,6 +5392,10 @@ elif enabled mips; then
 enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, 
$t2"' '-mdsp'
 enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, 
$t1"' '-mdspr2'
 
+if enabled bigendian && enabled msa; then
+disable msa
+fi
+
 elif enabled parisc; then
 
 if enabled gcc; then

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] build fix for mips

2017-04-06 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Tue Apr  4 
19:14:01 2017 +0530| [2a512f86c12d2ed588733c454a12930efbad49f4] | committer: 
Ronald S. Bultje

build fix for mips

Signed-off-by: Shivraj Patil 
Signed-off-by: Ronald S. Bultje 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a512f86c12d2ed588733c454a12930efbad49f4
---

 libavcodec/mips/hevcpred_init_mips.c | 3 ++-
 libavcodec/mips/hevcpred_msa.c   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mips/hevcpred_init_mips.c 
b/libavcodec/mips/hevcpred_init_mips.c
index 331cfac..e987698 100644
--- a/libavcodec/mips/hevcpred_init_mips.c
+++ b/libavcodec/mips/hevcpred_init_mips.c
@@ -18,7 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/hevc.h"
+#include "config.h"
+#include "libavutil/attributes.h"
 #include "libavcodec/mips/hevcpred_mips.h"
 
 #if HAVE_MSA
diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c
index 6a3b281..963c64c 100644
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
@@ -18,7 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/hevc.h"
+#include "libavcodec/hevcdec.h"
 #include "libavutil/mips/generic_macros_msa.h"
 #include "hevcpred_mips.h"
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6

2016-11-26 Thread Shivraj Patil
ffmpeg | branch: release/2.8 | Shivraj Patil  | Wed 
Oct  5 18:10:24 2016 +0530| [b9ec80322b5b6dc22747e6bc9d463d2bcc683c6e] | 
committer: Michael Niedermayer

avutil/mips/generic_macros_msa: rename macro variable which causes segfault for 
mips r6

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 
(cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b9ec80322b5b6dc22747e6bc9d463d2bcc683c6e
---

 libavutil/mips/generic_macros_msa.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavutil/mips/generic_macros_msa.h 
b/libavutil/mips/generic_macros_msa.h
index b1d18dd..0a59619 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -85,12 +85,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m = (uint8_t *) (psrc); \
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m);  \
-val1_m = LW(psrc_m + 4);  \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \
@@ -172,12 +172,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m1 = (uint8_t *) (psrc);\
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m1); \
-val1_m = LW(psrc_m1 + 4); \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6

2016-10-16 Thread Shivraj Patil
ffmpeg | branch: release/3.0 | Shivraj Patil  | Wed 
Oct  5 18:10:24 2016 +0530| [ef5b120e2dd6ef83b41f31317890ad947955d916] | 
committer: Michael Niedermayer

avutil/mips/generic_macros_msa: rename macro variable which causes segfault for 
mips r6

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 
(cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ef5b120e2dd6ef83b41f31317890ad947955d916
---

 libavutil/mips/generic_macros_msa.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavutil/mips/generic_macros_msa.h 
b/libavutil/mips/generic_macros_msa.h
index b1d18dd..0a59619 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -85,12 +85,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m = (uint8_t *) (psrc); \
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m);  \
-val1_m = LW(psrc_m + 4);  \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \
@@ -172,12 +172,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m1 = (uint8_t *) (psrc);\
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m1); \
-val1_m = LW(psrc_m1 + 4); \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Support for MIPS cpu P6600

2016-10-16 Thread Shivraj Patil
ffmpeg | branch: release/3.0 | Shivraj Patil  | Fri 
Aug  5 14:00:44 2016 +0530| [c993a11e56abcc8a6199ebed894394acd37f3c0c] | 
committer: Michael Niedermayer

Support for MIPS cpu P6600

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 
(cherry picked from commit 6803a298f4338c19c3032d2417c6e857eb6d95be)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c993a11e56abcc8a6199ebed894394acd37f3c0c
---

 configure | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index c9c139e..1f36d37 100755
--- a/configure
+++ b/configure
@@ -4189,7 +4189,7 @@ elif enabled mips; then
 enable mips32r2
 disable msa
 ;;
-p5600|i6400)
+p5600|i6400|p6600)
 disable mipsdsp
 disable mipsdspr2
 ;;
@@ -4254,6 +4254,10 @@ elif enabled mips; then
 enable mips64r6
 check_cflags "-mtune=i6400 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
 ;;
+p6600)
+enable mips64r6
+check_cflags "-mtune=p6600 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
+;;
 esac
 else
 # We do not disable anything. Is up to the user to disable the 
unwanted features.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6

2016-10-16 Thread Shivraj Patil
ffmpeg | branch: release/3.1 | Shivraj Patil  | Wed 
Oct  5 18:10:24 2016 +0530| [d89979e86b322210862987ebf1473fbd7cdc4c45] | 
committer: Michael Niedermayer

avutil/mips/generic_macros_msa: rename macro variable which causes segfault for 
mips r6

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 
(cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d89979e86b322210862987ebf1473fbd7cdc4c45
---

 libavutil/mips/generic_macros_msa.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavutil/mips/generic_macros_msa.h 
b/libavutil/mips/generic_macros_msa.h
index b1d18dd..0a59619 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -85,12 +85,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m = (uint8_t *) (psrc); \
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m);  \
-val1_m = LW(psrc_m + 4);  \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \
@@ -172,12 +172,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m1 = (uint8_t *) (psrc);\
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m1); \
-val1_m = LW(psrc_m1 + 4); \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Support for MIPS cpu P6600

2016-10-16 Thread Shivraj Patil
ffmpeg | branch: release/3.1 | Shivraj Patil  | Fri 
Aug  5 14:00:44 2016 +0530| [d2566b124af849d28eaedcba60c3a7ac280070ab] | 
committer: Michael Niedermayer

Support for MIPS cpu P6600

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 
(cherry picked from commit 6803a298f4338c19c3032d2417c6e857eb6d95be)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d2566b124af849d28eaedcba60c3a7ac280070ab
---

 configure | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 5b069eb..64142e7 100755
--- a/configure
+++ b/configure
@@ -4336,7 +4336,7 @@ elif enabled mips; then
 enable mips32r2
 disable msa
 ;;
-p5600|i6400)
+p5600|i6400|p6600)
 disable mipsdsp
 disable mipsdspr2
 ;;
@@ -4401,6 +4401,10 @@ elif enabled mips; then
 enable mips64r6
 check_cflags "-mtune=i6400 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
 ;;
+p6600)
+enable mips64r6
+check_cflags "-mtune=p6600 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
+;;
 esac
 else
 # We do not disable anything. Is up to the user to disable the 
unwanted features.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6

2016-10-05 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Oct  5 
18:10:24 2016 +0530| [c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa] | committer: 
Michael Niedermayer

avutil/mips/generic_macros_msa: rename macro variable which causes segfault for 
mips r6

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa
---

 libavutil/mips/generic_macros_msa.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavutil/mips/generic_macros_msa.h 
b/libavutil/mips/generic_macros_msa.h
index b1d18dd..0a59619 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -85,12 +85,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m = (uint8_t *) (psrc); \
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m);  \
-val1_m = LW(psrc_m + 4);  \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \
@@ -172,12 +172,12 @@
 #else  // !(__mips == 64)
 #define LD(psrc)  \
 ( {   \
-uint8_t *psrc_m1 = (uint8_t *) (psrc);\
+uint8_t *psrc_ld_m = (uint8_t *) (psrc);  \
 uint32_t val0_m, val1_m;  \
 uint64_t val_m = 0;   \
   \
-val0_m = LW(psrc_m1); \
-val1_m = LW(psrc_m1 + 4); \
+val0_m = LW(psrc_ld_m);   \
+val1_m = LW(psrc_ld_m + 4);   \
   \
 val_m = (uint64_t) (val1_m);  \
 val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000);  \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] Support for MIPS cpu P6600

2016-08-05 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Fri Aug  5 
14:00:44 2016 +0530| [6803a298f4338c19c3032d2417c6e857eb6d95be] | committer: 
Michael Niedermayer

Support for MIPS cpu P6600

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6803a298f4338c19c3032d2417c6e857eb6d95be
---

 configure | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 9f5b31f..8e30c68 100755
--- a/configure
+++ b/configure
@@ -4346,7 +4346,7 @@ elif enabled mips; then
 enable mips32r2
 disable msa
 ;;
-p5600|i6400)
+p5600|i6400|p6600)
 disable mipsdsp
 disable mipsdspr2
 ;;
@@ -4411,6 +4411,10 @@ elif enabled mips; then
 enable mips64r6
 check_cflags "-mtune=i6400 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
 ;;
+p6600)
+enable mips64r6
+check_cflags "-mtune=p6600 -mabi=64" && check_cflags 
"-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
+;;
 esac
 else
 # We do not disable anything. Is up to the user to disable the 
unwanted features.

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: build fix for P5600 with mips code restructuring

2016-04-26 Thread Shivraj Patil
ffmpeg | branch: release/3.0 | Shivraj Patil  | Tue 
Apr 26 12:35:15 2016 +0530| [83eaaae0057fc471a621a2c1bf1e95e4ab27484f] | 
committer: Michael Niedermayer

configure: build fix for P5600 with mips code restructuring

Note:- backporting commit 15ef98afd10b3696d29fb6d19606ba03a9dd47ad from head

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=83eaaae0057fc471a621a2c1bf1e95e4ab27484f
---

 configure |  252 +
 1 file changed, 120 insertions(+), 132 deletions(-)

diff --git a/configure b/configure
index 475c087..9103e85 100755
--- a/configure
+++ b/configure
@@ -913,6 +913,25 @@ void foo(void){ __asm__ volatile($code); }
 EOF
 }
 
+check_inline_asm_flags(){
+log check_inline_asm_flags "$@"
+name="$1"
+code="$2"
+flags=''
+shift 2
+while [ "$1" != "" ]; do
+  append flags $1
+  shift
+done;
+disable $name
+cat > $TMPC <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: build fix for P5600 along option --disable-msa

2016-03-11 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Fri Mar 11 
14:04:42 2016 +0530| [15ef98afd10b3696d29fb6d19606ba03a9dd47ad] | committer: 
Michael Niedermayer

configure: build fix for P5600 along option --disable-msa

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15ef98afd10b3696d29fb6d19606ba03a9dd47ad
---

 configure |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 1516b01..1b189328 100755
--- a/configure
+++ b/configure
@@ -5118,7 +5118,8 @@ elif enabled mips; then
 fi
 
 enabled mipsfpu && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f2"' 
'-mhard-float'
-enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b 
$w0, $w1, 1"' '-mfp64 -mmsa' && check_header msa.h || disable msa
+enabled mipsfpu && (enabled mips32r5 || enabled mips32r6 || enabled 
mips64r6) && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f1"' '-mfp64'
+enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b 
$w0, $w1, 1"' '-mmsa' && check_header msa.h || disable msa
 enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, 
$t2"' '-mdsp'
 enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, 
$t1"' '-mdspr2'
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: add check_inline_asm_flags()

2016-03-08 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Mar  7 
20:07:10 2016 +0530| [b59d06d5f4155c27d6c3aa14be8379723aaf0a08] | committer: 
Michael Niedermayer

configure: add check_inline_asm_flags()

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b59d06d5f4155c27d6c3aa14be8379723aaf0a08
---

 configure |   19 +++
 1 file changed, 19 insertions(+)

diff --git a/configure b/configure
index 3299b1b..c895b50 100755
--- a/configure
+++ b/configure
@@ -918,6 +918,25 @@ void foo(void){ __asm__ volatile($code); }
 EOF
 }
 
+check_inline_asm_flags(){
+log check_inline_asm_flags "$@"
+name="$1"
+code="$2"
+flags=''
+shift 2
+while [ "$1" != "" ]; do
+  append flags $1
+  shift
+done;
+disable $name
+cat > $TMPC <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] configure: build fix for P5600 with mips code restructuring

2016-03-08 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Mar  7 
20:07:10 2016 +0530| [8ca2c872b650182958f733db8b1d99c061dd3bf9] | committer: 
Michael Niedermayer

configure: build fix for P5600 with mips code restructuring

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8ca2c872b650182958f733db8b1d99c061dd3bf9
---

 configure |  293 +
 1 file changed, 98 insertions(+), 195 deletions(-)

diff --git a/configure b/configure
index c895b50..81ec105 100755
--- a/configure
+++ b/configure
@@ -1685,6 +1685,7 @@ ARCH_EXT_LIST_ARM="
 ARCH_EXT_LIST_MIPS="
 mipsfpu
 mips32r2
+mips32r5
 mips64r2
 mips32r6
 mips64r6
@@ -2183,10 +2184,11 @@ mipsfpu_deps="mips"
 mipsdsp_deps="mips"
 mipsdspr2_deps="mips"
 mips32r2_deps="mips"
+mips32r5_deps="mips"
 mips32r6_deps="mips"
 mips64r2_deps="mips"
 mips64r6_deps="mips"
-msa_deps="mips"
+msa_deps="mipsfpu"
 mmi_deps="mips"
 
 altivec_deps="ppc"
@@ -4208,118 +4210,90 @@ elif enabled mips; then
 
 cpuflags="-march=$cpu"
 
-case $cpu in
-24kc)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsfpu
-disable mipsdsp
-disable mipsdspr2
-disable msa
-;;
-24kf*)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsdsp
-disable mipsdspr2
-disable msa
-;;
-24kec|34kc|1004kc)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsfpu
-disable mipsdspr2
-disable msa
-;;
-24kef*|34kf*|1004kf*)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsdspr2
-disable msa
-;;
-74kc)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsfpu
-disable msa
-;;
-74kf)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable msa
-;;
-p5600)
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsdsp
-disable mipsdspr2
-check_cflags "-mtune=p5600" &&
-check_cflags "-mfp64 -msched-weight -mload-store-pairs 
-funroll-loops" &&
-add_asflags "-mfp64"
-;;
-i6400)
-disable mips32r2
-disable mips32r6
-disable mips64r2
-disable mipsdsp
-disable mipsdspr2
-check_cflags "-mtune=i6400 -mabi=64" &&
-check_cflags "-mfp64 -msched-weight -mload-store-pairs 
-funroll-loops" &&
-check_ldflags "-mabi=64" &&
-add_asflags "-mfp64"
-;;
-loongson*)
-disable mips32r2
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsfpu
-disable mipsdsp
-disable mipsdspr2
-disable msa
-enable local_aligned_8 local_aligned_16 local_aligned_32
-enable simd_align_16
-enable fast_64bit
-enable fast_clz
-enable fast_cmov
-enable fast_unaligned
-disable aligned_stack
-case $cpu in
-loongson3*)
-cpuflags="-march=loongson3a -mhard-float 
-fno-expensive-optimizations"
-;;
-loongson2e)
-cpuflags="-march=loongson2e -mhard-float 
-fno-expensive-optimizations"
-;;
-loongson2f)
-cpuflags="-march=loongson2f -mhard-float 
-fno-expensive-optimizations"
-;;
-esac
-;;
-generic)
-# We do not disable anything. Is up to the user to disable
-# the unwanted features.
-;;
-*)
-# Unknown CPU. Disable everything.
-warn "unknown CPU. Disabling all MIPS optimizations."
-disable mipsfpu
-disable mips32r2
-disable mips32r6
-disable mips64r2
-disable mips64r6
-disable mipsdsp
-disable mipsdspr2
-disable msa
-;;
-esac
+if [ "$cpu" != "generic" ]; then
+disable mips32r2
+disable mips32r5
+disable mips64r2
+disable mips32r6
+

[FFmpeg-cvslog] avcodec/mips: build fix for MSA 64bit

2015-10-08 Thread Shivraj Patil
ffmpeg | branch: release/2.8 | Shivraj Patil  | Thu 
Oct  8 15:05:52 2015 +0530| [a931ad554d0d7a337f3ac3340622f189556885fc] | 
committer: Michael Niedermayer

avcodec/mips: build fix for MSA 64bit

Modified datatype of function argument (pitch from int32_t to ptrdiff_t).

Signed-off-by: Shivraj Patil 
Commit in master: 322e960dbf32b846b26f95afa6c0e652bc04e90d
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a931ad554d0d7a337f3ac3340622f189556885fc
---

 libavcodec/mips/vp9_lpf_msa.c |   42 -
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c
index 63e538e..eef8afc 100644
--- a/libavcodec/mips/vp9_lpf_msa.c
+++ b/libavcodec/mips/vp9_lpf_msa.c
@@ -259,7 +259,7 @@
 mask_out = __msa_xori_b(mask_out, 0xff);   \
 }
 
-void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_4_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_ptr)
@@ -288,7 +288,7 @@ void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch,
 }
 
 
-void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_44_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -318,7 +318,7 @@ void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch,
 ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
 }
 
-void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_8_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_ptr)
@@ -392,7 +392,7 @@ void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_88_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -471,7 +471,7 @@ void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_84_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -542,7 +542,7 @@ void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_48_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -614,7 +614,7 @@ void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, ptrdiff_t pitch,
 uint8_t *filter48,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
@@ -689,7 +689,7 @@ static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, 
int32_t pitch,
 }
 }
 
-static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48)
+static void vp9_hz_lpf_t16_16w(uint8_t *src, ptrdiff_t pitch, uint8_t 
*filter48)
 {
 v16u8 flat, flat2, filter8;
 v16i8 zero = { 0 };
@@ -1021,7 +1021,7 @@ static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t 
pitch, uint8_t *filter48)
 }
 }
 
-void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_16_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -1037,7 +1037,7 @@ void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t 
pitch,
 }
 }
 
-void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_16_8_msa(uint8_t *src, ptrdiff_t pitch,
int32_t b_limit_ptr,
int32_t limit_ptr,
int32_t thresh_ptr)
@@ -1261,7 +1261,7 @@ void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t 
pitch,
 }
 }
 
-void ff_loop_filter_h_4_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_h_4_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_

[FFmpeg-cvslog] avcodec/mips: build fix for MSA

2015-10-08 Thread Shivraj Patil
ffmpeg | branch: release/2.8 | Shivraj Patil  | Thu 
Oct  8 14:21:22 2015 +0530| [7236080d2721dff24f9716a6cce91bbacb32581f] | 
committer: Michael Niedermayer

avcodec/mips: build fix for MSA

Modified sps and pps access from old HEVCContext(s) structure to newly 
introduced HEVCParamSets(ps).

Signed-off-by: Shivraj Patil 
Commit in master: b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7236080d2721dff24f9716a6cce91bbacb32581f
---

 libavcodec/mips/hevcpred_msa.c |  282 
 1 file changed, 141 insertions(+), 141 deletions(-)

diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c
index 5d9299f..6a3b281 100644
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
@@ -1915,24 +1915,24 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, 
int y0, int c_idx)
 v16u8 vec0;
 HEVCLocalContext *lc = s->HEVClc;
 int i;
-int hshift = s->sps->hshift[c_idx];
-int vshift = s->sps->vshift[c_idx];
+int hshift = s->ps.sps->hshift[c_idx];
+int vshift = s->ps.sps->vshift[c_idx];
 int size_in_luma_h = 16 << hshift;
-int size_in_tbs_h = size_in_luma_h >> s->sps->log2_min_tb_size;
+int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
 int size_in_luma_v = 16 << vshift;
-int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size;
+int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
 int x = x0 >> hshift;
 int y = y0 >> vshift;
-int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
-int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
 
 int cur_tb_addr =
-s->pps->min_tb_addr_zs[(y_tb) * (s->sps->tb_mask + 2) + (x_tb)];
+s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)];
 
 ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t);
 uint8_t *src = (uint8_t *) s->frame->data[c_idx] + x + y * stride;
 
-int min_pu_width = s->sps->min_pu_width;
+int min_pu_width = s->ps.sps->min_pu_width;
 
 enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
 lc->tu.intra_pred_mode;
@@ -1948,41 +1948,41 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, 
int y0, int c_idx)
 uint8_t *filtered_top = filtered_top_array + 1;
 int cand_bottom_left = lc->na.cand_bottom_left
 && cur_tb_addr >
-s->pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->sps->tb_mask) *
-   (s->sps->tb_mask + 2) + (x_tb - 1)];
+s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & 
s->ps.sps->tb_mask) *
+   (s->ps.sps->tb_mask + 2) + (x_tb - 1)];
 int cand_left = lc->na.cand_left;
 int cand_up_left = lc->na.cand_up_left;
 int cand_up = lc->na.cand_up;
 int cand_up_right = lc->na.cand_up_right
 && cur_tb_addr >
-s->pps->min_tb_addr_zs[(y_tb - 1) * (s->sps->tb_mask + 2) +
-   ((x_tb + size_in_tbs_h) & s->sps->tb_mask)];
+s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) +
+   ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)];
 
 int bottom_left_size =
 (((y0 + 2 * size_in_luma_v) >
-  (s->sps->height) ? (s->sps->height) : (y0 +
+  (s->ps.sps->height) ? (s->ps.sps->height) : (y0 +
  2 * size_in_luma_v)) -
  (y0 + size_in_luma_v)) >> vshift;
 int top_right_size =
 (((x0 + 2 * size_in_luma_h) >
-  (s->sps->width) ? (s->sps->width) : (x0 + 2 * size_in_luma_h)) -
+  (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) 
-
  (x0 + size_in_luma_h)) >> hshift;
 
-if (s->pps->constrained_intra_pred_flag == 1) {
-int size_in_luma_pu_v = ((size_in_luma_v) >> s->sps->log2_min_pu_size);
-int size_in_luma_pu_h = ((size_in_luma_h) >> s->sps->log2_min_pu_size);
-int on_pu_edge_x = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1));
-int on_pu_edge_y = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1));
+if (s->ps.pps->constrained_intra_pred_flag == 1) {
+int size_in_luma_pu_v = ((size_in_luma_v) >> 
s-&

[FFmpeg-cvslog] avcodec/mips: build fix for MSA 64bit

2015-10-07 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Oct  7 
18:39:59 2015 +0530| [322e960dbf32b846b26f95afa6c0e652bc04e90d] | committer: 
Michael Niedermayer

avcodec/mips: build fix for MSA 64bit

Modified datatype of function argument (pitch from int32_t to ptrdiff_t)

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=322e960dbf32b846b26f95afa6c0e652bc04e90d
---

 libavcodec/mips/vp9_lpf_msa.c |   42 -
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c
index 63e538e..eef8afc 100644
--- a/libavcodec/mips/vp9_lpf_msa.c
+++ b/libavcodec/mips/vp9_lpf_msa.c
@@ -259,7 +259,7 @@
 mask_out = __msa_xori_b(mask_out, 0xff);   \
 }
 
-void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_4_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_ptr)
@@ -288,7 +288,7 @@ void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch,
 }
 
 
-void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_44_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -318,7 +318,7 @@ void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch,
 ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
 }
 
-void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_8_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_ptr)
@@ -392,7 +392,7 @@ void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_88_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -471,7 +471,7 @@ void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_84_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -542,7 +542,7 @@ void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_48_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -614,7 +614,7 @@ void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, ptrdiff_t pitch,
 uint8_t *filter48,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
@@ -689,7 +689,7 @@ static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
 }
 }
 
-static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48)
+static void vp9_hz_lpf_t16_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48)
 {
 v16u8 flat, flat2, filter8;
 v16i8 zero = { 0 };
@@ -1021,7 +1021,7 @@ static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48)
 }
 }
 
-void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_16_16_msa(uint8_t *src, ptrdiff_t pitch,
 int32_t b_limit_ptr,
 int32_t limit_ptr,
 int32_t thresh_ptr)
@@ -1037,7 +1037,7 @@ void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_v_16_8_msa(uint8_t *src, ptrdiff_t pitch,
int32_t b_limit_ptr,
int32_t limit_ptr,
int32_t thresh_ptr)
@@ -1261,7 +1261,7 @@ void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch,
 }
 }
 
-void ff_loop_filter_h_4_8_msa(uint8_t *src, int32_t pitch,
+void ff_loop_filter_h_4_8_msa(uint8_t *src, ptrdiff_t pitch,
   int32_t b_limit_ptr,
   int32_t limit_ptr,
   int32_t thresh_ptr)
@@ -1290,7 +1290,7 @@ void ff_loop_filter_h_4_8_msa(uint8_t *

[FFmpeg-cvslog] avcodec/mips: build fix for MSA

2015-10-07 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Oct  7 
18:38:53 2015 +0530| [b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115] | committer: 
Michael Niedermayer

avcodec/mips: build fix for MSA

Modified sps and pps access from old HEVCContext(s) structure to newly 
introduced HEVCParamSets(ps)

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115
---

 libavcodec/mips/hevcpred_msa.c |  282 
 1 file changed, 141 insertions(+), 141 deletions(-)

diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c
index 5d9299f..6a3b281 100644
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
@@ -1915,24 +1915,24 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx)
 v16u8 vec0;
 HEVCLocalContext *lc = s->HEVClc;
 int i;
-int hshift = s->sps->hshift[c_idx];
-int vshift = s->sps->vshift[c_idx];
+int hshift = s->ps.sps->hshift[c_idx];
+int vshift = s->ps.sps->vshift[c_idx];
 int size_in_luma_h = 16 << hshift;
-int size_in_tbs_h = size_in_luma_h >> s->sps->log2_min_tb_size;
+int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
 int size_in_luma_v = 16 << vshift;
-int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size;
+int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
 int x = x0 >> hshift;
 int y = y0 >> vshift;
-int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
-int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
 
 int cur_tb_addr =
-s->pps->min_tb_addr_zs[(y_tb) * (s->sps->tb_mask + 2) + (x_tb)];
+s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)];
 
 ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t);
 uint8_t *src = (uint8_t *) s->frame->data[c_idx] + x + y * stride;
 
-int min_pu_width = s->sps->min_pu_width;
+int min_pu_width = s->ps.sps->min_pu_width;
 
 enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
 lc->tu.intra_pred_mode;
@@ -1948,41 +1948,41 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx)
 uint8_t *filtered_top = filtered_top_array + 1;
 int cand_bottom_left = lc->na.cand_bottom_left
 && cur_tb_addr >
-s->pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->sps->tb_mask) *
-   (s->sps->tb_mask + 2) + (x_tb - 1)];
+s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->ps.sps->tb_mask) *
+   (s->ps.sps->tb_mask + 2) + (x_tb - 1)];
 int cand_left = lc->na.cand_left;
 int cand_up_left = lc->na.cand_up_left;
 int cand_up = lc->na.cand_up;
 int cand_up_right = lc->na.cand_up_right
 && cur_tb_addr >
-s->pps->min_tb_addr_zs[(y_tb - 1) * (s->sps->tb_mask + 2) +
-   ((x_tb + size_in_tbs_h) & s->sps->tb_mask)];
+s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) +
+   ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)];
 
 int bottom_left_size =
 (((y0 + 2 * size_in_luma_v) >
-  (s->sps->height) ? (s->sps->height) : (y0 +
+  (s->ps.sps->height) ? (s->ps.sps->height) : (y0 +
  2 * size_in_luma_v)) -
  (y0 + size_in_luma_v)) >> vshift;
 int top_right_size =
 (((x0 + 2 * size_in_luma_h) >
-  (s->sps->width) ? (s->sps->width) : (x0 + 2 * size_in_luma_h)) -
+  (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) -
  (x0 + size_in_luma_h)) >> hshift;
 
-if (s->pps->constrained_intra_pred_flag == 1) {
-int size_in_luma_pu_v = ((size_in_luma_v) >> s->sps->log2_min_pu_size);
-int size_in_luma_pu_h = ((size_in_luma_h) >> s->sps->log2_min_pu_size);
-int on_pu_edge_x = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1));
-int on_pu_edge_y = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1));
+if (s->ps.pps->constrained_intra_pred_flag == 1) {
+int size_in_luma_pu_v = ((size_in_luma_v) >> s->ps.sps->log2_min_pu_size);
+int size_in_luma_pu_h = ((size_

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 bilinear functions

2015-07-27 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Jul 27 
17:47:34 2015 +0530| [71aede3ced76a5adb4d8cd44a70dfe2487db882a] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 bilinear functions

Signed-off-by: Shivraj Patil 
Reviewed-by: "Ronald S. Bultje" 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=71aede3ced76a5adb4d8cd44a70dfe2487db882a
---

 libavcodec/mips/vp9_mc_msa.c   | 2123 
 libavcodec/mips/vp9dsp_init_mips.c |2 +
 libavcodec/mips/vp9dsp_mips.h  |   32 +
 3 files changed, 2157 insertions(+)

diff --git a/libavcodec/mips/vp9_mc_msa.c b/libavcodec/mips/vp9_mc_msa.c
index a1d7798..1671d97 100644
--- a/libavcodec/mips/vp9_mc_msa.c
+++ b/libavcodec/mips/vp9_mc_msa.c
@@ -31,6 +31,24 @@ static const uint8_t mc_filt_mask_arr[16 * 3] = {
 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
 };
 
+static const int8_t vp9_bilinear_filters_msa[15][2] = {
+{120, 8},
+{112, 16},
+{104, 24},
+{96, 32},
+{88, 40},
+{80, 48},
+{72, 56},
+{64, 64},
+{56, 72},
+{48, 80},
+{40, 88},
+{32, 96},
+{24, 104},
+{16, 112},
+{8, 120}
+};
+
 #define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \
 filt0, filt1, filt2, filt3) \
 ( { \
@@ -1827,6 +1845,2111 @@ static void common_hv_8ht_8vt_and_aver_dst_64w_msa(const uint8_t *src,
 }
 }
 
+static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ const int8_t *filter)
+{
+v16i8 src0, src1, src2, src3, mask;
+v16u8 filt0, vec0, vec1, res0, res1;
+v8u16 vec2, vec3, filt;
+
+mask = LD_SB(&mc_filt_mask_arr[16]);
+
+/* rearranging filter */
+filt = LD_UH(filter);
+filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+LD_SB4(src, src_stride, src0, src1, src2, src3);
+VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+SRARI_H2_UH(vec2, vec3, 7);
+PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ const int8_t *filter)
+{
+v16u8 vec0, vec1, vec2, vec3, filt0;
+v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+v16i8 res0, res1, res2, res3;
+v8u16 vec4, vec5, vec6, vec7, filt;
+
+mask = LD_SB(&mc_filt_mask_arr[16]);
+
+/* rearranging filter */
+filt = LD_UH(filter);
+filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+vec4, vec5, vec6, vec7);
+SRARI_H4_UH(vec4, vec5, vec6, vec7, 7);
+PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
+res0, res1, res2, res3);
+ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+dst += (4 * dst_stride);
+ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+void ff_put_bilin_4h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
+ int height, int mx, int my)
+{
+const int8_t *filter = vp9_bilinear_filters_msa[mx - 1];
+
+if (4 == height) {
+common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
+} else if (8 == height) {
+common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
+}
+}
+
+static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ const int8_t *filter)
+{
+v16u8 filt0;
+v16i8 src0, src1, src2, src3, mask;
+v8u16 vec0, vec1, vec2, vec3, filt;
+
+mask = LD_SB(&mc_filt_mask_arr[0]);
+
+/* rearranging filter */
+filt = LD_UH(filter);
+filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+LD_SB4(src, src_stride, src0, src1, src2, src3);
+VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+vec0, vec1, vec2, vec3);
+SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
+ST8x4_UB(src0, src1, dst, dst_stride);
+}
+
+static void common_hz_2t_8x8mult_msa(const uint8_t

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 intra functions

2015-07-24 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jul 23 
18:43:07 2015 +0530| [e21b090bfb7b6b723ff1c28cc5bb16e7498addb2] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 intra functions

Signed-off-by: Shivraj Patil 
Reviewed-by: "Ronald S. Bultje" 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e21b090bfb7b6b723ff1c28cc5bb16e7498addb2
---

 libavcodec/mips/Makefile   |3 +-
 libavcodec/mips/vp9_intra_msa.c|  533 
 libavcodec/mips/vp9dsp_init_mips.c |   31 +++
 libavcodec/mips/vp9dsp_mips.h  |   53 
 4 files changed, 619 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 7cbad54..f543448 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -45,7 +45,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_VP9_DECODER)+= mips/vp9_mc_msa.o \
  mips/vp9_lpf_msa.o\
- mips/vp9_idct_msa.o
+ mips/vp9_idct_msa.o   \
+ mips/vp9_intra_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\
  mips/h264idct_msa.o
 MSA-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_msa.o
diff --git a/libavcodec/mips/vp9_intra_msa.c b/libavcodec/mips/vp9_intra_msa.c
new file mode 100644
index 0000000..54cf0ae
--- /dev/null
+++ b/libavcodec/mips/vp9_intra_msa.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1)  \
+{\
+out0 = __msa_subs_u_h(out0, in0);\
+out1 = __msa_subs_u_h(out1, in1);\
+}
+
+void ff_vert_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
+   const uint8_t *src)
+{
+uint32_t row;
+v16u8 src0;
+
+src0 = LD_UB(src);
+
+for (row = 16; row--;) {
+ST_UB(src0, dst);
+dst += dst_stride;
+}
+}
+
+void ff_vert_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
+   const uint8_t *src)
+{
+uint32_t row;
+v16u8 src1, src2;
+
+src1 = LD_UB(src);
+src2 = LD_UB(src + 16);
+
+for (row = 32; row--;) {
+ST_UB2(src1, src2, dst, 16);
+dst += dst_stride;
+}
+}
+
+void ff_hor_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
+  const uint8_t *top)
+{
+uint32_t row, inp;
+v16u8 src0, src1, src2, src3;
+
+src += 12;
+for (row = 4; row--;) {
+inp = LW(src);
+src -= 4;
+
+src0 = (v16u8) __msa_fill_b(inp >> 24);
+src1 = (v16u8) __msa_fill_b(inp >> 16);
+src2 = (v16u8) __msa_fill_b(inp >> 8);
+src3 = (v16u8) __msa_fill_b(inp);
+
+ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+dst += (4 * dst_stride);
+}
+}
+
+void ff_hor_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
+  const uint8_t *top)
+{
+uint32_t row, inp;
+v16u8 src0, src1, src2, src3;
+
+src += 28;
+for (row = 8; row--;) {
+inp = LW(src);
+src -= 4;
+
+src0 = (v16u8) __msa_fill_b(inp >> 24);
+src1 = (v16u8) __msa_fill_b(inp >> 16);
+src2 = (v16u8) __msa_fill_b(inp >> 8);
+src3 = (v16u8) __msa_fill_b(inp);
+
+ST_UB2(src0, src0, dst, 16);
+dst += dst_stride;
+ST_UB2(src1, src1, dst, 16);
+dst += dst_stride;
+ST_UB2(src2, src2, dst, 16);
+dst += dst_stride;
+ST_UB2(src3, src3, dst, 16);
+dst += dst_stride;
+}
+}
+

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 idct functions

2015-07-23 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Jul 22 
17:30:23 2015 +0530| [c03800d5921e5359a78c2d2af781d059bc53bfda] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 idct functions

Signed-off-by: Shivraj Patil 
Reviewed-by: "Ronald S. Bultje" 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c03800d5921e5359a78c2d2af781d059bc53bfda
---

 libavcodec/mips/Makefile   |3 +-
 libavcodec/mips/vp9_idct_msa.c | 2138 
 libavcodec/mips/vp9dsp_init_mips.c |   24 +
 libavcodec/mips/vp9dsp_mips.h  |   28 +
 4 files changed, 2192 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index a946897..7cbad54 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -44,7 +44,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_lpf_sao_msa.o   \
  mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_VP9_DECODER)+= mips/vp9_mc_msa.o \
- mips/vp9_lpf_msa.o
+ mips/vp9_lpf_msa.o\
+ mips/vp9_idct_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\
  mips/h264idct_msa.o
 MSA-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_msa.o
diff --git a/libavcodec/mips/vp9_idct_msa.c b/libavcodec/mips/vp9_idct_msa.c
new file mode 100644
index 0000000..aeb2387
--- /dev/null
+++ b/libavcodec/mips/vp9_idct_msa.c
@@ -0,0 +1,2138 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+#define VP9_DCT_CONST_BITS   14
+#define ROUND_POWER_OF_TWO(value, n)  (((value) + (1 << ((n) - 1))) >> (n))
+
+static const int32_t cospi_1_64 = 16364;
+static const int32_t cospi_2_64 = 16305;
+static const int32_t cospi_3_64 = 16207;
+static const int32_t cospi_4_64 = 16069;
+static const int32_t cospi_5_64 = 15893;
+static const int32_t cospi_6_64 = 15679;
+static const int32_t cospi_7_64 = 15426;
+static const int32_t cospi_8_64 = 15137;
+static const int32_t cospi_9_64 = 14811;
+static const int32_t cospi_10_64 = 14449;
+static const int32_t cospi_11_64 = 14053;
+static const int32_t cospi_12_64 = 13623;
+static const int32_t cospi_13_64 = 13160;
+static const int32_t cospi_14_64 = 12665;
+static const int32_t cospi_15_64 = 12140;
+static const int32_t cospi_16_64 = 11585;
+static const int32_t cospi_17_64 = 11003;
+static const int32_t cospi_18_64 = 10394;
+static const int32_t cospi_19_64 = 9760;
+static const int32_t cospi_20_64 = 9102;
+static const int32_t cospi_21_64 = 8423;
+static const int32_t cospi_22_64 = 7723;
+static const int32_t cospi_23_64 = 7005;
+static const int32_t cospi_24_64 = 6270;
+static const int32_t cospi_25_64 = 5520;
+static const int32_t cospi_26_64 = 4756;
+static const int32_t cospi_27_64 = 3981;
+static const int32_t cospi_28_64 = 3196;
+static const int32_t cospi_29_64 = 2404;
+static const int32_t cospi_30_64 = 1606;
+static const int32_t cospi_31_64 = 804;
+
+//  16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+static const int32_t sinpi_1_9 = 5283;
+static const int32_t sinpi_2_9 = 9929;
+static const int32_t sinpi_3_9 = 13377;
+static const int32_t sinpi_4_9 = 15212;
+
+#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1)  \
+{  \
+v8i16 k0_m = __msa_fill_h(cnst0);  \
+v4i32 s0_m, s1_m, s2_m, s3_m;  \
+   \
+s0_m = (v4i32) __msa_fill_h(cnst1);\
+k0_m = __msa_ilvev_h((v8i16) s0_m, k0_m);  \
+

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions

2015-07-07 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Jun 29 
21:15:15 2015 +0530| [d12f76ffbb1b68d3c8a2859b7a095080ba985fa2] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions in new 
file idctdsp_msa.c and simple_idct_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d12f76ffbb1b68d3c8a2859b7a095080ba985fa2
---

 libavcodec/idctdsp.c|2 +
 libavcodec/idctdsp.h|2 +
 libavcodec/mips/Makefile|3 +
 libavcodec/mips/idctdsp_init_mips.c |   49 +++
 libavcodec/mips/idctdsp_mips.h  |   42 +++
 libavcodec/mips/idctdsp_msa.c   |  149 +
 libavcodec/mips/simple_idct_msa.c   |  573 +++
 libavutil/mips/generic_macros_msa.h |   37 +++
 8 files changed, 857 insertions(+)

diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index ae804d9..63e9b52 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -305,6 +305,8 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
 ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
 if (ARCH_X86)
 ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+if (ARCH_MIPS)
+ff_idctdsp_init_mips(c, avctx, high_bit_depth);
 
 ff_put_pixels_clamped = c->put_pixels_clamped;
 ff_add_pixels_clamped = c->add_pixels_clamped;
diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
index 538b716..b180a67 100644
--- a/libavcodec/idctdsp.h
+++ b/libavcodec/idctdsp.h
@@ -108,5 +108,7 @@ void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
  unsigned high_bit_depth);
 void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
  unsigned high_bit_depth);
+void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+  unsigned high_bit_depth);
 
 #endif /* AVCODEC_IDCTDSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 695ee36..5569a03 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -29,6 +29,7 @@ OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o
 OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
 OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
 OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o
+OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_init_mips.o
 OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
@@ -50,6 +51,8 @@ MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o
 MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
 MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
+MSA-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_msa.o   \
+ mips/simple_idct_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_msa.o
 MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
diff --git a/libavcodec/mips/idctdsp_init_mips.c b/libavcodec/mips/idctdsp_init_mips.c
new file mode 100644
index 0000000..c964340
--- /dev/null
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+
+#if HAVE_MSA
+static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
+(avctx->bits_per_raw_sample != 10) &&
+(avctx->bits_per_raw_sample != 12) &&
+(avctx->idct_a

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions

2015-07-06 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Jun 29 
21:15:14 2015 +0530| [709bb45c660ae7c2d065bcade931e068620f9b92] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions in new 
file me_cmp_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=709bb45c660ae7c2d065bcade931e068620f9b92
---

 libavcodec/me_cmp.c |2 +
 libavcodec/me_cmp.h |1 +
 libavcodec/mips/Makefile|2 +
 libavcodec/mips/me_cmp_init_mips.c  |   56 +++
 libavcodec/mips/me_cmp_mips.h   |   60 +++
 libavcodec/mips/me_cmp_msa.c|  686 +++
 libavutil/mips/generic_macros_msa.h |   59 +++
 7 files changed, 866 insertions(+)

diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index d4213d2..dc76b07 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c
@@ -991,4 +991,6 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
 ff_me_cmp_init_ppc(c, avctx);
 if (ARCH_X86)
 ff_me_cmp_init_x86(c, avctx);
+if (ARCH_MIPS)
+ff_me_cmp_init_mips(c, avctx);
 }
diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
index 98ee53c..a3603ec 100644
--- a/libavcodec/me_cmp.h
+++ b/libavcodec/me_cmp.h
@@ -87,6 +87,7 @@ void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx);
 
 void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
 
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 59c1f79..2993891 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
 OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_init_mips.o
+OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -51,5 +52,6 @@ MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_msa.o
+MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/me_cmp_init_mips.c b/libavcodec/mips/me_cmp_init_mips.c
new file mode 100644
index 0000000..219a0dc
--- /dev/null
+++ b/libavcodec/mips/me_cmp_init_mips.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "me_cmp_mips.h"
+
+#if HAVE_MSA
+static av_cold void me_cmp_msa(MECmpContext *c, AVCodecContext *avctx)
+{
+#if BIT_DEPTH == 8
+c->pix_abs[0][0] = ff_pix_abs16_msa;
+c->pix_abs[0][1] = ff_pix_abs16_x2_msa;
+c->pix_abs[0][2] = ff_pix_abs16_y2_msa;
+c->pix_abs[0][3] = ff_pix_abs16_xy2_msa;
+c->pix_abs[1][0] = ff_pix_abs8_msa;
+c->pix_abs[1][1] = ff_pix_abs8_x2_msa;
+c->pix_abs[1][2] = ff_pix_abs8_y2_msa;
+c->pix_abs[1][3] = ff_pix_abs8_xy2_msa;
+
+c->hadamard8_diff[0] = ff_hadamard8_diff16_msa;
+c->hadamard8_diff[1] = ff_hadamard8_diff8x8_msa;
+
+c->hadamard8_diff[4] = ff_hadamard8_intra16_msa;
+c->hadamard8_diff[5] = ff_hadamard8_intra8x8_msa;
+
+c->sad[0] = ff_pix_abs16_msa;
+c->sad[1] = ff_pix

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions

2015-07-06 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Jun 29 
21:15:13 2015 +0530| [2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp 
functions in new file mpegvideoencdsp_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5
---

 libavcodec/mips/Makefile|2 +
 libavcodec/mips/mpegvideoencdsp_init_mips.c |   40 +
 libavcodec/mips/mpegvideoencdsp_msa.c   |   62 +++
 libavcodec/mpegvideoencdsp.c|2 +
 libavcodec/mpegvideoencdsp.h|2 +
 libavutil/mips/generic_macros_msa.h |   34 +++
 6 files changed, 142 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 277ac2a..59c1f79 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -30,6 +30,7 @@ OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
 OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
 OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -49,5 +50,6 @@ MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
 MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
+MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/mpegvideoencdsp_init_mips.c b/libavcodec/mips/mpegvideoencdsp_init_mips.c
new file mode 100644
index 0000000..9bfe94e
--- /dev/null
+++ b/libavcodec/mips/mpegvideoencdsp_init_mips.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/bit_depth_template.c"
+#include "h263dsp_mips.h"
+
+#if HAVE_MSA
+static av_cold void mpegvideoencdsp_init_msa(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+{
+#if BIT_DEPTH == 8
+c->pix_sum = ff_pix_sum_msa;
+#endif
+}
+#endif  // #if HAVE_MSA
+
+av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
+  AVCodecContext *avctx)
+{
+#if HAVE_MSA
+mpegvideoencdsp_init_msa(c, avctx);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/mpegvideoencdsp_msa.c b/libavcodec/mips/mpegvideoencdsp_msa.c
new file mode 100644
index 0000000..46473da
--- /dev/null
+++ b/libavcodec/mips/mpegvideoencdsp_msa.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+#include 

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions

2015-07-01 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon Jun 29 
21:15:12 2015 +0530| [2eb28e889d9c16914e547cc128db521b5d6c5390] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions in 
new file mpegvideo_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2eb28e889d9c16914e547cc128db521b5d6c5390
---

 libavcodec/mips/Makefile  |2 +
 libavcodec/mips/mpegvideo_init_mips.c |   37 +
 libavcodec/mips/mpegvideo_msa.c   |  250 +
 libavcodec/mpegvideo.c|2 +
 libavcodec/mpegvideo.h|1 +
 libavutil/mips/generic_macros_msa.h   |   94 +
 6 files changed, 386 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index c0ecb15..277ac2a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -29,6 +29,7 @@ OBJS-$(CONFIG_QPELDSP)+= 
mips/qpeldsp_init_mips.o
 OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
 OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
 OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -47,5 +48,6 @@ MSA-OBJS-$(CONFIG_QPELDSP)+= 
mips/qpeldsp_msa.o
 MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
 MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
+MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
new file mode 100644
index 000..ee14b31
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+
+#if HAVE_MSA  /* the helper below is compiled only when HAVE_MSA is non-zero */
+static av_cold void dct_unquantize_init_msa(MpegEncContext *s)  /* points three dct_unquantize hooks of s at MSA versions */
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_msa;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_msa;
+s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_msa;  /* no other dct_unquantize_* member is assigned in this function */
+}
+#endif  // #if HAVE_MSA
+
+av_cold void ff_mpv_common_init_mips(MpegEncContext *s)  /* MIPS init entry point for MpegEncContext */
+{
+#if HAVE_MSA
+dct_unquantize_init_msa(s);  /* sole action; when HAVE_MSA is 0 this function leaves s untouched */
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/mpegvideo_msa.c b/libavcodec/mips/mpegvideo_msa.c
new file mode 100644
index 000..aa9ef77
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_msa.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h263dsp_mips.h"
+
+static void h263_dct_unquantize_msa(int16_t *block, int16_t qmul,
+   

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for pixblock functions

2015-06-29 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Sun Jun 14 
23:44:26 2015 +0530| [d9deae04a78b6b698b90d050a67a3bd9155aba74] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for pixblock functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for pixblock functions in 
new file pixblockdsp_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d9deae04a78b6b698b90d050a67a3bd9155aba74
---

 libavcodec/mips/Makefile|2 +
 libavcodec/mips/pixblockdsp_init_mips.c |   53 
 libavcodec/mips/pixblockdsp_mips.h  |   33 +++
 libavcodec/mips/pixblockdsp_msa.c   |  143 +++
 libavcodec/pixblockdsp.c|2 +
 libavcodec/pixblockdsp.h|2 +
 libavutil/mips/generic_macros_msa.h |8 ++
 7 files changed, 243 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 823a2c5..c0ecb15 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -28,6 +28,7 @@ OBJS-$(CONFIG_H263DSP)+= 
mips/h263dsp_init_mips.o
 OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o
 OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
 OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
+OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -45,5 +46,6 @@ MSA-OBJS-$(CONFIG_H263DSP)+= 
mips/h263dsp_msa.o
 MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o
 MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
 MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
+MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/pixblockdsp_init_mips.c 
b/libavcodec/mips/pixblockdsp_init_mips.c
new file mode 100644
index 000..0f2fb15
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_init_mips.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixblockdsp_mips.h"
+
+#if HAVE_MSA  /* the helper below is compiled only when HAVE_MSA is non-zero */
+static av_cold void pixblockdsp_init_msa(PixblockDSPContext *c,  /* installs MSA diff_pixels/get_pixels into c */
+ AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{  /* high_bit_depth is not read here; the choice below uses avctx->bits_per_raw_sample */
+c->diff_pixels = ff_diff_pixels_msa;  /* assigned unconditionally */
+
+switch (avctx->bits_per_raw_sample) {
+case 9:
+case 10:
+case 12:
+case 14:  /* 9, 10, 12 and 14 all share the 16-bit-named getter below */
+c->get_pixels = ff_get_pixels_16_msa;
+break;
+default:
+if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type !=
+AVMEDIA_TYPE_VIDEO) {
+c->get_pixels = ff_get_pixels_8_msa;  /* if neither condition holds, get_pixels is not assigned here */
+}
+break;
+}
+}
+#endif  // #if HAVE_MSA
+
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,  /* MIPS init entry point for PixblockDSPContext */
+  unsigned high_bit_depth)
+{
+#if HAVE_MSA
+pixblockdsp_init_msa(c, avctx, high_bit_depth);  /* sole action; when HAVE_MSA is 0 this function leaves c untouched */
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/pixblockdsp_mips.h 
b/libavcodec/mips/pixblockdsp_mips.h
new file mode 100644
index 000..3eee6e0
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for block functions

2015-06-22 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Sun Jun 14 
23:44:25 2015 +0530| [f6276842f38d0511a2d2ab4bb7f5b47b195c8de1] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for block functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for block functions in new 
file blockdsp_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f6276842f38d0511a2d2ab4bb7f5b47b195c8de1
---

 libavcodec/blockdsp.c|2 +
 libavcodec/blockdsp.h|1 +
 libavcodec/mips/Makefile |2 +
 libavcodec/mips/blockdsp_init_mips.c |   40 
 libavcodec/mips/blockdsp_mips.h  |   31 
 libavcodec/mips/blockdsp_msa.c   |   86 ++
 6 files changed, 162 insertions(+)

diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c
index f5259f6..8480f0b 100644
--- a/libavcodec/blockdsp.c
+++ b/libavcodec/blockdsp.c
@@ -77,4 +77,6 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, 
AVCodecContext *avctx)
 #else
 ff_blockdsp_init_x86(c, high_bit_depth);
 #endif /* FF_API_XVMC */
+if (ARCH_MIPS)
+ff_blockdsp_init_mips(c, high_bit_depth);
 }
diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h
index c7ad265..32ea107 100644
--- a/libavcodec/blockdsp.h
+++ b/libavcodec/blockdsp.h
@@ -49,5 +49,6 @@ void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned 
high_bit_depth,
 #else
 void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
 #endif /* FF_API_XVMC */
+void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth);
 
 #endif /* AVCODEC_BLOCKDSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index b2c8a7b..823a2c5 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -27,6 +27,7 @@ OBJS-$(CONFIG_H264PRED)   += 
mips/h264pred_init_mips.o
 OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o
 OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o
 OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
+OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -43,5 +44,6 @@ MSA-OBJS-$(CONFIG_H264PRED)   += 
mips/h264pred_msa.o
 MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o
 MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o
 MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
+MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/blockdsp_init_mips.c 
b/libavcodec/mips/blockdsp_init_mips.c
new file mode 100644
index 000..99ae316
--- /dev/null
+++ b/libavcodec/mips/blockdsp_init_mips.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "blockdsp_mips.h"
+
+#if HAVE_MSA  /* the helper below is compiled only when HAVE_MSA is non-zero */
+static av_cold void blockdsp_init_msa(BlockDSPContext *c,  /* installs MSA clear/fill routines into c */
+  unsigned high_bit_depth)
+{  /* high_bit_depth is not read here: all four hooks are set regardless of its value */
+c->clear_block = ff_clear_block_msa;
+c->clear_blocks = ff_clear_blocks_msa;
+
+c->fill_block_tab[0] = ff_fill_block16_msa;  /* slot 0 gets the "16" variant */
+c->fill_block_tab[1] = ff_fill_block8_msa;  /* slot 1 gets the "8" variant */
+}
+#endif  // #if HAVE_MSA
+
+void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth)  /* MIPS init entry point for BlockDSPContext */
+{
+#if HAVE_MSA
+blockdsp_init_msa(c, high_bit_depth);  /* sole action; when HAVE_MSA is 0 this function leaves c untouched */
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/blockdsp_mips.h b/libavcodec/mips/blockdsp_mips.h
new file mode 100644
index 000..0b6bb67
--- /dev/null
+++ b/libavcodec/mips/blockdsp_mips.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the 

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for hpel functions

2015-06-19 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Sun Jun 14 
23:44:24 2015 +0530| [ee3ef5fda2f11cb5bf555d4f49698eb5dcde6ee1] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for hpel functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for hpel functions in new 
file hpeldsp_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ee3ef5fda2f11cb5bf555d4f49698eb5dcde6ee1
---

 libavcodec/hpeldsp.c|2 +
 libavcodec/hpeldsp.h|1 +
 libavcodec/mips/Makefile|2 +
 libavcodec/mips/hpeldsp_init_mips.c |   73 ++
 libavcodec/mips/hpeldsp_mips.h  |   87 ++
 libavcodec/mips/hpeldsp_msa.c   | 1498 +++
 libavutil/mips/generic_macros_msa.h |  162 
 7 files changed, 1825 insertions(+)

diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index 7763760..8e2fd8f 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -365,4 +365,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
 ff_hpeldsp_init_ppc(c, flags);
 if (ARCH_X86)
 ff_hpeldsp_init_x86(c, flags);
+if (ARCH_MIPS)
+ff_hpeldsp_init_mips(c, flags);
 }
diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h
index 07c293a..1a3cea5 100644
--- a/libavcodec/hpeldsp.h
+++ b/libavcodec/hpeldsp.h
@@ -99,5 +99,6 @@ void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags);
 
 #endif /* AVCODEC_HPELDSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 7742eea..b2c8a7b 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -26,6 +26,7 @@ OBJS-$(CONFIG_H264CHROMA) += 
mips/h264chroma_init_mips.o
 OBJS-$(CONFIG_H264PRED)   += mips/h264pred_init_mips.o
 OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o
 OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o
+OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -41,5 +42,6 @@ MSA-OBJS-$(CONFIG_H264CHROMA) += 
mips/h264chroma_msa.o
 MSA-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_msa.o
 MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o
 MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o
+MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/hpeldsp_init_mips.c 
b/libavcodec/mips/hpeldsp_init_mips.c
new file mode 100644
index 000..82f2310
--- /dev/null
+++ b/libavcodec/mips/hpeldsp_init_mips.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../hpeldsp.h"
+#include "libavcodec/mips/hpeldsp_mips.h"
+
+#if HAVE_MSA
+static void ff_hpeldsp_init_msa(HpelDSPContext *c, int flags)
+{
+c->put_pixels_tab[0][0] = ff_put_pixels16_msa;
+c->put_pixels_tab[0][1] = ff_put_pixels16_x2_msa;
+c->put_pixels_tab[0][2] = ff_put_pixels16_y2_msa;
+c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_msa;
+
+c->put_pixels_tab[1][0] = ff_put_pixels8_msa;
+c->put_pixels_tab[1][1] = ff_put_pixels8_x2_msa;
+c->put_pixels_tab[1][2] = ff_put_pixels8_y2_msa;
+c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_msa;
+
+c->put_pixels_tab[2][1] = ff_put_pixels4_x2_msa;
+c->put_pixels_tab[2][2] = ff_put_pixels4_y2_msa;
+c->put_pixels_tab[2][3] = ff_put_pixels4_xy2_msa;
+
+c->put_no_rnd_pixels_tab[0][0

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions

2015-06-17 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Sun Jun 14 
23:44:22 2015 +0530| [63eaf529bcfa2f685f5978d4ba4d327ac837c2e2] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions in 
new file h263dsp_msa.c

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=63eaf529bcfa2f685f5978d4ba4d327ac837c2e2
---

 libavcodec/h263dsp.c|2 +
 libavcodec/h263dsp.h|1 +
 libavcodec/mips/Makefile|2 +
 libavcodec/mips/h263dsp_init_mips.c |   36 
 libavcodec/mips/h263dsp_mips.h  |   36 
 libavcodec/mips/h263dsp_msa.c   |  164 +++
 6 files changed, 241 insertions(+)

diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index a70ff24..b3c0bcd 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -121,4 +121,6 @@ av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 
 if (ARCH_X86)
 ff_h263dsp_init_x86(ctx);
+if (ARCH_MIPS)
+ff_h263dsp_init_mips(ctx);
 }
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index d2cc2ff..1abea3c 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -30,5 +30,6 @@ typedef struct H263DSPContext {
 
 void ff_h263dsp_init(H263DSPContext *ctx);
 void ff_h263dsp_init_x86(H263DSPContext *ctx);
+void ff_h263dsp_init_mips(H263DSPContext *ctx);
 
 #endif /* AVCODEC_H263DSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index d80d4e6..63c7298 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -24,6 +24,7 @@ OBJS-$(CONFIG_H264DSP)+= 
mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_init_mips.o
 OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
 OBJS-$(CONFIG_H264PRED)   += mips/h264pred_init_mips.o
+OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -37,5 +38,6 @@ MSA-OBJS-$(CONFIG_H264DSP)+= 
mips/h264dsp_msa.o\
 MSA-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_msa.o
 MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o
 MSA-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_msa.o
+MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h263dsp_init_mips.c 
b/libavcodec/mips/h263dsp_init_mips.c
new file mode 100644
index 000..09bd937
--- /dev/null
+++ b/libavcodec/mips/h263dsp_init_mips.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+
+#if HAVE_MSA  /* the helper below is compiled only when HAVE_MSA is non-zero */
+static av_cold void h263dsp_init_msa(H263DSPContext *c)  /* installs the MSA H.263 loop filters into c */
+{
+c->h263_h_loop_filter = ff_h263_h_loop_filter_msa;
+c->h263_v_loop_filter = ff_h263_v_loop_filter_msa;  /* both hooks are set unconditionally */
+}
+#endif  // #if HAVE_MSA
+
+av_cold void ff_h263dsp_init_mips(H263DSPContext *c)  /* MIPS init entry point for H263DSPContext */
+{
+#if HAVE_MSA
+h263dsp_init_msa(c);  /* sole action; when HAVE_MSA is 0 this function leaves c untouched */
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/h263dsp_mips.h b/libavcodec/mips/h263dsp_mips.h
new file mode 100644
index 000..99a43cd
--- /dev/null
+++ b/libavcodec/mips/h263dsp_mips.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILI

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions

2015-06-11 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Jun 10 
20:06:40 2015 +0530| [fb92f3ecb4d48a5612ee61aa39bd538cac9d08cf] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions in 
new file h264idct_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fb92f3ecb4d48a5612ee61aa39bd538cac9d08cf
---

 libavcodec/mips/Makefile|3 +-
 libavcodec/mips/h264dsp_init_mips.c |   15 ++
 libavcodec/mips/h264dsp_mips.h  |   24 ++
 libavcodec/mips/h264idct_msa.c  |  469 +++
 libavutil/mips/generic_macros_msa.h |   96 +++
 5 files changed, 606 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 0a97e7c..993c649 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -31,7 +31,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevc_idct_msa.o  \
  mips/hevc_lpf_sao_msa.o   \
  mips/hevcpred_msa.o
-MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
+MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\
+ mips/h264idct_msa.o
 MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o
 MSA-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
diff --git a/libavcodec/mips/h264dsp_init_mips.c 
b/libavcodec/mips/h264dsp_init_mips.c
index d9182f2..7f74adf 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -62,6 +62,21 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
+
+c->h264_idct_add = ff_h264_idct_add_msa;
+c->h264_idct8_add = ff_h264_idct8_addblk_msa;
+c->h264_idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
+c->h264_idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
+c->h264_idct_add16 = ff_h264_idct_add16_msa;
+c->h264_idct8_add4 = ff_h264_idct8_add4_msa;
+
+if (chroma_format_idc <= 1)
+c->h264_idct_add8 = ff_h264_idct_add8_msa;
+else
+c->h264_idct_add8 = ff_h264_idct_add8_422_msa;
+
+c->h264_idct_add16intra = ff_h264_idct_add16_intra_msa;
+c->h264_luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
 }  // if (8 == bit_depth)
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h
index ef380fa..0e39057 100644
--- a/libavcodec/mips/h264dsp_mips.h
+++ b/libavcodec/mips/h264dsp_mips.h
@@ -41,6 +41,30 @@ void ff_h264_h_loop_filter_luma_mbaff_msa(uint8_t *src, 
int32_t stride,
   int32_t alpha, int32_t beta,
   int8_t *tc0);
 
+void ff_h264_idct_add_msa(uint8_t *dst, int16_t *src, int32_t dst_stride);
+void ff_h264_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src,
+   int32_t dst_stride);
+void ff_h264_deq_idct_luma_dc_msa(int16_t *dst, int16_t *src,
+  int32_t de_q_val);
+void ff_h264_idct_add16_msa(uint8_t *dst, const int32_t *blk_offset,
+int16_t *block, int32_t stride,
+const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add16_intra_msa(uint8_t *dst, const int32_t *blk_offset,
+  int16_t *block, int32_t dst_stride,
+  const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add8_msa(uint8_t **dst, const int32_t *blk_offset,
+   int16_t *block, int32_t dst_stride,
+   const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add8_422_msa(uint8_t **dst, const int32_t *blk_offset,
+   int16_t *block, int32_t dst_stride,
+   const uint8_t nnzc[15 * 8]);
+void ff_h264_idct8_addblk_msa(uint8_t *dst, int16_t *src, int32_t dst_stride);
+void ff_h264_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src,
+ int32_t dst_stride);
+void ff_h264_idct8_add4_msa(uint8_t *dst, const int *blk_offset,
+int16_t *blk, int dst_stride,
+const uint8_t nnzc[15 * 8]);
+
 void ff_h264_h_lpf_luma_intra_msa(uint8_t *src, int s

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction functions

2015-06-11 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Jun 10 
20:06:39 2015 +0530| [1d70b6fe1d9d67a35daf2ec4c653ba3eff5d31b7] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction 
functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction 
functions in new file h264pred_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1d70b6fe1d9d67a35daf2ec4c653ba3eff5d31b7
---

 libavcodec/h264pred.c|1 +
 libavcodec/h264pred.h|2 +
 libavcodec/mips/Makefile |2 +
 libavcodec/mips/h264dsp_mips.h   |   27 ++
 libavcodec/mips/h264pred_init_mips.c |  104 +
 libavcodec/mips/h264pred_msa.c   |  723 ++
 libavutil/mips/generic_macros_msa.h  |   11 +
 7 files changed, 870 insertions(+)

diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 044fc90..497b080 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -594,4 +594,5 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int 
codec_id,
 
 if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, 
chroma_format_idc);
 if (ARCH_X86) ff_h264_pred_init_x86(h, codec_id, bit_depth, 
chroma_format_idc);
+if (ARCH_MIPS) ff_h264_pred_init_mips(h, codec_id, bit_depth, 
chroma_format_idc);
 }
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 6708292..edeca91 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -117,5 +117,7 @@ void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
const int bit_depth, const int chroma_format_idc);
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+const int bit_depth, const int chroma_format_idc);
 
 #endif /* AVCODEC_H264PRED_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index aa98774..0a97e7c 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -22,6 +22,7 @@ OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_init_mips.o  \
  mips/hevcpred_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
+OBJS-$(CONFIG_H264PRED)   += mips/h264pred_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -32,5 +33,6 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o
+MSA-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h
index 319f6d3..ef380fa 100644
--- a/libavcodec/mips/h264dsp_mips.h
+++ b/libavcodec/mips/h264dsp_mips.h
@@ -68,6 +68,33 @@ void ff_weight_h264_pixels8_8_msa(uint8_t *src, int stride, 
int height,
 void ff_weight_h264_pixels4_8_msa(uint8_t *src, int stride, int height,
   int log2_denom, int weight, int offset);
 
+void ff_h264_intra_predict_plane_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_dc_4blk_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_hor_dc_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_vert_dc_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src,
+  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src,
+  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src,
+  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src,
+  ptrdiff_t stride);
+void ff_h264_intra_predict_plane_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_vert_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_horiz_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_16x16_msa(uint8_t *src, ptrdiff_t stride);
+v

[FFmpeg-cvslog] avcodec/mips: Add 'const' to static arrays in HEVC MSA code

2015-06-11 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jun 11 
14:25:50 2015 +0530| [e4fb8816f99d09aa6436bff0e4e2453a02b053e1] | committer: 
Michael Niedermayer

avcodec/mips: Add 'const' to static arrays in HEVC MSA code

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e4fb8816f99d09aa6436bff0e4e2453a02b053e1
---

 libavcodec/mips/hevc_idct_msa.c   |   12 ++--
 libavcodec/mips/hevc_mc_uni_msa.c |2 +-
 libavcodec/mips/hevcpred_msa.c|4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/mips/hevc_idct_msa.c b/libavcodec/mips/hevc_idct_msa.c
index b5a4c5a..975d91f 100644
--- a/libavcodec/mips/hevc_idct_msa.c
+++ b/libavcodec/mips/hevc_idct_msa.c
@@ -21,18 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
 
-static int16_t gt8x8_cnst[16] = {
+static const int16_t gt8x8_cnst[16] = {
 64, 64, 83, 36, 89, 50, 18, 75, 64, -64, 36, -83, 75, -89, -50, -18
 };
 
-static int16_t gt16x16_cnst[64] = {
+static const int16_t gt16x16_cnst[64] = {
 64, 83, 64, 36, 89, 75, 50, 18, 90, 80, 57, 25, 70, 87, 9, 43,
 64, 36, -64, -83, 75, -18, -89, -50, 87, 9, -80, -70, -43, 57, -25, -90,
 64, -36, -64, 83, 50, -89, 18, 75, 80, -70, -25, 90, -87, 9, 43, 57,
 64, -83, 64, -36, 18, -50, 75, -89, 70, -87, 90, -80, 9, -43, -57, 25
 };
 
-static int16_t gt32x32_cnst0[256] = {
+static const int16_t gt32x32_cnst0[256] = {
 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4,
 90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13,
 88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22,
@@ -51,18 +51,18 @@ static int16_t gt32x32_cnst0[256] = {
 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90
 };
 
-static int16_t gt32x32_cnst1[64] = {
+static const int16_t gt32x32_cnst1[64] = {
 90, 87, 80, 70, 57, 43, 25, 9, 87, 57, 9, -43, -80, -90, -70, -25,
 80, 9, -70, -87, -25, 57, 90, 43, 70, -43, -87, 9, 90, 25, -80, -57,
 57, -80, -25, 90, -9, -87, 43, 70, 43, -90, 57, 25, -87, 70, 9, -80,
 25, -70, 90, -80, 43, 9, -57, 87, 9, -25, 43, -57, 70, -80, 87, -90
 };
 
-static int16_t gt32x32_cnst2[16] = {
+static const int16_t gt32x32_cnst2[16] = {
 89, 75, 50, 18, 75, -18, -89, -50, 50, -89, 18, 75, 18, -50, 75, -89
 };
 
-static int16_t gt32x32_cnst3[16] = {
+static const int16_t gt32x32_cnst3[16] = {
 64, 64, 64, 64, 83, 36, -36, -83, 64, -64, -64, 64, 36, -83, 83, -36
 };
 
diff --git a/libavcodec/mips/hevc_mc_uni_msa.c 
b/libavcodec/mips/hevc_mc_uni_msa.c
index 7d02ce8..61a67c9 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -249,7 +249,7 @@ static void copy_width64_msa(uint8_t *src, int32_t 
src_stride,
 copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
 }
 
-uint8_t mc_filt_mask_arr[16 * 3] = {
+static const uint8_t mc_filt_mask_arr[16 * 3] = {
 /* 8 width cases */
 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
 /* 4 width cases */
diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c
index 8a8aa96..5d9299f 100644
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
@@ -22,11 +22,11 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "hevcpred_mips.h"
 
-static int8_t intra_pred_angle_up[17] = {
+static const int8_t intra_pred_angle_up[17] = {
 -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32
 };
 
-static int8_t intra_pred_angle_low[16] = {
+static const int8_t intra_pred_angle_low[16] = {
 32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26
 };
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions

2015-06-11 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jun 11 
11:45:01 2015 +0530| [b87dc70c6590556d42ddc21ba0f6e9c790ddd23d] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions in 
new file h264chroma_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b87dc70c6590556d42ddc21ba0f6e9c790ddd23d
---

 libavcodec/mips/Makefile   |1 +
 libavcodec/mips/h264chroma_init_mips.c |   21 +
 libavcodec/mips/h264chroma_mips.h  |   12 +
 libavcodec/mips/h264chroma_msa.c   | 2003 
 libavutil/mips/generic_macros_msa.h|   56 +
 5 files changed, 2093 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 82d1d70..aa98774 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -31,5 +31,6 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevc_lpf_sao_msa.o   \
  mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
+MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264chroma_init_mips.c 
b/libavcodec/mips/h264chroma_init_mips.c
index 4c10da7..1cc5767 100644
--- a/libavcodec/mips/h264chroma_init_mips.c
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Zhou Xiaoyong 
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
  *
  * This file is part of FFmpeg.
  *
@@ -20,6 +21,23 @@
 
 #include "h264chroma_mips.h"
 
+#if HAVE_MSA
+static av_cold void h264chroma_init_msa(H264ChromaContext *c, int bit_depth)
+{
+const int high_bit_depth = bit_depth > 8;
+
+if (!high_bit_depth) {
+c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_msa;
+c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_msa;
+c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_msa;
+
+c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_msa;
+c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_msa;
+c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_msa;
+}
+}
+#endif  // #if HAVE_MSA
+
 #if HAVE_LOONGSON3
 static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
 {
@@ -36,6 +54,9 @@ static av_cold void h264chroma_init_mmi(H264ChromaContext *c, 
int bit_depth)
 
 av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
 {
+#if HAVE_MSA
+h264chroma_init_msa(c, bit_depth);
+#endif  // #if HAVE_MSA
 #if HAVE_LOONGSON3
 h264chroma_init_mmi(c, bit_depth);
 #endif /* HAVE_LOONGSON3 */
diff --git a/libavcodec/mips/h264chroma_mips.h 
b/libavcodec/mips/h264chroma_mips.h
index 314e8a3..7a373b8 100644
--- a/libavcodec/mips/h264chroma_mips.h
+++ b/libavcodec/mips/h264chroma_mips.h
@@ -22,6 +22,18 @@
 #define H264_CHROMA_MIPS_H
 
 #include "libavcodec/h264.h"
+void ff_put_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
+void ff_put_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
+void ff_put_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
+void ff_avg_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
+void ff_avg_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
+void ff_avg_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride,
+int height, int x, int y);
 
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
 int h, int x, int y);
diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
new file mode 100644
index 000..67d0bc1
--- /dev/null
+++ b/libavcodec/mips/h264chroma_msa.c
@@ -0,0 +1,2003 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even th

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions

2015-06-10 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jun  4 
13:49:49 2015 +0530| [d6d98237ed01aec7d79e7724d43004c8b9c8d383] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction 
functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction 
functions in new file hevcpred_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d6d98237ed01aec7d79e7724d43004c8b9c8d383
---

 libavcodec/hevcpred.c|3 +
 libavcodec/hevcpred.h|1 +
 libavcodec/mips/Makefile |6 +-
 libavcodec/mips/hevcpred_init_mips.c |   48 +
 libavcodec/mips/hevcpred_mips.h  |   73 +
 libavcodec/mips/hevcpred_msa.c   | 3084 ++
 libavutil/mips/generic_macros_msa.h  |   46 +
 7 files changed, 3259 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevcpred.c b/libavcodec/hevcpred.c
index 4598229..02c1766 100644
--- a/libavcodec/hevcpred.c
+++ b/libavcodec/hevcpred.c
@@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
 HEVC_PRED(8);
 break;
 }
+
+if (ARCH_MIPS)
+ff_hevc_pred_init_mips(hpc, bit_depth);
 }
diff --git a/libavcodec/hevcpred.h b/libavcodec/hevcpred.h
index 7f14a76..eb17663 100644
--- a/libavcodec/hevcpred.h
+++ b/libavcodec/hevcpred.h
@@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
 } HEVCPredContext;
 
 void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
+void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
 
 #endif /* AVCODEC_HEVCPRED_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index b8bb1fe..82d1d70 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER)+= 
mips/aacdec_mips.o\
  mips/aacpsdsp_mips.o
 MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
+OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o  \
+ mips/hevcpred_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
@@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevc_mc_bi_msa.o \
  mips/hevc_mc_biw_msa.o\
  mips/hevc_idct_msa.o  \
- mips/hevc_lpf_sao_msa.o
+ mips/hevc_lpf_sao_msa.o   \
+ mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/hevcpred_init_mips.c 
b/libavcodec/mips/hevcpred_init_mips.c
new file mode 100644
index 000..331cfac
--- /dev/null
+++ b/libavcodec/mips/hevcpred_init_mips.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hevc.h"
+#include "libavcodec/mips/hevcpred_mips.h"
+
+#if HAVE_MSA
+static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
+{
+if (8 == bit_depth) {
+c->intra_pred[2] = ff_intra_pred_8_16x16_msa;
+c->intra_pred[3] = ff_intra_pred_8_32x32_msa;
+c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa;
+c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa;
+c->pred_planar[2] = ff_hevc_intra_pred_p

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao functions

2015-06-10 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Tue Jun  9 
21:08:26 2015 +0530| [271195f85bbce284ac80ed31c62fba9b7e74e99d] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao 
functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao 
functions in new file hevc_lpf_sao_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

In this patch, in comparison with the previous patch, duplicated C functions are 
removed.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=271195f85bbce284ac80ed31c62fba9b7e74e99d
---

 libavcodec/mips/Makefile|3 +-
 libavcodec/mips/hevc_lpf_sao_msa.c  | 2088 +++
 libavcodec/mips/hevcdsp_init_mips.c |   26 +
 libavcodec/mips/hevcdsp_mips.h  |   30 +
 libavutil/mips/generic_macros_msa.h |  111 +-
 5 files changed, 2256 insertions(+), 2 deletions(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 463072a..b8bb1fe 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -26,7 +26,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevc_mc_uniw_msa.o   \
  mips/hevc_mc_bi_msa.o \
  mips/hevc_mc_biw_msa.o\
- mips/hevc_idct_msa.o
+ mips/hevc_idct_msa.o  \
+ mips/hevc_lpf_sao_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c 
b/libavcodec/mips/hevc_lpf_sao_msa.c
new file mode 100644
index 000..da1db51
--- /dev/null
+++ b/libavcodec/mips/hevc_lpf_sao_msa.c
@@ -0,0 +1,2088 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "libavcodec/mips/hevcdsp_mips.h"
+
+static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
+ int32_t beta, int32_t *tc,
+ uint8_t *p_is_pcm, uint8_t *q_is_pcm)
+{
+uint8_t *p3 = src - (stride << 2);
+uint8_t *p2 = src - ((stride << 1) + stride);
+uint8_t *p1 = src - (stride << 1);
+uint8_t *p0 = src - stride;
+uint8_t *q0 = src;
+uint8_t *q1 = src + stride;
+uint8_t *q2 = src + (stride << 1);
+uint8_t *q3 = src + (stride << 1) + stride;
+uint8_t flag0, flag1;
+int32_t dp00, dq00, dp30, dq30, d00, d30;
+int32_t dp04, dq04, dp34, dq34, d04, d34;
+int32_t tc0, p_is_pcm0, q_is_pcm0, beta30, beta20, tc250;
+int32_t tc4, p_is_pcm4, q_is_pcm4, tc254, tmp;
+uint64_t dst_val0, dst_val1;
+v16u8 dst0, dst1, dst2, dst3, dst4, dst5;
+v2i64 cmp0, cmp1, cmp2, p_is_pcm_vec, q_is_pcm_vec;
+v8u16 temp0, temp1;
+v8i16 temp2;
+v8i16 tc_pos, tc_neg;
+v8i16 diff0, diff1, delta0, delta1, delta2, abs_delta0;
+v16i8 zero = { 0 };
+v8u16 p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src, q3_src;
+
+dp00 = abs(p2[0] - (p1[0] << 1) + p0[0]);
+dq00 = abs(q2[0] - (q1[0] << 1) + q0[0]);
+dp30 = abs(p2[3] - (p1[3] << 1) + p0[3]);
+dq30 = abs(q2[3] - (q1[3] << 1) + q0[3]);
+d00 = dp00 + dq00;
+d30 = dp30 + dq30;
+p_is_pcm0 = p_is_pcm[0];
+q_is_pcm0 = q_is_pcm[0];
+dp04 = abs(p2[4] - (p1[4] << 1) + p0[4]);
+dq04 = abs(q2[4] - (q1[4] << 1) + q0[4]);
+dp34 = abs(p2[7] - (p1[7] << 1) + p0[7]);
+dq34 = abs(q2[7] - (q1[7] << 1) + q0[7]);
+d04 = dp04 + dq04;
+d34 = dp34 + dq34;
+p_is_pcm4 = p_is_pcm[1];
+q_is_pcm4 = q_is_pcm[1];
+
+if (!p_is_pcm0 || !p_is_pcm4 || !q_is_pcm0 || !q_i

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions

2015-06-04 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu Jun  4 
13:49:47 2015 +0530| [a34d902325895a1cecd322cbe94915225c91017a] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions in 
new file hevc_idct_msa.c
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a34d902325895a1cecd322cbe94915225c91017a
---

 libavcodec/mips/Makefile|3 +-
 libavcodec/mips/hevc_idct_msa.c |  939 +++
 libavcodec/mips/hevcdsp_init_mips.c |   14 +
 libavcodec/mips/hevcdsp_mips.h  |   23 +
 libavutil/mips/generic_macros_msa.h |  195 
 5 files changed, 1173 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 691ea35..463072a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -25,7 +25,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
  mips/hevc_mc_bi_msa.o \
- mips/hevc_mc_biw_msa.o
+ mips/hevc_mc_biw_msa.o\
+ mips/hevc_idct_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/hevc_idct_msa.c b/libavcodec/mips/hevc_idct_msa.c
new file mode 100644
index 000..b5a4c5a
--- /dev/null
+++ b/libavcodec/mips/hevc_idct_msa.c
@@ -0,0 +1,939 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "libavcodec/mips/hevcdsp_mips.h"
+
+static int16_t gt8x8_cnst[16] = {
+64, 64, 83, 36, 89, 50, 18, 75, 64, -64, 36, -83, 75, -89, -50, -18
+};
+
+static int16_t gt16x16_cnst[64] = {
+64, 83, 64, 36, 89, 75, 50, 18, 90, 80, 57, 25, 70, 87, 9, 43,
+64, 36, -64, -83, 75, -18, -89, -50, 87, 9, -80, -70, -43, 57, -25, -90,
+64, -36, -64, 83, 50, -89, 18, 75, 80, -70, -25, 90, -87, 9, 43, 57,
+64, -83, 64, -36, 18, -50, 75, -89, 70, -87, 90, -80, 9, -43, -57, 25
+};
+
+static int16_t gt32x32_cnst0[256] = {
+90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4,
+90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13,
+88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22,
+85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31,
+82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38,
+78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46,
+73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54,
+67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61,
+61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67,
+54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73,
+46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78,
+38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82,
+31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85,
+22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88,
+13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90,
+4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90
+};
+
+static int16_t gt32x32_cnst1[64] = {
+90, 87, 80, 70, 57, 43, 25, 9, 87, 57, 9, -43, -80, -90, -70, -25,
+80, 9, -70, -87, -25, 57, 90, 43, 70, -43, -87, 9, 90, 25, -80, -57,
+57, -80, -25, 90, -9, -87, 43, 70, 43, -90, 57, 25, -87, 70, 9, -80,
+25, -70, 90, -80, 43, 9, -57, 87, 9, -2

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions

2015-06-03 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Tue Jun  2 
14:26:11 2015 +0530| [c96c73b0b0520fc734554d34b4693ca7af4edd02] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c96c73b0b0520fc734554d34b4693ca7af4edd02
---

 libavcodec/mips/hevcdsp_init_mips.c |   32 +
 libavcodec/mips/hevcdsp_mips.h  |   30 +
 libavcodec/mips/hevcdsp_msa.c   | 2265 +++
 3 files changed, 2327 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c 
b/libavcodec/mips/hevcdsp_init_mips.c
index 5dc13fb..837c046 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -62,6 +62,38 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
 
+c->put_hevc_epel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa;
+c->put_hevc_epel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa;
+c->put_hevc_epel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa;
+c->put_hevc_epel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa;
+c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa;
+c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa;
+c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa;
+
+c->put_hevc_epel[1][0][1] = ff_hevc_put_hevc_epel_h4_8_msa;
+c->put_hevc_epel[2][0][1] = ff_hevc_put_hevc_epel_h6_8_msa;
+c->put_hevc_epel[3][0][1] = ff_hevc_put_hevc_epel_h8_8_msa;
+c->put_hevc_epel[4][0][1] = ff_hevc_put_hevc_epel_h12_8_msa;
+c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_8_msa;
+c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_8_msa;
+c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_msa;
+
+c->put_hevc_epel[1][1][0] = ff_hevc_put_hevc_epel_v4_8_msa;
+c->put_hevc_epel[2][1][0] = ff_hevc_put_hevc_epel_v6_8_msa;
+c->put_hevc_epel[3][1][0] = ff_hevc_put_hevc_epel_v8_8_msa;
+c->put_hevc_epel[4][1][0] = ff_hevc_put_hevc_epel_v12_8_msa;
+c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_8_msa;
+c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_8_msa;
+c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_msa;
+
+c->put_hevc_epel[1][1][1] = ff_hevc_put_hevc_epel_hv4_8_msa;
+c->put_hevc_epel[2][1][1] = ff_hevc_put_hevc_epel_hv6_8_msa;
+c->put_hevc_epel[3][1][1] = ff_hevc_put_hevc_epel_hv8_8_msa;
+c->put_hevc_epel[4][1][1] = ff_hevc_put_hevc_epel_hv12_8_msa;
+c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_8_msa;
+c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_8_msa;
+c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_msa;
+
 c->put_hevc_qpel_uni[3][0][0] = ff_hevc_put_hevc_uni_pel_pixels8_8_msa;
 c->put_hevc_qpel_uni[4][0][0] = 
ff_hevc_put_hevc_uni_pel_pixels12_8_msa;
 c->put_hevc_qpel_uni[5][0][0] = 
ff_hevc_put_hevc_uni_pel_pixels16_8_msa;
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 64605e6..389f025 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -66,6 +66,36 @@ MC(qpel, hv, 32);
 MC(qpel, hv, 48);
 MC(qpel, hv, 64);
 
+MC(epel, h, 4);
+MC(epel, h, 6);
+MC(epel, h, 8);
+MC(epel, h, 12);
+MC(epel, h, 16);
+MC(epel, h, 24);
+MC(epel, h, 32);
+MC(epel, h, 48);
+MC(epel, h, 64);
+
+MC(epel, v, 4);
+MC(epel, v, 6);
+MC(epel, v, 8);
+MC(epel, v, 12);
+MC(epel, v, 16);
+MC(epel, v, 24);
+MC(epel, v, 32);
+MC(epel, v, 48);
+MC(epel, v, 64);
+
+MC(epel, hv, 4);
+MC(epel, hv, 6);
+MC(epel, hv, 8);
+MC(epel, hv, 12);
+MC(epel, hv, 16);
+MC(epel, hv, 24);
+MC(epel, hv, 32);
+MC(epel, hv, 48);
+MC(epel, hv, 64);
+
 #undef MC
 
 #define UNI_MC(PEL, DIR, WIDTH)
\
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 1ecef0a..ed3acbb 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -1526,6 +1526,2247 @@ static void hevc_hv_8t_64w_msa(uint8_t *src, int32_t 
src_stride,
filter_x, filter_y, height, 64);
 }
 
+static void hevc_hz_4t_4x2_msa(uint8_t *src,
+   int32_t src_stride,
+   int16_t *dst,
+   int32_t dst_stride,
+   const int8_t *filter)
+{
+v8i16 filt0, filt1;
+v16i8 src0, src1;
+v16i8 mask1, vec0, vec1;
+v8i16 dst0;
+v8

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions

2015-06-03 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Tue Jun  2 
14:26:12 2015 +0530| [aef34ab95048aade062d4c669ea272e0f08b81a4] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel 
functions.
Adds new generic macros (needed for this patch) in 
libavutil/mips/generic_macros_msa.h

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aef34ab95048aade062d4c669ea272e0f08b81a4
---

 libavcodec/mips/hevc_mc_uni_msa.c   | 2331 +--
 libavcodec/mips/hevcdsp_init_mips.c |   30 +
 libavcodec/mips/hevcdsp_mips.h  |   30 +
 libavutil/mips/generic_macros_msa.h |   51 +
 4 files changed, 2364 insertions(+), 78 deletions(-)

diff --git a/libavcodec/mips/hevc_mc_uni_msa.c 
b/libavcodec/mips/hevc_mc_uni_msa.c
index 09179d2..7d02ce8 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -319,6 +319,44 @@ uint8_t mc_filt_mask_arr[16 * 3] = {
 res7_m, out0, out1, out2, out3);  \
 }
 
+#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1)   \
+( { \
+v8i16 tmp0; \
+\
+tmp0 = __msa_dotp_s_h((v16i8) vec0, (v16i8) filt0); \
+tmp0 = __msa_dpadd_s_h(tmp0, (v16i8) vec1, (v16i8) filt1);  \
+\
+tmp0;   \
+} )
+
+#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \
+   mask0, mask1, filt0, filt1, \
+   out0, out1) \
+{  \
+v16i8 vec0_m, vec1_m, vec2_m, vec3_m;  \
+   \
+VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);  \
+DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \
+VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);  \
+DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);\
+}
+
+#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3,\
+   mask0, mask1, filt0, filt1,\
+   out0, out1, out2, out3)\
+{ \
+v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \
+  \
+VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
+VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
+DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+out0, out1, out2, out3);  \
+VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \
+VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \
+DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,  \
+ out0, out1, out2, out3); \
+}
+
 static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride,
  uint8_t *dst, int32_t dst_stride,
  const int8_t *filter, uint8_t rnd_val)
@@ -1696,94 +1734,2231 @@ static void hevc_hv_uni_8t_64w_msa(uint8_t *src,
filter_x, filter_y, height, 64);
 }
 
-#define UNI_MC_COPY(WIDTH) \
-void ff_hevc_put_hevc_uni_pel_pixels##WIDTH##_8_msa(uint8_t *dst,  \
-ptrdiff_t dst_stride,  \
-uint8_t *src,  \
-ptrdiff_t src_stride,  \
-int height,\
-intptr_t mx,   \
-intptr_t my,   \
-int width) \
-{  \
-copy_width##WIDTH##_msa(src, src_stride, dst, dst_stride, height); \
+static void common_hz_4t_4x2_msa(uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_str

[FFmpeg-cvslog] avcodec/mips/hevcdsp_msa: Restructure as per avutil/mips/generic_macros_msa.h

2015-05-28 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu May 28 
20:32:42 2015 +0530| [7b45790771c0db8b5039ff54ee3cfe68d40e1302] | committer: 
Michael Niedermayer

avcodec/mips/hevcdsp_msa: Restructure as per avutil/mips/generic_macros_msa.h

This patch modifies the HEVC mc MIPS-SIMD optimized code according to the improved 
version of the generic macros.

Overall, this patch only upgrades the code with styling changes and brings 
it in sync with the latest MIPS-SIMD optimized codebase at our end.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b45790771c0db8b5039ff54ee3cfe68d40e1302
---

 libavcodec/mips/hevcdsp_msa.c | 2428 ++---
 1 file changed, 842 insertions(+), 1586 deletions(-)

diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index fcc344b..1ecef0a 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -20,405 +20,181 @@
 
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
+#include "libavcodec/mips/hevc_macros_msa.h"
 
-#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\
-   filt0, filt1, filt2, filt3)\
-( {   \
-v4i32 out;\
-  \
-out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\
-out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1));  \
-out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2));  \
-out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3));  \
-out;  \
-} )
-
-#define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, \
-   filt0, filt1, filt2, filt3, \
-   var_in) \
-( { \
-v8i16 out; \
-   \
-out = __msa_dpadd_s_h((v8i16) (var_in), (v16i8) (vec0), (v16i8) (filt0));  \
-out = __msa_dpadd_s_h(out, (v16i8) (vec1), (v16i8) (filt1));   \
-out = __msa_dpadd_s_h(out, (v16i8) (vec2), (v16i8) (filt2));   \
-out = __msa_dpadd_s_h(out, (v16i8) (vec3), (v16i8) (filt3));   \
-out;   \
-} )
-
-static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride,
- int16_t * __restrict dst, int32_t dst_stride,
+static void hevc_copy_4w_msa(uint8_t *src, int32_t src_stride,
+ int16_t *dst, int32_t dst_stride,
  int32_t height)
 {
 v16i8 zero = { 0 };
 
 if (2 == height) {
-uint64_t out0, out1;
 v16i8 src0, src1;
-v8i16 input0;
+v8i16 in0;
 
-LOAD_2VECS_SB(src, src_stride, src0, src1);
+LD_SB2(src, src_stride, src0, src1);
 
 src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0);
-
-input0 = (v8i16) __msa_ilvr_b(zero, src0);
-
-input0 <<= 6;
-
-out0 = __msa_copy_u_d((v2i64) input0, 0);
-out1 = __msa_copy_u_d((v2i64) input0, 1);
-
-STORE_DWORD(dst, out0);
-dst += dst_stride;
-STORE_DWORD(dst, out1);
+in0 = (v8i16) __msa_ilvr_b(zero, src0);
+in0 <<= 6;
+ST8x2_UB(in0, dst, 2 * dst_stride);
 } else if (4 == height) {
-uint64_t out0, out1, out2, out3;
 v16i8 src0, src1, src2, src3;
-v8i16 input0, input1;
+v8i16 in0, in1;
 
-LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+LD_SB4(src, src_stride, src0, src1, src2, src3);
 
-src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0);
-src1 = (v16i8) __msa_ilvr_w((v4i32) src3, (v4i32) src2);
-
-input0 = (v8i16) __msa_ilvr_b(zero, src0);
-input1 = (v8i16) __msa_ilvr_b(zero, src1);
-
-input0 <<= 6;
-input1 <<= 6;
-
-out0 = __msa_copy_u_d((v2i64) input0, 0);
-out1 = __msa_copy_u_d((v2i64) input0, 1);
-out2 = __msa_copy_u_d((v2i64) input1, 0);
-out3 = __msa_copy_u_d((v2i64) input1, 1);
-
-STORE_DWORD(dst, out0);
-dst += dst_stride;
-STORE_DWORD(dst, out1);
-dst += dst_stride;
-STORE_DWORD(dst, out2);
-dst += dst_stride;
-STORE_DWORD(dst, out3);
+ILVR_W2_SB(src1, src0, src3, src2, src0, src1);
+ILVR_B2_SH(zero, src0, zero, src1, in0, in1);
+in0 <<= 6;
+   

[FFmpeg-cvslog] avcodec/mips: Restructure as per avutil/mips/generic_macros_msa.h

2015-05-28 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu May 28 
15:01:25 2015 +0530| [bcd7bf7eeb09a395cc01698842d1b8be9af483fc] | committer: 
Michael Niedermayer

avcodec/mips: Restructure as per avutil/mips/generic_macros_msa.h

This patch modifies H264 loopfilter, weighted & bi-weighted prediction 
MIPS-SIMD optimized code according to improved version of generic macros.
Also there are minor code alignment changes.

Overall, this patch is just upgrading the code with styling changes and will 
bring it in sync with MIPS-SIMD optimized latest codebase at our end.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bcd7bf7eeb09a395cc01698842d1b8be9af483fc
---

 libavcodec/mips/h264dsp_msa.c | 1758 +++--
 1 file changed, 634 insertions(+), 1124 deletions(-)

diff --git a/libavcodec/mips/h264dsp_msa.c b/libavcodec/mips/h264dsp_msa.c
index f728fcc..fce01ac 100644
--- a/libavcodec/mips/h264dsp_msa.c
+++ b/libavcodec/mips/h264dsp_msa.c
@@ -21,19 +21,16 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "h264dsp_mips.h"
 
-static void avc_wgt_4x2_msa(uint8_t *data,
-int32_t stride,
-int32_t log2_denom,
-int32_t src_weight,
+static void avc_wgt_4x2_msa(uint8_t *data, int32_t stride,
+int32_t log2_denom, int32_t src_weight,
 int32_t offset_in)
 {
 uint32_t data0, data1;
 v16u8 zero = { 0 };
 v16u8 src0, src1;
 v4i32 res0, res1;
-v8i16 temp0, temp1;
-v16u8 vec0, vec1;
-v8i16 wgt, denom, offset;
+v8i16 temp0, temp1, vec0, vec1, wgt, denom, offset;
+v8u16 out0, out1;
 
 offset_in <<= (log2_denom);
 
@@ -45,53 +42,40 @@ static void avc_wgt_4x2_msa(uint8_t *data,
 offset = __msa_fill_h(offset_in);
 denom = __msa_fill_h(log2_denom);
 
-data0 = LOAD_WORD(data);
-data1 = LOAD_WORD(data + stride);
+data0 = LW(data);
+data1 = LW(data + stride);
 
 src0 = (v16u8) __msa_fill_w(data0);
 src1 = (v16u8) __msa_fill_w(data1);
 
-ILVR_B_2VECS_UB(src0, src1, zero, zero, vec0, vec1);
+ILVR_B2_SH(zero, src0, zero, src1, vec0, vec1);
+MUL2(wgt, vec0, wgt, vec1, temp0, temp1);
+ADDS_SH2_SH(temp0, offset, temp1, offset, temp0, temp1);
+MAXI_SH2_SH(temp0, temp1, 0);
 
-temp0 = wgt * (v8i16) vec0;
-temp1 = wgt * (v8i16) vec1;
+out0 = (v8u16) __msa_srl_h(temp0, denom);
+out1 = (v8u16) __msa_srl_h(temp1, denom);
 
-temp0 = __msa_adds_s_h(temp0, offset);
-temp1 = __msa_adds_s_h(temp1, offset);
-
-temp0 = __msa_maxi_s_h(temp0, 0);
-temp1 = __msa_maxi_s_h(temp1, 0);
-
-temp0 = __msa_srl_h(temp0, denom);
-temp1 = __msa_srl_h(temp1, denom);
-
-temp0 = (v8i16) __msa_sat_u_h((v8u16) temp0, 7);
-temp1 = (v8i16) __msa_sat_u_h((v8u16) temp1, 7);
-
-res0 = (v4i32) __msa_pckev_b((v16i8) temp0, (v16i8) temp0);
-res1 = (v4i32) __msa_pckev_b((v16i8) temp1, (v16i8) temp1);
+SAT_UH2_UH(out0, out1, 7);
+PCKEV_B2_SW(out0, out0, out1, out1, res0, res1);
 
 data0 = __msa_copy_u_w(res0, 0);
 data1 = __msa_copy_u_w(res1, 0);
-
-STORE_WORD(data, data0);
+SW(data0, data);
 data += stride;
-STORE_WORD(data, data1);
+SW(data1, data);
 }
 
-static void avc_wgt_4x4multiple_msa(uint8_t *data,
-int32_t stride,
-int32_t height,
-int32_t log2_denom,
-int32_t src_weight,
-int32_t offset_in)
+static void avc_wgt_4x4multiple_msa(uint8_t *data, int32_t stride,
+int32_t height, int32_t log2_denom,
+int32_t src_weight, int32_t offset_in)
 {
 uint8_t cnt;
 uint32_t data0, data1, data2, data3;
 v16u8 zero = { 0 };
 v16u8 src0, src1, src2, src3;
-v8u16 temp0, temp1, temp2, temp3;
-v8i16 wgt, denom, offset;
+v8u16 temp0, temp1, temp2, temp3, wgt;
+v8i16 denom, offset;
 
 offset_in <<= (log2_denom);
 
@@ -99,63 +83,47 @@ static void avc_wgt_4x4multiple_msa(uint8_t *data,
 offset_in += (1 << (log2_denom - 1));
 }
 
-wgt = __msa_fill_h(src_weight);
+wgt = (v8u16) __msa_fill_h(src_weight);
 offset = __msa_fill_h(offset_in);
 denom = __msa_fill_h(log2_denom);
 
 for (cnt = height / 4; cnt--;) {
-LOAD_4WORDS_WITH_STRIDE(data, stride, data0, data1, data2, data3);
+LW4(data, stride, data0, data1, data2, data3);
 
 src0 = (v16u8) __msa_fill_w(data0);
 src1 = (v16u8) __msa_fill_w(data1);
 src2 = (v16u8) __msa_fill_w(data2);
 src3 = (v16u8) __msa_fill_w(data3);
 
-ILVR_B_4VECS_UH(src0, src1, src2, src3, zero, zero, zero, zero,
-

[FFmpeg-cvslog] avutil/mips: Restructure of generic macros

2015-05-28 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Thu May 28 
14:54:53 2015 +0530| [02a49912301fa6eac68fb790255275897fc8a971] | committer: 
Michael Niedermayer

avutil/mips: Restructure of generic macros

This patch includes restructuring of existing macros and addition of more 
generic macros.

This change was necessary to avoid repeated review comments in remaining 
patches which we were about to submit.
Also this patch reduces number of code lines due to maximum use of generic 
macros, allows better code alignment & readability etc.

These modifications to the commonly used "libavutil/mips/generic_macros_msa.h" 
impact the already accepted code, hence it is re-submitted in 2/4, 3/4 & 4/4.
Overall, this patch set is just upgrading the code with styling changes and 
will bring it in sync with MIPS-SIMD optimized latest codebase at our end.

Signed-off-by: Shivraj Patil 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=02a49912301fa6eac68fb790255275897fc8a971
---

 libavutil/mips/generic_macros_msa.h | 2570 +--
 1 file changed, 1279 insertions(+), 1291 deletions(-)

diff --git a/libavutil/mips/generic_macros_msa.h 
b/libavutil/mips/generic_macros_msa.h
index 48dc78e..fbe7abf 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -24,1403 +24,1391 @@
 #include <stdint.h>
 #include <msa.h>
 
-#define LOAD_UB(psrc) \
-( {   \
-v16u8 out_m;  \
-out_m = *((v16u8 *) (psrc));  \
-out_m;\
-} )
-
-#define LOAD_SB(psrc) \
-( {   \
-v16i8 out_m;  \
-out_m = *((v16i8 *) (psrc));  \
-out_m;\
-} )
+#define LD_B(RTYPE, psrc) *((RTYPE *)(psrc))
+#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
 
-#define LOAD_UH(psrc) *((const v8u16 *)(psrc))
+#define LD_H(RTYPE, psrc) *((RTYPE *)(psrc))
+#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
 
-#define LOAD_SH(psrc) \
-( {   \
-v8i16 out_m;  \
-out_m = *((v8i16 *) (psrc));  \
-out_m;\
-} )
+#define LD_W(RTYPE, psrc) *((RTYPE *)(psrc))
+#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)
 
-#define LOAD_SW(psrc) *((const v4i32 *)(psrc))
+#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
 
-#define STORE_UB(vec, pdest) *((v16u8 *)(pdest)) = (vec)
-#define STORE_SB(vec, pdest) *((v16i8 *)(pdest)) = (vec)
+#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
 
-#define STORE_SH(vec, pdest)   \
-{  \
-*((v8i16 *) (pdest)) = (vec);  \
-}
-
-#define STORE_SW(vec, pdest)   \
-{  \
-*((v4i32 *) (pdest)) = (vec);  \
-}
+#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
 
 #if (__mips_isa_rev >= 6)
-#define LOAD_WORD(psrc)   \
-( {   \
-uint8_t *src_m = (uint8_t *) (psrc);  \
-uint32_t val_m;   \
-  \
-__asm__ volatile (\
-"lw  %[val_m],  %[src_m]  \n\t"   \
-  \
-: [val_m] "=r" (val_m)\
-: [src_m] "m" (*src_m)\
-);\
-  \
-val_m;\
+#define LW(psrc)   \
+( {\
+uint8_t *psrc_m = (uint8_t *) (psrc);  \
+uint32_t val_m;\
+   \
+__asm__ volatile ( \
+"lw  %[val_m],  %[psrc_m]  \n\t"   \
+   \
+: [val_m] "=r" (val_m) \
+: [psrc_m] "m" (*psrc_m)   \
+); \
+   \
+val_m; \
 } )
 
 #if (__mips == 64)
-#define LOAD_DWORD(psrc)  \
-( {   \
-uint8_t *src_m = (uint8_t *) (psrc);  \
-uint64_t val_m = 0;   \
-  \
-__asm__ volatile (\
-"ld  %[val_m],  %[src_m]  \n\t"   \
-  \
-  

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni hv mc functions

2015-05-13 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Fri May  8 
13:50:01 2015 +0530| [8252f63d1b982fb8adeb3ac3a79406e3cb422650] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni hv mc functions

Signed-off-by: Shivraj Patil 
Reviewed-by: Nedeljko Babic 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8252f63d1b982fb8adeb3ac3a79406e3cb422650
---

 libavcodec/mips/hevcdsp_init_mips.c |9 +
 libavcodec/mips/hevcdsp_mips.h  |9 +
 libavcodec/mips/hevcdsp_msa.c   |  512 +++
 3 files changed, 530 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c 
b/libavcodec/mips/hevcdsp_init_mips.c
index 1e22f35..d2e3c60 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -87,6 +87,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_msa;
 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_msa;
 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_msa;
+
+c->put_hevc_qpel_uni[1][1][1] = ff_hevc_put_hevc_uni_qpel_hv4_8_msa;
+c->put_hevc_qpel_uni[3][1][1] = ff_hevc_put_hevc_uni_qpel_hv8_8_msa;
+c->put_hevc_qpel_uni[4][1][1] = ff_hevc_put_hevc_uni_qpel_hv12_8_msa;
+c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_8_msa;
+c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_8_msa;
+c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_8_msa;
+c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_8_msa;
+c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 76a6784..a8c8848 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -106,4 +106,13 @@ UNI_MC(qpel, v, 32);
 UNI_MC(qpel, v, 48);
 UNI_MC(qpel, v, 64);
 
+UNI_MC(qpel, hv, 4);
+UNI_MC(qpel, hv, 8);
+UNI_MC(qpel, hv, 12);
+UNI_MC(qpel, hv, 16);
+UNI_MC(qpel, hv, 24);
+UNI_MC(qpel, hv, 32);
+UNI_MC(qpel, hv, 48);
+UNI_MC(qpel, hv, 64);
+
 #undef UNI_MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index d0e6f64..781264d 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -46,6 +46,24 @@
 out;   \
 } )
 
+#define HEVC_RND_W_CLIP_UNSIGNED_CHAR_W_VEC2(vec0_r, vec0_l,   \
+ vec1_r, vec1_l,   \
+ out0, out1)   \
+{  \
+(vec0_r) = __msa_srari_w((vec0_r), 6); \
+(vec0_l) = __msa_srari_w((vec0_l), 6); \
+(vec1_r) = __msa_srari_w((vec1_r), 6); \
+(vec1_l) = __msa_srari_w((vec1_l), 6); \
+   \
+(vec0_r) = CLIP_UNSIGNED_CHAR_W((vec0_r)); \
+(vec0_l) = CLIP_UNSIGNED_CHAR_W((vec0_l)); \
+(vec1_r) = CLIP_UNSIGNED_CHAR_W((vec1_r)); \
+(vec1_l) = CLIP_UNSIGNED_CHAR_W((vec1_l)); \
+   \
+out0 = (v4i32) __msa_pckev_h((v8i16) (vec0_l), (v8i16) (vec0_r));  \
+out1 = (v4i32) __msa_pckev_h((v8i16) (vec1_l), (v8i16) (vec1_r));  \
+}
+
 static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride,
  int16_t * __restrict dst, int32_t dst_stride,
  int32_t height)
@@ -2270,6 +2288,469 @@ static void hevc_hv_8t_64w_msa(uint8_t * __restrict 
src, int32_t src_stride,
filter_x, filter_y, height, 64);
 }
 
+static void hevc_hv_uni_8t_4w_msa(uint8_t * __restrict src,
+  int32_t src_stride,
+  uint8_t * __restrict dst,
+  int32_t dst_stride,
+  const int8_t * __restrict filter_x,
+  const int8_t * __restrict filter_y,
+  int32_t height)
+{
+uint32_t loop_cnt;
+uint32_t out0, out1;
+v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+v8i16 filt0, filt1, filt2, filt3, filter_vec;
+v4i32 filt_h0, filt_h1, filt_h2, filt_h3;
+v16i8 mask1, mask2, mask3;
+v8u16 const_vec;
+v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni copy, uni horizontal and uni vertical mc functions

2015-05-07 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Mon May  4 
18:09:34 2015 +0530| [7174df44fe7b27c85637438ee0052d9d9ff8f382] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni copy, uni 
horizontal and uni vertical mc functions

Signed-off-by: Shivraj Patil 
Reviewed-by: Nedeljko Babic 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7174df44fe7b27c85637438ee0052d9d9ff8f382
---

 libavcodec/mips/hevcdsp_init_mips.c |   26 +
 libavcodec/mips/hevcdsp_mips.h  |   40 +
 libavcodec/mips/hevcdsp_msa.c   | 1822 +++
 libavutil/mips/generic_macros_msa.h |  533 ++
 4 files changed, 2421 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c 
b/libavcodec/mips/hevcdsp_init_mips.c
index 4fec336..1e22f35 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -61,6 +61,32 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa;
 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
+
+c->put_hevc_qpel_uni[3][0][0] = ff_hevc_put_hevc_uni_pel_pixels8_8_msa;
+c->put_hevc_qpel_uni[4][0][0] = ff_hevc_put_hevc_uni_pel_pixels12_8_msa;
+c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels16_8_msa;
+c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels24_8_msa;
+c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_msa;
+c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_msa;
+c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_msa;
+
+c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_uni_qpel_h4_8_msa;
+c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_uni_qpel_h8_8_msa;
+c->put_hevc_qpel_uni[4][0][1] = ff_hevc_put_hevc_uni_qpel_h12_8_msa;
+c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_8_msa;
+c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_8_msa;
+c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_msa;
+c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_msa;
+c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_msa;
+
+c->put_hevc_qpel_uni[1][1][0] = ff_hevc_put_hevc_uni_qpel_v4_8_msa;
+c->put_hevc_qpel_uni[3][1][0] = ff_hevc_put_hevc_uni_qpel_v8_8_msa;
+c->put_hevc_qpel_uni[4][1][0] = ff_hevc_put_hevc_uni_qpel_v12_8_msa;
+c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_8_msa;
+c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_8_msa;
+c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_msa;
+c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_msa;
+c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 4f7f273..76a6784 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -67,3 +67,43 @@ MC(qpel, hv, 48);
 MC(qpel, hv, 64);
 
 #undef MC
+
+#define UNI_MC(PEL, DIR, WIDTH) \
+void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \
+ ptrdiff_t dst_stride, \
+ uint8_t *src, \
+ ptrdiff_t src_stride, \
+ int height, \
+ intptr_t mx, \
+ intptr_t my, \
+ int width)
+
+UNI_MC(pel, pixels, 4);
+UNI_MC(pel, pixels, 6);
+UNI_MC(pel, pixels, 8);
+UNI_MC(pel, pixels, 12);
+UNI_MC(pel, pixels, 16);
+UNI_MC(pel, pixels, 24);
+UNI_MC(pel, pixels, 32);
+UNI_MC(pel, pixels, 48);
+UNI_MC(pel, pixels, 64);
+
+UNI_MC(qpel, h, 4);
+UNI_MC(qpel, h, 8);
+UNI_MC(qpel, h, 12);
+UNI_MC(qpel, h, 16);
+UNI_MC(qpel, h, 24);
+UNI_MC(qpel, h, 32);
+UNI_MC(qpel, h, 48);
+UNI_MC(qpel, h, 64);
+
+UNI_MC(qpel, v, 4);
+UNI_MC(qpel, v, 8);
+UNI_MC(qpel, v, 12);
+UNI_MC(qpel, v, 16);
+UNI_MC(qpel, v, 24);
+UNI_MC(qpel, v, 32);
+UNI_MC(qpel, v, 48);
+UNI_MC(qpel, v, 64);
+
+#undef UNI_MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index fcc344b..d0e6f64 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -2270,6 +2270,1767 @@ static void hevc_hv_8t_64w_msa(uint8_

[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions

2015-04-24 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Wed Apr 22 
14:52:44 2015 +0530| [97f074f134048276db2f2c552e6e6b24fe0a6894] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc 
functions

Incorporated review comment.
Removed "__" from volatile.

Signed-off-by: Shivraj Patil 
Reviewed-by: Nedeljko Babic 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=97f074f134048276db2f2c552e6e6b24fe0a6894
---

 libavcodec/mips/hevcdsp_init_mips.c |   19 +
 libavcodec/mips/hevcdsp_mips.h  |   20 +
 libavcodec/mips/hevcdsp_msa.c   | 1098 +++
 libavutil/mips/generic_macros_msa.h |  133 +
 4 files changed, 1270 insertions(+)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c 
b/libavcodec/mips/hevcdsp_init_mips.c
index 05ed81f..4fec336 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
   const int bit_depth)
 {
 if (8 == bit_depth) {
+c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa;
+c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa;
+c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa;
+c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa;
+c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa;
+c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa;
+c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa;
+c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa;
+c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa;
+
 c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa;
 c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa;
 c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa;
@@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa;
 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa;
 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa;
+
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa;
 }
 }
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 13cdb5b..4f7f273 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,  \
  intptr_t my,   \
  int width)
 
+MC(pel, pixels, 4);
+MC(pel, pixels, 6);
+MC(pel, pixels, 8);
+MC(pel, pixels, 12);
+MC(pel, pixels, 16);
+MC(pel, pixels, 24);
+MC(pel, pixels, 32);
+MC(pel, pixels, 48);
+MC(pel, pixels, 64);
+
 MC(qpel, h, 4);
 MC(qpel, h, 8);
 MC(qpel, h, 12);
@@ -46,4 +56,14 @@ MC(qpel, v, 24);
 MC(qpel, v, 32);
 MC(qpel, v, 48);
 MC(qpel, v, 64);
+
+MC(qpel, hv, 4);
+MC(qpel, hv, 8);
+MC(qpel, hv, 12);
+MC(qpel, hv, 16);
+MC(qpel, hv, 24);
+MC(qpel, hv, 32);
+MC(qpel, hv, 48);
+MC(qpel, hv, 64);
+
 #undef MC
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 88e97d6..fcc344b 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -21,6 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
 
+#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\
+   filt0, filt1, filt2, filt3)\
+( {   \
+v4i32 out;\
+  \
+out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\
+out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2));  \
+out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3));  \
+out;  \
+} )
+
 #define HEVC_FILT_8TAP_DPADD_H(vec0, v

[FFmpeg-cvslog] Makefile: Add support for MSA (MIPS-SIMD-Arch)

2015-04-17 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Fri Apr 17 
19:00:27 2015 +0530| [35a7170e69b31f0299a85261bf48babdc5d4883f] | committer: 
Michael Niedermayer

Makefile: Add support for MSA (MIPS-SIMD-Arch)

Signed-off-by: Shivraj Patil 
Reviewed-by: Nedeljko Babic 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=35a7170e69b31f0299a85261bf48babdc5d4883f
---

 Makefile |2 +-
 arch.mak |1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ca2ce59..fe0e02f 100644
--- a/Makefile
+++ b/Makefile
@@ -80,7 +80,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS 
TOOLS  \
HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS\
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
ALTIVEC-OBJS MMX-OBJS YASM-OBJS   \
-   MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS\
+   MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MSA-OBJS   \
OBJS SLIBOBJS HOSTOBJS TESTOBJS
 
 define RESET
diff --git a/arch.mak b/arch.mak
index 48bc2d3..39f2917 100644
--- a/arch.mak
+++ b/arch.mak
@@ -7,6 +7,7 @@ OBJS-$(HAVE_NEON)+= $(NEON-OBJS)$(NEON-OBJS-yes)
 OBJS-$(HAVE_MIPSFPU)   += $(MIPSFPU-OBJS)$(MIPSFPU-OBJS-yes)
 OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS)  $(MIPSDSPR1-OBJS-yes)
 OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS)  $(MIPSDSPR2-OBJS-yes)
+OBJS-$(HAVE_MSA)   += $(MSA-OBJS)$(MSA-OBJS-yes)
 
 OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog


[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC horizontal and vertical mc functions

2015-04-17 Thread Shivraj Patil
ffmpeg | branch: master | Shivraj Patil  | Fri Apr 17 
19:00:28 2015 +0530| [4efc0e6451fa77e1e1d5b4b3873917c1916765f1] | committer: 
Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC horizontal and 
vertical mc functions

Signed-off-by: Shivraj Patil 
Reviewed-by: Nedeljko Babic 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4efc0e6451fa77e1e1d5b4b3873917c1916765f1
---

 libavcodec/hevcdsp.c|2 +
 libavcodec/hevcdsp.h|1 +
 libavcodec/mips/Makefile|2 +
 libavcodec/mips/hevcdsp_init_mips.c |   54 ++
 libavcodec/mips/hevcdsp_mips.h  |   49 ++
 libavcodec/mips/hevcdsp_msa.c   | 1259 +++
 libavutil/mips/generic_macros_msa.h |  285 
 7 files changed, 1652 insertions(+)

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 04af178..be01e92 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -261,4 +261,6 @@ int i = 0;
 ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
 if (ARCH_ARM)
 ff_hevcdsp_init_arm(hevcdsp, bit_depth);
+if (ARCH_MIPS)
+ff_hevc_dsp_init_mips(hevcdsp, bit_depth);
 }
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index a891ea7..d2ea867 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -129,4 +129,5 @@ extern const int8_t ff_hevc_qpel_filters[3][16];
 
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth);
+void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth);
 #endif /* AVCODEC_HEVCDSP_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 8e2459f..b41d8c7 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -18,3 +18,5 @@ OBJS-$(CONFIG_AAC_DECODER)+= 
mips/aacdec_mips.o\
  mips/aacpsdsp_mips.o
 MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
+OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
+MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
diff --git a/libavcodec/mips/hevcdsp_init_mips.c 
b/libavcodec/mips/hevcdsp_init_mips.c
new file mode 100644
index 000..05ed81f
--- /dev/null
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/mips/hevcdsp_mips.h"
+
+#if HAVE_MSA
+static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
+  const int bit_depth)
+{
+if (8 == bit_depth) {
+c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa;
+c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa;
+c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa;
+c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_msa;
+c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_8_msa;
+c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_msa;
+c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_msa;
+c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_msa;
+
+c->put_hevc_qpel[1][1][0] = ff_hevc_put_hevc_qpel_v4_8_msa;
+c->put_hevc_qpel[3][1][0] = ff_hevc_put_hevc_qpel_v8_8_msa;
+c->put_hevc_qpel[4][1][0] = ff_hevc_put_hevc_qpel_v12_8_msa;
+c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_8_msa;
+c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_8_msa;
+c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa;
+c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa;
+c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa;
+}
+}
+#endif  // #if HAVE_MSA
+
+void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth)
+{
+#if HAVE_MSA
+hevc_dsp_init_msa(c, bit_depth);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
new file mode 1006