[FFmpeg-cvslog] Disable MSA optimization for big endian arch
ffmpeg | branch: master | Shivraj Patil | Thu Jun 1 14:25:19 2017 +0530| [6f35c21659f7802a5533dea04b24958502886d7a] | committer: Michael Niedermayer Disable MSA optimization for big endian arch The current upstreamed code has been written and tested for Little Endian systems. We do plan to add Big Endian support in the near future, but until then all MSA optimizations need to be disabled on Big Endian builds to avoid their usage and the resulting failures. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6f35c21659f7802a5533dea04b24958502886d7a --- configure | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure b/configure index 72060ef0e9..4ec8f21814 100755 --- a/configure +++ b/configure @@ -5392,6 +5392,10 @@ elif enabled mips; then enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp' enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2' +if enabled bigendian && enabled msa; then +disable msa +fi + elif enabled parisc; then if enabled gcc; then ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] build fix for mips
ffmpeg | branch: master | Shivraj Patil | Tue Apr 4 19:14:01 2017 +0530| [2a512f86c12d2ed588733c454a12930efbad49f4] | committer: Ronald S. Bultje build fix for mips Signed-off-by: Shivraj Patil Signed-off-by: Ronald S. Bultje > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a512f86c12d2ed588733c454a12930efbad49f4 --- libavcodec/mips/hevcpred_init_mips.c | 3 ++- libavcodec/mips/hevcpred_msa.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libavcodec/mips/hevcpred_init_mips.c b/libavcodec/mips/hevcpred_init_mips.c index 331cfac..e987698 100644 --- a/libavcodec/mips/hevcpred_init_mips.c +++ b/libavcodec/mips/hevcpred_init_mips.c @@ -18,7 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavcodec/hevc.h" +#include "config.h" +#include "libavutil/attributes.h" #include "libavcodec/mips/hevcpred_mips.h" #if HAVE_MSA diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 6a3b281..963c64c 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -18,7 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavcodec/hevc.h" +#include "libavcodec/hevcdec.h" #include "libavutil/mips/generic_macros_msa.h" #include "hevcpred_mips.h" ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6
ffmpeg | branch: release/2.8 | Shivraj Patil | Wed Oct 5 18:10:24 2016 +0530| [b9ec80322b5b6dc22747e6bc9d463d2bcc683c6e] | committer: Michael Niedermayer avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer (cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa) Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b9ec80322b5b6dc22747e6bc9d463d2bcc683c6e --- libavutil/mips/generic_macros_msa.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index b1d18dd..0a59619 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -85,12 +85,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m = (uint8_t *) (psrc); \ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m); \ -val1_m = LW(psrc_m + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ @@ -172,12 +172,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m1 = (uint8_t *) (psrc);\ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m1); \ -val1_m = LW(psrc_m1 + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6
ffmpeg | branch: release/3.0 | Shivraj Patil | Wed Oct 5 18:10:24 2016 +0530| [ef5b120e2dd6ef83b41f31317890ad947955d916] | committer: Michael Niedermayer avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer (cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa) Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ef5b120e2dd6ef83b41f31317890ad947955d916 --- libavutil/mips/generic_macros_msa.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index b1d18dd..0a59619 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -85,12 +85,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m = (uint8_t *) (psrc); \ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m); \ -val1_m = LW(psrc_m + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ @@ -172,12 +172,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m1 = (uint8_t *) (psrc);\ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m1); \ -val1_m = LW(psrc_m1 + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Support for MIPS cpu P6600
ffmpeg | branch: release/3.0 | Shivraj Patil | Fri Aug 5 14:00:44 2016 +0530| [c993a11e56abcc8a6199ebed894394acd37f3c0c] | committer: Michael Niedermayer Support for MIPS cpu P6600 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer (cherry picked from commit 6803a298f4338c19c3032d2417c6e857eb6d95be) Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c993a11e56abcc8a6199ebed894394acd37f3c0c --- configure | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure b/configure index c9c139e..1f36d37 100755 --- a/configure +++ b/configure @@ -4189,7 +4189,7 @@ elif enabled mips; then enable mips32r2 disable msa ;; -p5600|i6400) +p5600|i6400|p6600) disable mipsdsp disable mipsdspr2 ;; @@ -4254,6 +4254,10 @@ elif enabled mips; then enable mips64r6 check_cflags "-mtune=i6400 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" ;; +p6600) +enable mips64r6 +check_cflags "-mtune=p6600 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" +;; esac else # We do not disable anything. Is up to the user to disable the unwanted features. ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6
ffmpeg | branch: release/3.1 | Shivraj Patil | Wed Oct 5 18:10:24 2016 +0530| [d89979e86b322210862987ebf1473fbd7cdc4c45] | committer: Michael Niedermayer avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer (cherry picked from commit c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa) Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d89979e86b322210862987ebf1473fbd7cdc4c45 --- libavutil/mips/generic_macros_msa.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index b1d18dd..0a59619 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -85,12 +85,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m = (uint8_t *) (psrc); \ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m); \ -val1_m = LW(psrc_m + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ @@ -172,12 +172,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m1 = (uint8_t *) (psrc);\ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m1); \ -val1_m = LW(psrc_m1 + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Support for MIPS cpu P6600
ffmpeg | branch: release/3.1 | Shivraj Patil | Fri Aug 5 14:00:44 2016 +0530| [d2566b124af849d28eaedcba60c3a7ac280070ab] | committer: Michael Niedermayer Support for MIPS cpu P6600 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer (cherry picked from commit 6803a298f4338c19c3032d2417c6e857eb6d95be) Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d2566b124af849d28eaedcba60c3a7ac280070ab --- configure | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 5b069eb..64142e7 100755 --- a/configure +++ b/configure @@ -4336,7 +4336,7 @@ elif enabled mips; then enable mips32r2 disable msa ;; -p5600|i6400) +p5600|i6400|p6600) disable mipsdsp disable mipsdspr2 ;; @@ -4401,6 +4401,10 @@ elif enabled mips; then enable mips64r6 check_cflags "-mtune=i6400 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" ;; +p6600) +enable mips64r6 +check_cflags "-mtune=p6600 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" +;; esac else # We do not disable anything. Is up to the user to disable the unwanted features. ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6
ffmpeg | branch: master | Shivraj Patil | Wed Oct 5 18:10:24 2016 +0530| [c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa] | committer: Michael Niedermayer avutil/mips/generic_macros_msa: rename macro variable which causes segfault for mips r6 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c1cc13cd2a9b8d6d2810ec42454f328a1a0d5efa --- libavutil/mips/generic_macros_msa.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index b1d18dd..0a59619 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -85,12 +85,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m = (uint8_t *) (psrc); \ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m); \ -val1_m = LW(psrc_m + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ @@ -172,12 +172,12 @@ #else // !(__mips == 64) #define LD(psrc) \ ( { \ -uint8_t *psrc_m1 = (uint8_t *) (psrc);\ +uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ -val0_m = LW(psrc_m1); \ -val1_m = LW(psrc_m1 + 4); \ +val0_m = LW(psrc_ld_m); \ +val1_m = LW(psrc_ld_m + 4); \ \ val_m = (uint64_t) (val1_m); \ val_m = (uint64_t) ((val_m << 32) & 0xFFFFFFFF00000000); \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] Support for MIPS cpu P6600
ffmpeg | branch: master | Shivraj Patil | Fri Aug 5 14:00:44 2016 +0530| [6803a298f4338c19c3032d2417c6e857eb6d95be] | committer: Michael Niedermayer Support for MIPS cpu P6600 Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6803a298f4338c19c3032d2417c6e857eb6d95be --- configure | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 9f5b31f..8e30c68 100755 --- a/configure +++ b/configure @@ -4346,7 +4346,7 @@ elif enabled mips; then enable mips32r2 disable msa ;; -p5600|i6400) +p5600|i6400|p6600) disable mipsdsp disable mipsdspr2 ;; @@ -4411,6 +4411,10 @@ elif enabled mips; then enable mips64r6 check_cflags "-mtune=i6400 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" ;; +p6600) +enable mips64r6 +check_cflags "-mtune=p6600 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64" +;; esac else # We do not disable anything. Is up to the user to disable the unwanted features. ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: build fix for P5600 with mips code restructuring
ffmpeg | branch: release/3.0 | Shivraj Patil | Tue Apr 26 12:35:15 2016 +0530| [83eaaae0057fc471a621a2c1bf1e95e4ab27484f] | committer: Michael Niedermayer configure: build fix for P5600 with mips code restructuring Note:- backporting commit 15ef98afd10b3696d29fb6d19606ba03a9dd47ad from head Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=83eaaae0057fc471a621a2c1bf1e95e4ab27484f --- configure | 252 + 1 file changed, 120 insertions(+), 132 deletions(-) diff --git a/configure b/configure index 475c087..9103e85 100755 --- a/configure +++ b/configure @@ -913,6 +913,25 @@ void foo(void){ __asm__ volatile($code); } EOF } +check_inline_asm_flags(){ +log check_inline_asm_flags "$@" +name="$1" +code="$2" +flags='' +shift 2 +while [ "$1" != "" ]; do + append flags $1 + shift +done; +disable $name +cat > $TMPC <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: build fix for P5600 along option --disable-msa
ffmpeg | branch: master | Shivraj Patil | Fri Mar 11 14:04:42 2016 +0530| [15ef98afd10b3696d29fb6d19606ba03a9dd47ad] | committer: Michael Niedermayer configure: build fix for P5600 along option --disable-msa Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=15ef98afd10b3696d29fb6d19606ba03a9dd47ad --- configure |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 1516b01..1b189328 100755 --- a/configure +++ b/configure @@ -5118,7 +5118,8 @@ elif enabled mips; then fi enabled mipsfpu && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f2"' '-mhard-float' -enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b $w0, $w1, 1"' '-mfp64 -mmsa' && check_header msa.h || disable msa +enabled mipsfpu && (enabled mips32r5 || enabled mips32r6 || enabled mips64r6) && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f1"' '-mfp64' +enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b $w0, $w1, 1"' '-mmsa' && check_header msa.h || disable msa enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp' enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2' ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: add check_inline_asm_flags()
ffmpeg | branch: master | Shivraj Patil | Mon Mar 7 20:07:10 2016 +0530| [b59d06d5f4155c27d6c3aa14be8379723aaf0a08] | committer: Michael Niedermayer configure: add check_inline_asm_flags() Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b59d06d5f4155c27d6c3aa14be8379723aaf0a08 --- configure | 19 +++ 1 file changed, 19 insertions(+) diff --git a/configure b/configure index 3299b1b..c895b50 100755 --- a/configure +++ b/configure @@ -918,6 +918,25 @@ void foo(void){ __asm__ volatile($code); } EOF } +check_inline_asm_flags(){ +log check_inline_asm_flags "$@" +name="$1" +code="$2" +flags='' +shift 2 +while [ "$1" != "" ]; do + append flags $1 + shift +done; +disable $name +cat > $TMPC <http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] configure: build fix for P5600 with mips code restructuring
ffmpeg | branch: master | Shivraj Patil | Mon Mar 7 20:07:10 2016 +0530| [8ca2c872b650182958f733db8b1d99c061dd3bf9] | committer: Michael Niedermayer configure: build fix for P5600 with mips code restructuring Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8ca2c872b650182958f733db8b1d99c061dd3bf9 --- configure | 293 + 1 file changed, 98 insertions(+), 195 deletions(-) diff --git a/configure b/configure index c895b50..81ec105 100755 --- a/configure +++ b/configure @@ -1685,6 +1685,7 @@ ARCH_EXT_LIST_ARM=" ARCH_EXT_LIST_MIPS=" mipsfpu mips32r2 +mips32r5 mips64r2 mips32r6 mips64r6 @@ -2183,10 +2184,11 @@ mipsfpu_deps="mips" mipsdsp_deps="mips" mipsdspr2_deps="mips" mips32r2_deps="mips" +mips32r5_deps="mips" mips32r6_deps="mips" mips64r2_deps="mips" mips64r6_deps="mips" -msa_deps="mips" +msa_deps="mipsfpu" mmi_deps="mips" altivec_deps="ppc" @@ -4208,118 +4210,90 @@ elif enabled mips; then cpuflags="-march=$cpu" -case $cpu in -24kc) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsfpu -disable mipsdsp -disable mipsdspr2 -disable msa -;; -24kf*) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsdsp -disable mipsdspr2 -disable msa -;; -24kec|34kc|1004kc) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsfpu -disable mipsdspr2 -disable msa -;; -24kef*|34kf*|1004kf*) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsdspr2 -disable msa -;; -74kc) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsfpu -disable msa -;; -74kf) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable msa -;; -p5600) -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsdsp -disable mipsdspr2 -check_cflags "-mtune=p5600" && -check_cflags "-mfp64 -msched-weight -mload-store-pairs -funroll-loops" && -add_asflags "-mfp64" -;; -i6400) -disable mips32r2 -disable mips32r6 -disable mips64r2 -disable mipsdsp -disable mipsdspr2 -check_cflags "-mtune=i6400 
-mabi=64" && -check_cflags "-mfp64 -msched-weight -mload-store-pairs -funroll-loops" && -check_ldflags "-mabi=64" && -add_asflags "-mfp64" -;; -loongson*) -disable mips32r2 -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsfpu -disable mipsdsp -disable mipsdspr2 -disable msa -enable local_aligned_8 local_aligned_16 local_aligned_32 -enable simd_align_16 -enable fast_64bit -enable fast_clz -enable fast_cmov -enable fast_unaligned -disable aligned_stack -case $cpu in -loongson3*) -cpuflags="-march=loongson3a -mhard-float -fno-expensive-optimizations" -;; -loongson2e) -cpuflags="-march=loongson2e -mhard-float -fno-expensive-optimizations" -;; -loongson2f) -cpuflags="-march=loongson2f -mhard-float -fno-expensive-optimizations" -;; -esac -;; -generic) -# We do not disable anything. Is up to the user to disable -# the unwanted features. -;; -*) -# Unknown CPU. Disable everything. -warn "unknown CPU. Disabling all MIPS optimizations." -disable mipsfpu -disable mips32r2 -disable mips32r6 -disable mips64r2 -disable mips64r6 -disable mipsdsp -disable mipsdspr2 -disable msa -;; -esac +if [ "$cpu" != "generic" ]; then +disable mips32r2 +disable mips32r5 +disable mips64r2 +disable mips32r6 +
[FFmpeg-cvslog] avcodec/mips: build fix for MSA 64bit
ffmpeg | branch: release/2.8 | Shivraj Patil | Thu Oct 8 15:05:52 2015 +0530| [a931ad554d0d7a337f3ac3340622f189556885fc] | committer: Michael Niedermayer avcodec/mips: build fix for MSA 64bit Modified datatype of function argument (pitch from int32_t to ptrdiff_t). Signed-off-by: Shivraj Patil Commit in master: 322e960dbf32b846b26f95afa6c0e652bc04e90d Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a931ad554d0d7a337f3ac3340622f189556885fc --- libavcodec/mips/vp9_lpf_msa.c | 42 - 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c index 63e538e..eef8afc 100644 --- a/libavcodec/mips/vp9_lpf_msa.c +++ b/libavcodec/mips/vp9_lpf_msa.c @@ -259,7 +259,7 @@ mask_out = __msa_xori_b(mask_out, 0xff); \ } -void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_4_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -288,7 +288,7 @@ void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch, } -void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_44_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -318,7 +318,7 @@ void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch, ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); } -void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_8_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -392,7 +392,7 @@ void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_88_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -471,7 +471,7 @@ void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t 
pitch, +void ff_loop_filter_v_84_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -542,7 +542,7 @@ void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_48_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -614,7 +614,7 @@ void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch, } } -static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, +static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48, int32_t b_limit_ptr, int32_t limit_ptr, @@ -689,7 +689,7 @@ static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, } } -static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) +static void vp9_hz_lpf_t16_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; @@ -1021,7 +1021,7 @@ static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) } } -void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_16_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -1037,7 +1037,7 @@ void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_16_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -1261,7 +1261,7 @@ void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_h_4_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_h_4_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_
[FFmpeg-cvslog] avcodec/mips: build fix for MSA
ffmpeg | branch: release/2.8 | Shivraj Patil | Thu Oct 8 14:21:22 2015 +0530| [7236080d2721dff24f9716a6cce91bbacb32581f] | committer: Michael Niedermayer avcodec/mips: build fix for MSA Modified sps and pps access from old HEVCContext(s) structure to newly introduced HEVCParamSets(ps). Signed-off-by: Shivraj Patil Commit in master: b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115 Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7236080d2721dff24f9716a6cce91bbacb32581f --- libavcodec/mips/hevcpred_msa.c | 282 1 file changed, 141 insertions(+), 141 deletions(-) diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 5d9299f..6a3b281 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -1915,24 +1915,24 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx) v16u8 vec0; HEVCLocalContext *lc = s->HEVClc; int i; -int hshift = s->sps->hshift[c_idx]; -int vshift = s->sps->vshift[c_idx]; +int hshift = s->ps.sps->hshift[c_idx]; +int vshift = s->ps.sps->vshift[c_idx]; int size_in_luma_h = 16 << hshift; -int size_in_tbs_h = size_in_luma_h >> s->sps->log2_min_tb_size; +int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size; int size_in_luma_v = 16 << vshift; -int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size; +int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size; int x = x0 >> hshift; int y = y0 >> vshift; -int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; -int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; +int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask; +int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask; int cur_tb_addr = -s->pps->min_tb_addr_zs[(y_tb) * (s->sps->tb_mask + 2) + (x_tb)]; +s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)]; ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t); uint8_t *src = (uint8_t *) 
s->frame->data[c_idx] + x + y * stride; -int min_pu_width = s->sps->min_pu_width; +int min_pu_width = s->ps.sps->min_pu_width; enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c : lc->tu.intra_pred_mode; @@ -1948,41 +1948,41 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx) uint8_t *filtered_top = filtered_top_array + 1; int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > -s->pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->sps->tb_mask) * - (s->sps->tb_mask + 2) + (x_tb - 1)]; +s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->ps.sps->tb_mask) * + (s->ps.sps->tb_mask + 2) + (x_tb - 1)]; int cand_left = lc->na.cand_left; int cand_up_left = lc->na.cand_up_left; int cand_up = lc->na.cand_up; int cand_up_right = lc->na.cand_up_right && cur_tb_addr > -s->pps->min_tb_addr_zs[(y_tb - 1) * (s->sps->tb_mask + 2) + - ((x_tb + size_in_tbs_h) & s->sps->tb_mask)]; +s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) + + ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)]; int bottom_left_size = (((y0 + 2 * size_in_luma_v) > - (s->sps->height) ? (s->sps->height) : (y0 + + (s->ps.sps->height) ? (s->ps.sps->height) : (y0 + 2 * size_in_luma_v)) - (y0 + size_in_luma_v)) >> vshift; int top_right_size = (((x0 + 2 * size_in_luma_h) > - (s->sps->width) ? (s->sps->width) : (x0 + 2 * size_in_luma_h)) - + (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) - (x0 + size_in_luma_h)) >> hshift; -if (s->pps->constrained_intra_pred_flag == 1) { -int size_in_luma_pu_v = ((size_in_luma_v) >> s->sps->log2_min_pu_size); -int size_in_luma_pu_h = ((size_in_luma_h) >> s->sps->log2_min_pu_size); -int on_pu_edge_x = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1)); -int on_pu_edge_y = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1)); +if (s->ps.pps->constrained_intra_pred_flag == 1) { +int size_in_luma_pu_v = ((size_in_luma_v) >> s-&
[FFmpeg-cvslog] avcodec/mips: build fix for MSA 64bit
ffmpeg | branch: master | Shivraj Patil | Wed Oct 7 18:39:59 2015 +0530| [322e960dbf32b846b26f95afa6c0e652bc04e90d] | committer: Michael Niedermayer avcodec/mips: build fix for MSA 64bit Modified datatype of function argument (pitch from int32_t to ptrdiff_t) Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=322e960dbf32b846b26f95afa6c0e652bc04e90d --- libavcodec/mips/vp9_lpf_msa.c | 42 - 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c index 63e538e..eef8afc 100644 --- a/libavcodec/mips/vp9_lpf_msa.c +++ b/libavcodec/mips/vp9_lpf_msa.c @@ -259,7 +259,7 @@ mask_out = __msa_xori_b(mask_out, 0xff); \ } -void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_4_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -288,7 +288,7 @@ void ff_loop_filter_v_4_8_msa(uint8_t *src, int32_t pitch, } -void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_44_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -318,7 +318,7 @@ void ff_loop_filter_v_44_16_msa(uint8_t *src, int32_t pitch, ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); } -void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_8_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -392,7 +392,7 @@ void ff_loop_filter_v_8_8_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_88_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -471,7 +471,7 @@ void ff_loop_filter_v_88_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_84_16_msa(uint8_t *src, ptrdiff_t 
pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -542,7 +542,7 @@ void ff_loop_filter_v_84_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_48_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -614,7 +614,7 @@ void ff_loop_filter_v_48_16_msa(uint8_t *src, int32_t pitch, } } -static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, +static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48, int32_t b_limit_ptr, int32_t limit_ptr, @@ -689,7 +689,7 @@ static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, } } -static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) +static void vp9_hz_lpf_t16_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; @@ -1021,7 +1021,7 @@ static void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) } } -void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_16_16_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -1037,7 +1037,7 @@ void ff_loop_filter_v_16_16_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_v_16_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -1261,7 +1261,7 @@ void ff_loop_filter_v_16_8_msa(uint8_t *src, int32_t pitch, } } -void ff_loop_filter_h_4_8_msa(uint8_t *src, int32_t pitch, +void ff_loop_filter_h_4_8_msa(uint8_t *src, ptrdiff_t pitch, int32_t b_limit_ptr, int32_t limit_ptr, int32_t thresh_ptr) @@ -1290,7 +1290,7 @@ void ff_loop_filter_h_4_8_msa(uint8_t *
[FFmpeg-cvslog] avcodec/mips: build fix for MSA
ffmpeg | branch: master | Shivraj Patil | Wed Oct 7 18:38:53 2015 +0530| [b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115] | committer: Michael Niedermayer avcodec/mips: build fix for MSA Modified sps and pps access from old HEVCContext(s) structure to newly introduced HEVCParamSets(ps) Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b0732b0214a40cdbcaf49d72cc6f25a7e9e5f115 --- libavcodec/mips/hevcpred_msa.c | 282 1 file changed, 141 insertions(+), 141 deletions(-) diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 5d9299f..6a3b281 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -1915,24 +1915,24 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx) v16u8 vec0; HEVCLocalContext *lc = s->HEVClc; int i; -int hshift = s->sps->hshift[c_idx]; -int vshift = s->sps->vshift[c_idx]; +int hshift = s->ps.sps->hshift[c_idx]; +int vshift = s->ps.sps->vshift[c_idx]; int size_in_luma_h = 16 << hshift; -int size_in_tbs_h = size_in_luma_h >> s->sps->log2_min_tb_size; +int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size; int size_in_luma_v = 16 << vshift; -int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size; +int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size; int x = x0 >> hshift; int y = y0 >> vshift; -int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; -int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; +int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask; +int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask; int cur_tb_addr = -s->pps->min_tb_addr_zs[(y_tb) * (s->sps->tb_mask + 2) + (x_tb)]; +s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)]; ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t); uint8_t *src = (uint8_t *) s->frame->data[c_idx] + x + y * stride; -int min_pu_width = 
s->sps->min_pu_width; +int min_pu_width = s->ps.sps->min_pu_width; enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c : lc->tu.intra_pred_mode; @@ -1948,41 +1948,41 @@ void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx) uint8_t *filtered_top = filtered_top_array + 1; int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > -s->pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->sps->tb_mask) * - (s->sps->tb_mask + 2) + (x_tb - 1)]; +s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->ps.sps->tb_mask) * + (s->ps.sps->tb_mask + 2) + (x_tb - 1)]; int cand_left = lc->na.cand_left; int cand_up_left = lc->na.cand_up_left; int cand_up = lc->na.cand_up; int cand_up_right = lc->na.cand_up_right && cur_tb_addr > -s->pps->min_tb_addr_zs[(y_tb - 1) * (s->sps->tb_mask + 2) + - ((x_tb + size_in_tbs_h) & s->sps->tb_mask)]; +s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) + + ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)]; int bottom_left_size = (((y0 + 2 * size_in_luma_v) > - (s->sps->height) ? (s->sps->height) : (y0 + + (s->ps.sps->height) ? (s->ps.sps->height) : (y0 + 2 * size_in_luma_v)) - (y0 + size_in_luma_v)) >> vshift; int top_right_size = (((x0 + 2 * size_in_luma_h) > - (s->sps->width) ? (s->sps->width) : (x0 + 2 * size_in_luma_h)) - + (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) - (x0 + size_in_luma_h)) >> hshift; -if (s->pps->constrained_intra_pred_flag == 1) { -int size_in_luma_pu_v = ((size_in_luma_v) >> s->sps->log2_min_pu_size); -int size_in_luma_pu_h = ((size_in_luma_h) >> s->sps->log2_min_pu_size); -int on_pu_edge_x = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1)); -int on_pu_edge_y = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1)); +if (s->ps.pps->constrained_intra_pred_flag == 1) { +int size_in_luma_pu_v = ((size_in_luma_v) >> s->ps.sps->log2_min_pu_size); +int size_in_luma_pu_h = ((size_
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 bilinear functions
ffmpeg | branch: master | Shivraj Patil | Mon Jul 27 17:47:34 2015 +0530| [71aede3ced76a5adb4d8cd44a70dfe2487db882a] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 bilinear functions Signed-off-by: Shivraj Patil Reviewed-by: "Ronald S. Bultje" Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=71aede3ced76a5adb4d8cd44a70dfe2487db882a --- libavcodec/mips/vp9_mc_msa.c | 2123 libavcodec/mips/vp9dsp_init_mips.c |2 + libavcodec/mips/vp9dsp_mips.h | 32 + 3 files changed, 2157 insertions(+) diff --git a/libavcodec/mips/vp9_mc_msa.c b/libavcodec/mips/vp9_mc_msa.c index a1d7798..1671d97 100644 --- a/libavcodec/mips/vp9_mc_msa.c +++ b/libavcodec/mips/vp9_mc_msa.c @@ -31,6 +31,24 @@ static const uint8_t mc_filt_mask_arr[16 * 3] = { 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 }; +static const int8_t vp9_bilinear_filters_msa[15][2] = { +{120, 8}, +{112, 16}, +{104, 24}, +{96, 32}, +{88, 40}, +{80, 48}, +{72, 56}, +{64, 64}, +{56, 72}, +{48, 80}, +{40, 88}, +{32, 96}, +{24, 104}, +{16, 112}, +{8, 120} +}; + #define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \ filt0, filt1, filt2, filt3) \ ( { \ @@ -1827,6 +1845,2111 @@ static void common_hv_8ht_8vt_and_aver_dst_64w_msa(const uint8_t *src, } } +static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + const int8_t *filter) +{ +v16i8 src0, src1, src2, src3, mask; +v16u8 filt0, vec0, vec1, res0, res1; +v8u16 vec2, vec3, filt; + +mask = LD_SB(&mc_filt_mask_arr[16]); + +/* rearranging filter */ +filt = LD_UH(filter); +filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + +LD_SB4(src, src_stride, src0, src1, src2, src3); +VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); +DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); +SRARI_H2_UH(vec2, vec3, 7); +PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); +ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); +} + +static void 
common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + const int8_t *filter) +{ +v16u8 vec0, vec1, vec2, vec3, filt0; +v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; +v16i8 res0, res1, res2, res3; +v8u16 vec4, vec5, vec6, vec7, filt; + +mask = LD_SB(&mc_filt_mask_arr[16]); + +/* rearranging filter */ +filt = LD_UH(filter); +filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + +LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); +VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); +VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); +DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, +vec4, vec5, vec6, vec7); +SRARI_H4_UH(vec4, vec5, vec6, vec7, 7); +PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, +res0, res1, res2, res3); +ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); +dst += (4 * dst_stride); +ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); +} + +void ff_put_bilin_4h_msa(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, + int height, int mx, int my) +{ +const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; + +if (4 == height) { +common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); +} else if (8 == height) { +common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); +} +} + +static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_stride, + const int8_t *filter) +{ +v16u8 filt0; +v16i8 src0, src1, src2, src3, mask; +v8u16 vec0, vec1, vec2, vec3, filt; + +mask = LD_SB(&mc_filt_mask_arr[0]); + +/* rearranging filter */ +filt = LD_UH(filter); +filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0); + +LD_SB4(src, src_stride, src0, src1, src2, src3); +VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); +VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); +DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, +vec0, vec1, vec2, vec3); 
+SRARI_H4_UH(vec0, vec1, vec2, vec3, 7); +PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1); +ST8x4_UB(src0, src1, dst, dst_stride); +} + +static void common_hz_2t_8x8mult_msa(const uint8_t
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 intra functions
ffmpeg | branch: master | Shivraj Patil | Thu Jul 23 18:43:07 2015 +0530| [e21b090bfb7b6b723ff1c28cc5bb16e7498addb2] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 intra functions Signed-off-by: Shivraj Patil Reviewed-by: "Ronald S. Bultje" Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e21b090bfb7b6b723ff1c28cc5bb16e7498addb2 --- libavcodec/mips/Makefile |3 +- libavcodec/mips/vp9_intra_msa.c| 533 libavcodec/mips/vp9dsp_init_mips.c | 31 +++ libavcodec/mips/vp9dsp_mips.h | 53 4 files changed, 619 insertions(+), 1 deletion(-) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 7cbad54..f543448 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -45,7 +45,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevcpred_msa.o MSA-OBJS-$(CONFIG_VP9_DECODER)+= mips/vp9_mc_msa.o \ mips/vp9_lpf_msa.o\ - mips/vp9_idct_msa.o + mips/vp9_idct_msa.o \ + mips/vp9_intra_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\ mips/h264idct_msa.o MSA-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_msa.o diff --git a/libavcodec/mips/vp9_intra_msa.c b/libavcodec/mips/vp9_intra_msa.c new file mode 100644 index 000..54cf0ae --- /dev/null +++ b/libavcodec/mips/vp9_intra_msa.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/vp9dsp.h" +#include "libavutil/mips/generic_macros_msa.h" +#include "vp9dsp_mips.h" + +#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) \ +{\ +out0 = __msa_subs_u_h(out0, in0);\ +out1 = __msa_subs_u_h(out1, in1);\ +} + +void ff_vert_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left, + const uint8_t *src) +{ +uint32_t row; +v16u8 src0; + +src0 = LD_UB(src); + +for (row = 16; row--;) { +ST_UB(src0, dst); +dst += dst_stride; +} +} + +void ff_vert_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left, + const uint8_t *src) +{ +uint32_t row; +v16u8 src1, src2; + +src1 = LD_UB(src); +src2 = LD_UB(src + 16); + +for (row = 32; row--;) { +ST_UB2(src1, src2, dst, 16); +dst += dst_stride; +} +} + +void ff_hor_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, + const uint8_t *top) +{ +uint32_t row, inp; +v16u8 src0, src1, src2, src3; + +src += 12; +for (row = 4; row--;) { +inp = LW(src); +src -= 4; + +src0 = (v16u8) __msa_fill_b(inp >> 24); +src1 = (v16u8) __msa_fill_b(inp >> 16); +src2 = (v16u8) __msa_fill_b(inp >> 8); +src3 = (v16u8) __msa_fill_b(inp); + +ST_UB4(src0, src1, src2, src3, dst, dst_stride); +dst += (4 * dst_stride); +} +} + +void ff_hor_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, + const uint8_t *top) +{ +uint32_t row, inp; +v16u8 src0, src1, src2, src3; + +src += 28; +for (row = 8; row--;) { +inp = LW(src); +src -= 4; + +src0 = (v16u8) __msa_fill_b(inp >> 24); +src1 = (v16u8) __msa_fill_b(inp >> 16); +src2 = (v16u8) __msa_fill_b(inp >> 8); +src3 = (v16u8) __msa_fill_b(inp); + +ST_UB2(src0, src0, dst, 16); +dst += dst_stride; +ST_UB2(src1, src1, dst, 16); +dst += dst_stride; +ST_UB2(src2, src2, dst, 16); +dst += dst_stride; +ST_UB2(src3, src3, dst, 16); 
+dst += dst_stride; +} +} +
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 idct functions
ffmpeg | branch: master | Shivraj Patil | Wed Jul 22 17:30:23 2015 +0530| [c03800d5921e5359a78c2d2af781d059bc53bfda] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 idct functions Signed-off-by: Shivraj Patil Reviewed-by: "Ronald S. Bultje" Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c03800d5921e5359a78c2d2af781d059bc53bfda --- libavcodec/mips/Makefile |3 +- libavcodec/mips/vp9_idct_msa.c | 2138 libavcodec/mips/vp9dsp_init_mips.c | 24 + libavcodec/mips/vp9dsp_mips.h | 28 + 4 files changed, 2192 insertions(+), 1 deletion(-) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index a946897..7cbad54 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -44,7 +44,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_lpf_sao_msa.o \ mips/hevcpred_msa.o MSA-OBJS-$(CONFIG_VP9_DECODER)+= mips/vp9_mc_msa.o \ - mips/vp9_lpf_msa.o + mips/vp9_lpf_msa.o\ + mips/vp9_idct_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\ mips/h264idct_msa.o MSA-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_msa.o diff --git a/libavcodec/mips/vp9_idct_msa.c b/libavcodec/mips/vp9_idct_msa.c new file mode 100644 index 000..aeb2387 --- /dev/null +++ b/libavcodec/mips/vp9_idct_msa.c @@ -0,0 +1,2138 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> +#include "libavcodec/vp9dsp.h" +#include "libavutil/mips/generic_macros_msa.h" +#include "vp9dsp_mips.h" + +#define VP9_DCT_CONST_BITS 14 +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) + +static const int32_t cospi_1_64 = 16364; +static const int32_t cospi_2_64 = 16305; +static const int32_t cospi_3_64 = 16207; +static const int32_t cospi_4_64 = 16069; +static const int32_t cospi_5_64 = 15893; +static const int32_t cospi_6_64 = 15679; +static const int32_t cospi_7_64 = 15426; +static const int32_t cospi_8_64 = 15137; +static const int32_t cospi_9_64 = 14811; +static const int32_t cospi_10_64 = 14449; +static const int32_t cospi_11_64 = 14053; +static const int32_t cospi_12_64 = 13623; +static const int32_t cospi_13_64 = 13160; +static const int32_t cospi_14_64 = 12665; +static const int32_t cospi_15_64 = 12140; +static const int32_t cospi_16_64 = 11585; +static const int32_t cospi_17_64 = 11003; +static const int32_t cospi_18_64 = 10394; +static const int32_t cospi_19_64 = 9760; +static const int32_t cospi_20_64 = 9102; +static const int32_t cospi_21_64 = 8423; +static const int32_t cospi_22_64 = 7723; +static const int32_t cospi_23_64 = 7005; +static const int32_t cospi_24_64 = 6270; +static const int32_t cospi_25_64 = 5520; +static const int32_t cospi_26_64 = 4756; +static const int32_t cospi_27_64 = 3981; +static const int32_t cospi_28_64 = 3196; +static const int32_t cospi_29_64 = 2404; +static const int32_t cospi_30_64 = 1606; +static const int32_t cospi_31_64 = 804; + +// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 +static const int32_t sinpi_1_9 = 5283; +static const int32_t sinpi_2_9 = 9929; +static const int32_t sinpi_3_9 = 13377; +static const int32_t sinpi_4_9 = 15212; + +#define 
VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) \ +{ \ +v8i16 k0_m = __msa_fill_h(cnst0); \ +v4i32 s0_m, s1_m, s2_m, s3_m; \ + \ +s0_m = (v4i32) __msa_fill_h(cnst1);\ +k0_m = __msa_ilvev_h((v8i16) s0_m, k0_m); \ +
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions
ffmpeg | branch: master | Shivraj Patil | Mon Jun 29 21:15:15 2015 +0530| [d12f76ffbb1b68d3c8a2859b7a095080ba985fa2] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions in new file idctdsp_msa.c and simple_idct_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d12f76ffbb1b68d3c8a2859b7a095080ba985fa2 --- libavcodec/idctdsp.c|2 + libavcodec/idctdsp.h|2 + libavcodec/mips/Makefile|3 + libavcodec/mips/idctdsp_init_mips.c | 49 +++ libavcodec/mips/idctdsp_mips.h | 42 +++ libavcodec/mips/idctdsp_msa.c | 149 + libavcodec/mips/simple_idct_msa.c | 573 +++ libavutil/mips/generic_macros_msa.h | 37 +++ 8 files changed, 857 insertions(+) diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c index ae804d9..63e9b52 100644 --- a/libavcodec/idctdsp.c +++ b/libavcodec/idctdsp.c @@ -305,6 +305,8 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) ff_idctdsp_init_ppc(c, avctx, high_bit_depth); if (ARCH_X86) ff_idctdsp_init_x86(c, avctx, high_bit_depth); +if (ARCH_MIPS) +ff_idctdsp_init_mips(c, avctx, high_bit_depth); ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h index 538b716..b180a67 100644 --- a/libavcodec/idctdsp.h +++ b/libavcodec/idctdsp.h @@ -108,5 +108,7 @@ void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); +void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); #endif /* AVCODEC_IDCTDSP_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 695ee36..5569a03 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -29,6 +29,7 @@ 
OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o +OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_init_mips.o OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o @@ -50,6 +51,8 @@ MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o +MSA-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_msa.o \ + mips/simple_idct_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_msa.o MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o diff --git a/libavcodec/mips/idctdsp_init_mips.c b/libavcodec/mips/idctdsp_init_mips.c new file mode 100644 index 000..c964340 --- /dev/null +++ b/libavcodec/mips/idctdsp_init_mips.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "idctdsp_mips.h" + +#if HAVE_MSA +static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ +if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) && +(avctx->bits_per_raw_sample != 10) && +(avctx->bits_per_raw_sample != 12) && +(avctx->idct_a
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions
ffmpeg | branch: master | Shivraj Patil | Mon Jun 29 21:15:14 2015 +0530| [709bb45c660ae7c2d065bcade931e068620f9b92] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions in new file me_cmp_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=709bb45c660ae7c2d065bcade931e068620f9b92 --- libavcodec/me_cmp.c |2 + libavcodec/me_cmp.h |1 + libavcodec/mips/Makefile|2 + libavcodec/mips/me_cmp_init_mips.c | 56 +++ libavcodec/mips/me_cmp_mips.h | 60 +++ libavcodec/mips/me_cmp_msa.c| 686 +++ libavutil/mips/generic_macros_msa.h | 59 +++ 7 files changed, 866 insertions(+) diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c index d4213d2..dc76b07 100644 --- a/libavcodec/me_cmp.c +++ b/libavcodec/me_cmp.c @@ -991,4 +991,6 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx) ff_me_cmp_init_ppc(c, avctx); if (ARCH_X86) ff_me_cmp_init_x86(c, avctx); +if (ARCH_MIPS) +ff_me_cmp_init_mips(c, avctx); } diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h index 98ee53c..a3603ec 100644 --- a/libavcodec/me_cmp.h +++ b/libavcodec/me_cmp.h @@ -87,6 +87,7 @@ void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx); void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx); void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx); void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx); void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type); diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 59c1f79..2993891 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -31,6 +31,7 @@ OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o OBJS-$(CONFIG_MPEGVIDEO) += 
mips/mpegvideo_init_mips.o OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o +OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -51,5 +52,6 @@ MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_msa.o +MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/me_cmp_init_mips.c b/libavcodec/mips/me_cmp_init_mips.c new file mode 100644 index 000..219a0dc --- /dev/null +++ b/libavcodec/mips/me_cmp_init_mips.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "me_cmp_mips.h" + +#if HAVE_MSA +static av_cold void me_cmp_msa(MECmpContext *c, AVCodecContext *avctx) +{ +#if BIT_DEPTH == 8 +c->pix_abs[0][0] = ff_pix_abs16_msa; +c->pix_abs[0][1] = ff_pix_abs16_x2_msa; +c->pix_abs[0][2] = ff_pix_abs16_y2_msa; +c->pix_abs[0][3] = ff_pix_abs16_xy2_msa; +c->pix_abs[1][0] = ff_pix_abs8_msa; +c->pix_abs[1][1] = ff_pix_abs8_x2_msa; +c->pix_abs[1][2] = ff_pix_abs8_y2_msa; +c->pix_abs[1][3] = ff_pix_abs8_xy2_msa; + +c->hadamard8_diff[0] = ff_hadamard8_diff16_msa; +c->hadamard8_diff[1] = ff_hadamard8_diff8x8_msa; + +c->hadamard8_diff[4] = ff_hadamard8_intra16_msa; +c->hadamard8_diff[5] = ff_hadamard8_intra8x8_msa; + +c->sad[0] = ff_pix_abs16_msa; +c->sad[1] = ff_pix
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions
ffmpeg | branch: master | Shivraj Patil | Mon Jun 29 21:15:13 2015 +0530| [2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for mpegvideoencdsp functions in new file mpegvideoencdsp_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f3f98af2b3215b7f3ab302275a0b3b4acaf84a5 --- libavcodec/mips/Makefile|2 + libavcodec/mips/mpegvideoencdsp_init_mips.c | 40 + libavcodec/mips/mpegvideoencdsp_msa.c | 62 +++ libavcodec/mpegvideoencdsp.c|2 + libavcodec/mpegvideoencdsp.h|2 + libavutil/mips/generic_macros_msa.h | 34 +++ 6 files changed, 142 insertions(+) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 277ac2a..59c1f79 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -30,6 +30,7 @@ OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o +OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -49,5 +50,6 @@ MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o +MSA-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/mpegvideoencdsp_init_mips.c b/libavcodec/mips/mpegvideoencdsp_init_mips.c new file mode 100644 index 000..9bfe94e --- /dev/null +++ b/libavcodec/mips/mpegvideoencdsp_init_mips.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 
2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/bit_depth_template.c" +#include "h263dsp_mips.h" + +#if HAVE_MSA +static av_cold void mpegvideoencdsp_init_msa(MpegvideoEncDSPContext *c, + AVCodecContext *avctx) +{ +#if BIT_DEPTH == 8 +c->pix_sum = ff_pix_sum_msa; +#endif +} +#endif // #if HAVE_MSA + +av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c, + AVCodecContext *avctx) +{ +#if HAVE_MSA +mpegvideoencdsp_init_msa(c, avctx); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/mpegvideoencdsp_msa.c b/libavcodec/mips/mpegvideoencdsp_msa.c new file mode 100644 index 000..46473da --- /dev/null +++ b/libavcodec/mips/mpegvideoencdsp_msa.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h263dsp_mips.h" +#include
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions
ffmpeg | branch: master | Shivraj Patil | Mon Jun 29 21:15:12 2015 +0530| [2eb28e889d9c16914e547cc128db521b5d6c5390] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for mpegvideo functions in new file mpegvideo_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2eb28e889d9c16914e547cc128db521b5d6c5390 --- libavcodec/mips/Makefile |2 + libavcodec/mips/mpegvideo_init_mips.c | 37 + libavcodec/mips/mpegvideo_msa.c | 250 + libavcodec/mpegvideo.c|2 + libavcodec/mpegvideo.h|1 + libavutil/mips/generic_macros_msa.h | 94 + 6 files changed, 386 insertions(+) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index c0ecb15..277ac2a 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -29,6 +29,7 @@ OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o +OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -47,5 +48,6 @@ MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o +MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/mpegvideo_init_mips.c b/libavcodec/mips/mpegvideo_init_mips.c new file mode 100644 index 000..ee14b31 --- /dev/null +++ b/libavcodec/mips/mpegvideo_init_mips.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of 
FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h263dsp_mips.h" + +#if HAVE_MSA +static av_cold void dct_unquantize_init_msa(MpegEncContext *s) +{ +s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_msa; +s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_msa; +s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_msa; +} +#endif // #if HAVE_MSA + +av_cold void ff_mpv_common_init_mips(MpegEncContext *s) +{ +#if HAVE_MSA +dct_unquantize_init_msa(s); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/mpegvideo_msa.c b/libavcodec/mips/mpegvideo_msa.c new file mode 100644 index 000..aa9ef77 --- /dev/null +++ b/libavcodec/mips/mpegvideo_msa.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mips/generic_macros_msa.h" +#include "h263dsp_mips.h" + +static void h263_dct_unquantize_msa(int16_t *block, int16_t qmul, +
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for pixblock functions
ffmpeg | branch: master | Shivraj Patil | Sun Jun 14 23:44:26 2015 +0530| [d9deae04a78b6b698b90d050a67a3bd9155aba74] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for pixblock functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for pixblock functions in new file pixblockdsp_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d9deae04a78b6b698b90d050a67a3bd9155aba74 --- libavcodec/mips/Makefile|2 + libavcodec/mips/pixblockdsp_init_mips.c | 53 libavcodec/mips/pixblockdsp_mips.h | 33 +++ libavcodec/mips/pixblockdsp_msa.c | 143 +++ libavcodec/pixblockdsp.c|2 + libavcodec/pixblockdsp.h|2 + libavutil/mips/generic_macros_msa.h |8 ++ 7 files changed, 243 insertions(+) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 823a2c5..c0ecb15 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -28,6 +28,7 @@ OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o +OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -45,5 +46,6 @@ MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o +MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c new file mode 100644 index 000..0f2fb15 --- /dev/null +++ 
b/libavcodec/mips/pixblockdsp_init_mips.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "pixblockdsp_mips.h" + +#if HAVE_MSA +static av_cold void pixblockdsp_init_msa(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) +{ +c->diff_pixels = ff_diff_pixels_msa; + +switch (avctx->bits_per_raw_sample) { +case 9: +case 10: +case 12: +case 14: +c->get_pixels = ff_get_pixels_16_msa; +break; +default: +if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type != +AVMEDIA_TYPE_VIDEO) { +c->get_pixels = ff_get_pixels_8_msa; +} +break; +} +} +#endif // #if HAVE_MSA + +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) +{ +#if HAVE_MSA +pixblockdsp_init_msa(c, avctx, high_bit_depth); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h new file mode 100644 index 000..3eee6e0 --- /dev/null +++ b/libavcodec/mips/pixblockdsp_mips.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for block functions
ffmpeg | branch: master | Shivraj Patil | Sun Jun 14 23:44:25 2015 +0530| [f6276842f38d0511a2d2ab4bb7f5b47b195c8de1] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for block functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for block functions in new file blockdsp_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f6276842f38d0511a2d2ab4bb7f5b47b195c8de1 --- libavcodec/blockdsp.c|2 + libavcodec/blockdsp.h|1 + libavcodec/mips/Makefile |2 + libavcodec/mips/blockdsp_init_mips.c | 40 libavcodec/mips/blockdsp_mips.h | 31 libavcodec/mips/blockdsp_msa.c | 86 ++ 6 files changed, 162 insertions(+) diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c index f5259f6..8480f0b 100644 --- a/libavcodec/blockdsp.c +++ b/libavcodec/blockdsp.c @@ -77,4 +77,6 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx) #else ff_blockdsp_init_x86(c, high_bit_depth); #endif /* FF_API_XVMC */ +if (ARCH_MIPS) +ff_blockdsp_init_mips(c, high_bit_depth); } diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h index c7ad265..32ea107 100644 --- a/libavcodec/blockdsp.h +++ b/libavcodec/blockdsp.h @@ -49,5 +49,6 @@ void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, #else void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth); #endif /* FF_API_XVMC */ +void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth); #endif /* AVCODEC_BLOCKDSP_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index b2c8a7b..823a2c5 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -27,6 +27,7 @@ OBJS-$(CONFIG_H264PRED) += mips/h264pred_init_mips.o OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o +OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += 
mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -43,5 +44,6 @@ MSA-OBJS-$(CONFIG_H264PRED) += mips/h264pred_msa.o MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o +MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/blockdsp_init_mips.c b/libavcodec/mips/blockdsp_init_mips.c new file mode 100644 index 000..99ae316 --- /dev/null +++ b/libavcodec/mips/blockdsp_init_mips.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "blockdsp_mips.h" + +#if HAVE_MSA +static av_cold void blockdsp_init_msa(BlockDSPContext *c, + unsigned high_bit_depth) +{ +c->clear_block = ff_clear_block_msa; +c->clear_blocks = ff_clear_blocks_msa; + +c->fill_block_tab[0] = ff_fill_block16_msa; +c->fill_block_tab[1] = ff_fill_block8_msa; +} +#endif // #if HAVE_MSA + +void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth) +{ +#if HAVE_MSA +blockdsp_init_msa(c, high_bit_depth); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/blockdsp_mips.h b/libavcodec/mips/blockdsp_mips.h new file mode 100644 index 000..0b6bb67 --- /dev/null +++ b/libavcodec/mips/blockdsp_mips.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for hpel functions
ffmpeg | branch: master | Shivraj Patil | Sun Jun 14 23:44:24 2015 +0530| [ee3ef5fda2f11cb5bf555d4f49698eb5dcde6ee1] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for hpel functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for hpel functions in new file hpeldsp_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ee3ef5fda2f11cb5bf555d4f49698eb5dcde6ee1 --- libavcodec/hpeldsp.c|2 + libavcodec/hpeldsp.h|1 + libavcodec/mips/Makefile|2 + libavcodec/mips/hpeldsp_init_mips.c | 73 ++ libavcodec/mips/hpeldsp_mips.h | 87 ++ libavcodec/mips/hpeldsp_msa.c | 1498 +++ libavutil/mips/generic_macros_msa.h | 162 7 files changed, 1825 insertions(+) diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c index 7763760..8e2fd8f 100644 --- a/libavcodec/hpeldsp.c +++ b/libavcodec/hpeldsp.c @@ -365,4 +365,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) ff_hpeldsp_init_ppc(c, flags); if (ARCH_X86) ff_hpeldsp_init_x86(c, flags); +if (ARCH_MIPS) +ff_hpeldsp_init_mips(c, flags); } diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h index 07c293a..1a3cea5 100644 --- a/libavcodec/hpeldsp.h +++ b/libavcodec/hpeldsp.h @@ -99,5 +99,6 @@ void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags); #endif /* AVCODEC_HPELDSP_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 7742eea..b2c8a7b 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -26,6 +26,7 @@ OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o OBJS-$(CONFIG_H264PRED) += mips/h264pred_init_mips.o OBJS-$(CONFIG_H263DSP)+= 
mips/h263dsp_init_mips.o OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_init_mips.o +OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -41,5 +42,6 @@ MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o MSA-OBJS-$(CONFIG_H264PRED) += mips/h264pred_msa.o MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o MSA-OBJS-$(CONFIG_QPELDSP)+= mips/qpeldsp_msa.o +MSA-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/hpeldsp_init_mips.c b/libavcodec/mips/hpeldsp_init_mips.c new file mode 100644 index 000..82f2310 --- /dev/null +++ b/libavcodec/mips/hpeldsp_init_mips.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../hpeldsp.h" +#include "libavcodec/mips/hpeldsp_mips.h" + +#if HAVE_MSA +static void ff_hpeldsp_init_msa(HpelDSPContext *c, int flags) +{ +c->put_pixels_tab[0][0] = ff_put_pixels16_msa; +c->put_pixels_tab[0][1] = ff_put_pixels16_x2_msa; +c->put_pixels_tab[0][2] = ff_put_pixels16_y2_msa; +c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_msa; + +c->put_pixels_tab[1][0] = ff_put_pixels8_msa; +c->put_pixels_tab[1][1] = ff_put_pixels8_x2_msa; +c->put_pixels_tab[1][2] = ff_put_pixels8_y2_msa; +c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_msa; + +c->put_pixels_tab[2][1] = ff_put_pixels4_x2_msa; +c->put_pixels_tab[2][2] = ff_put_pixels4_y2_msa; +c->put_pixels_tab[2][3] = ff_put_pixels4_xy2_msa; + +c->put_no_rnd_pixels_tab[0][0
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions
ffmpeg | branch: master | Shivraj Patil | Sun Jun 14 23:44:22 2015 +0530| [63eaf529bcfa2f685f5978d4ba4d327ac837c2e2] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for H263 lpf functions in new file h263dsp_msa.c Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=63eaf529bcfa2f685f5978d4ba4d327ac837c2e2 --- libavcodec/h263dsp.c|2 + libavcodec/h263dsp.h|1 + libavcodec/mips/Makefile|2 + libavcodec/mips/h263dsp_init_mips.c | 36 libavcodec/mips/h263dsp_mips.h | 36 libavcodec/mips/h263dsp_msa.c | 164 +++ 6 files changed, 241 insertions(+) diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c index a70ff24..b3c0bcd 100644 --- a/libavcodec/h263dsp.c +++ b/libavcodec/h263dsp.c @@ -121,4 +121,6 @@ av_cold void ff_h263dsp_init(H263DSPContext *ctx) if (ARCH_X86) ff_h263dsp_init_x86(ctx); +if (ARCH_MIPS) +ff_h263dsp_init_mips(ctx); } diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h index d2cc2ff..1abea3c 100644 --- a/libavcodec/h263dsp.h +++ b/libavcodec/h263dsp.h @@ -30,5 +30,6 @@ typedef struct H263DSPContext { void ff_h263dsp_init(H263DSPContext *ctx); void ff_h263dsp_init_x86(H263DSPContext *ctx); +void ff_h263dsp_init_mips(H263DSPContext *ctx); #endif /* AVCODEC_H263DSP_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index d80d4e6..63c7298 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -24,6 +24,7 @@ OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_init_mips.o OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o OBJS-$(CONFIG_H264PRED) += mips/h264pred_init_mips.o +OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -37,5 +38,6 @@ MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\ 
MSA-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_msa.o MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o MSA-OBJS-$(CONFIG_H264PRED) += mips/h264pred_msa.o +MSA-OBJS-$(CONFIG_H263DSP)+= mips/h263dsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/h263dsp_init_mips.c b/libavcodec/mips/h263dsp_init_mips.c new file mode 100644 index 000..09bd937 --- /dev/null +++ b/libavcodec/mips/h263dsp_init_mips.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h263dsp_mips.h" + +#if HAVE_MSA +static av_cold void h263dsp_init_msa(H263DSPContext *c) +{ +c->h263_h_loop_filter = ff_h263_h_loop_filter_msa; +c->h263_v_loop_filter = ff_h263_v_loop_filter_msa; +} +#endif // #if HAVE_MSA + +av_cold void ff_h263dsp_init_mips(H263DSPContext *c) +{ +#if HAVE_MSA +h263dsp_init_msa(c); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/h263dsp_mips.h b/libavcodec/mips/h263dsp_mips.h new file mode 100644 index 000..99a43cd --- /dev/null +++ b/libavcodec/mips/h263dsp_mips.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILI
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions
ffmpeg | branch: master | Shivraj Patil | Wed Jun 10 20:06:40 2015 +0530| [fb92f3ecb4d48a5612ee61aa39bd538cac9d08cf] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC idct functions in new file h264idct_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fb92f3ecb4d48a5612ee61aa39bd538cac9d08cf --- libavcodec/mips/Makefile|3 +- libavcodec/mips/h264dsp_init_mips.c | 15 ++ libavcodec/mips/h264dsp_mips.h | 24 ++ libavcodec/mips/h264idct_msa.c | 469 +++ libavutil/mips/generic_macros_msa.h | 96 +++ 5 files changed, 606 insertions(+), 1 deletion(-) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 0a97e7c..993c649 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -31,7 +31,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_idct_msa.o \ mips/hevc_lpf_sao_msa.o \ mips/hevcpred_msa.o -MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o +MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o\ + mips/h264idct_msa.o MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o MSA-OBJS-$(CONFIG_H264PRED) += mips/h264pred_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o diff --git a/libavcodec/mips/h264dsp_init_mips.c b/libavcodec/mips/h264dsp_init_mips.c index d9182f2..7f74adf 100644 --- a/libavcodec/mips/h264dsp_init_mips.c +++ b/libavcodec/mips/h264dsp_init_mips.c @@ -62,6 +62,21 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c, c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa; c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa; c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa; + +c->h264_idct_add = ff_h264_idct_add_msa; +c->h264_idct8_add = ff_h264_idct8_addblk_msa; +c->h264_idct_dc_add = 
ff_h264_idct4x4_addblk_dc_msa; +c->h264_idct8_dc_add = ff_h264_idct8_dc_addblk_msa; +c->h264_idct_add16 = ff_h264_idct_add16_msa; +c->h264_idct8_add4 = ff_h264_idct8_add4_msa; + +if (chroma_format_idc <= 1) +c->h264_idct_add8 = ff_h264_idct_add8_msa; +else +c->h264_idct_add8 = ff_h264_idct_add8_422_msa; + +c->h264_idct_add16intra = ff_h264_idct_add16_intra_msa; +c->h264_luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa; } // if (8 == bit_depth) } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h index ef380fa..0e39057 100644 --- a/libavcodec/mips/h264dsp_mips.h +++ b/libavcodec/mips/h264dsp_mips.h @@ -41,6 +41,30 @@ void ff_h264_h_loop_filter_luma_mbaff_msa(uint8_t *src, int32_t stride, int32_t alpha, int32_t beta, int8_t *tc0); +void ff_h264_idct_add_msa(uint8_t *dst, int16_t *src, int32_t dst_stride); +void ff_h264_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src, + int32_t dst_stride); +void ff_h264_deq_idct_luma_dc_msa(int16_t *dst, int16_t *src, + int32_t de_q_val); +void ff_h264_idct_add16_msa(uint8_t *dst, const int32_t *blk_offset, +int16_t *block, int32_t stride, +const uint8_t nnzc[15 * 8]); +void ff_h264_idct_add16_intra_msa(uint8_t *dst, const int32_t *blk_offset, + int16_t *block, int32_t dst_stride, + const uint8_t nnzc[15 * 8]); +void ff_h264_idct_add8_msa(uint8_t **dst, const int32_t *blk_offset, + int16_t *block, int32_t dst_stride, + const uint8_t nnzc[15 * 8]); +void ff_h264_idct_add8_422_msa(uint8_t **dst, const int32_t *blk_offset, + int16_t *block, int32_t dst_stride, + const uint8_t nnzc[15 * 8]); +void ff_h264_idct8_addblk_msa(uint8_t *dst, int16_t *src, int32_t dst_stride); +void ff_h264_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src, + int32_t dst_stride); +void ff_h264_idct8_add4_msa(uint8_t *dst, const int *blk_offset, +int16_t *blk, int dst_stride, +const uint8_t nnzc[15 * 8]); + void ff_h264_h_lpf_luma_intra_msa(uint8_t *src, int s
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction functions
ffmpeg | branch: master | Shivraj Patil | Wed Jun 10 20:06:39 2015 +0530| [1d70b6fe1d9d67a35daf2ec4c653ba3eff5d31b7] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC intra prediction functions in new file h264pred_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1d70b6fe1d9d67a35daf2ec4c653ba3eff5d31b7 --- libavcodec/h264pred.c|1 + libavcodec/h264pred.h|2 + libavcodec/mips/Makefile |2 + libavcodec/mips/h264dsp_mips.h | 27 ++ libavcodec/mips/h264pred_init_mips.c | 104 + libavcodec/mips/h264pred_msa.c | 723 ++ libavutil/mips/generic_macros_msa.h | 11 + 7 files changed, 870 insertions(+) diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 044fc90..497b080 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -594,4 +594,5 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc); if (ARCH_X86) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc); +if (ARCH_MIPS) ff_h264_pred_init_mips(h, codec_id, bit_depth, chroma_format_idc); } diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 6708292..edeca91 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -117,5 +117,7 @@ void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_mips(H264PredContext *h, int codec_id, +const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index aa98774..0a97e7c 100644 --- 
a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -22,6 +22,7 @@ OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \ mips/hevcpred_init_mips.o OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o +OBJS-$(CONFIG_H264PRED) += mips/h264pred_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ @@ -32,5 +33,6 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevcpred_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o +MSA-OBJS-$(CONFIG_H264PRED) += mips/h264pred_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h index 319f6d3..ef380fa 100644 --- a/libavcodec/mips/h264dsp_mips.h +++ b/libavcodec/mips/h264dsp_mips.h @@ -68,6 +68,33 @@ void ff_weight_h264_pixels8_8_msa(uint8_t *src, int stride, int height, void ff_weight_h264_pixels4_8_msa(uint8_t *src, int stride, int height, int log2_denom, int weight, int offset); +void ff_h264_intra_predict_plane_8x8_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_predict_dc_4blk_8x8_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_predict_hor_dc_8x8_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_predict_vert_dc_8x8_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src, + ptrdiff_t stride); +void ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src, + ptrdiff_t stride); +void ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src, + ptrdiff_t stride); +void ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src, + ptrdiff_t stride); +void ff_h264_intra_predict_plane_16x16_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_pred_vert_8x8_msa(uint8_t *src, ptrdiff_t stride); +void 
ff_h264_intra_pred_horiz_8x8_msa(uint8_t *src, ptrdiff_t stride); +void ff_h264_intra_pred_dc_16x16_msa(uint8_t *src, ptrdiff_t stride); +v
[FFmpeg-cvslog] avcodec/mips: Add 'const' to static arrays in HEVC MSA code
ffmpeg | branch: master | Shivraj Patil | Thu Jun 11 14:25:50 2015 +0530| [e4fb8816f99d09aa6436bff0e4e2453a02b053e1] | committer: Michael Niedermayer avcodec/mips: Add 'const' to static arrays in HEVC MSA code Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e4fb8816f99d09aa6436bff0e4e2453a02b053e1 --- libavcodec/mips/hevc_idct_msa.c | 12 ++-- libavcodec/mips/hevc_mc_uni_msa.c |2 +- libavcodec/mips/hevcpred_msa.c|4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libavcodec/mips/hevc_idct_msa.c b/libavcodec/mips/hevc_idct_msa.c index b5a4c5a..975d91f 100644 --- a/libavcodec/mips/hevc_idct_msa.c +++ b/libavcodec/mips/hevc_idct_msa.c @@ -21,18 +21,18 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" -static int16_t gt8x8_cnst[16] = { +static const int16_t gt8x8_cnst[16] = { 64, 64, 83, 36, 89, 50, 18, 75, 64, -64, 36, -83, 75, -89, -50, -18 }; -static int16_t gt16x16_cnst[64] = { +static const int16_t gt16x16_cnst[64] = { 64, 83, 64, 36, 89, 75, 50, 18, 90, 80, 57, 25, 70, 87, 9, 43, 64, 36, -64, -83, 75, -18, -89, -50, 87, 9, -80, -70, -43, 57, -25, -90, 64, -36, -64, 83, 50, -89, 18, 75, 80, -70, -25, 90, -87, 9, 43, 57, 64, -83, 64, -36, 18, -50, 75, -89, 70, -87, 90, -80, 9, -43, -57, 25 }; -static int16_t gt32x32_cnst0[256] = { +static const int16_t gt32x32_cnst0[256] = { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13, 88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22, @@ -51,18 +51,18 @@ static int16_t gt32x32_cnst0[256] = { 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90 }; -static int16_t gt32x32_cnst1[64] = { +static const int16_t gt32x32_cnst1[64] = { 90, 87, 80, 70, 57, 43, 25, 9, 87, 57, 9, -43, -80, -90, -70, -25, 80, 9, -70, -87, -25, 57, 90, 43, 70, -43, -87, 9, 90, 25, -80, -57, 57, -80, 
-25, 90, -9, -87, 43, 70, 43, -90, 57, 25, -87, 70, 9, -80, 25, -70, 90, -80, 43, 9, -57, 87, 9, -25, 43, -57, 70, -80, 87, -90 }; -static int16_t gt32x32_cnst2[16] = { +static const int16_t gt32x32_cnst2[16] = { 89, 75, 50, 18, 75, -18, -89, -50, 50, -89, 18, 75, 18, -50, 75, -89 }; -static int16_t gt32x32_cnst3[16] = { +static const int16_t gt32x32_cnst3[16] = { 64, 64, 64, 64, 83, 36, -36, -83, 64, -64, -64, 64, 36, -83, 83, -36 }; diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c index 7d02ce8..61a67c9 100644 --- a/libavcodec/mips/hevc_mc_uni_msa.c +++ b/libavcodec/mips/hevc_mc_uni_msa.c @@ -249,7 +249,7 @@ static void copy_width64_msa(uint8_t *src, int32_t src_stride, copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64); } -uint8_t mc_filt_mask_arr[16 * 3] = { +static const uint8_t mc_filt_mask_arr[16 * 3] = { /* 8 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, /* 4 width cases */ diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 8a8aa96..5d9299f 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -22,11 +22,11 @@ #include "libavutil/mips/generic_macros_msa.h" #include "hevcpred_mips.h" -static int8_t intra_pred_angle_up[17] = { +static const int8_t intra_pred_angle_up[17] = { -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32 }; -static int8_t intra_pred_angle_low[16] = { +static const int8_t intra_pred_angle_low[16] = { 32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26 }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions
ffmpeg | branch: master | Shivraj Patil | Thu Jun 11 11:45:01 2015 +0530| [b87dc70c6590556d42ddc21ba0f6e9c790ddd23d] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for AVC chroma mc functions in new file h264chroma_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b87dc70c6590556d42ddc21ba0f6e9c790ddd23d --- libavcodec/mips/Makefile |1 + libavcodec/mips/h264chroma_init_mips.c | 21 + libavcodec/mips/h264chroma_mips.h | 12 + libavcodec/mips/h264chroma_msa.c | 2003 libavutil/mips/generic_macros_msa.h| 56 + 5 files changed, 2093 insertions(+) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 82d1d70..aa98774 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -31,5 +31,6 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_lpf_sao_msa.o \ mips/hevcpred_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o +MSA-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/h264chroma_init_mips.c b/libavcodec/mips/h264chroma_init_mips.c index 4c10da7..1cc5767 100644 --- a/libavcodec/mips/h264chroma_init_mips.c +++ b/libavcodec/mips/h264chroma_init_mips.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2015 Zhou Xiaoyong + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) * * This file is part of FFmpeg. 
* @@ -20,6 +21,23 @@ #include "h264chroma_mips.h" +#if HAVE_MSA +static av_cold void h264chroma_init_msa(H264ChromaContext *c, int bit_depth) +{ +const int high_bit_depth = bit_depth > 8; + +if (!high_bit_depth) { +c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_msa; +c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_msa; +c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_msa; + +c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_msa; +c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_msa; +c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_msa; +} +} +#endif // #if HAVE_MSA + #if HAVE_LOONGSON3 static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth) { @@ -36,6 +54,9 @@ static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth) av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth) { +#if HAVE_MSA +h264chroma_init_msa(c, bit_depth); +#endif // #if HAVE_MSA #if HAVE_LOONGSON3 h264chroma_init_mmi(c, bit_depth); #endif /* HAVE_LOONGSON3 */ diff --git a/libavcodec/mips/h264chroma_mips.h b/libavcodec/mips/h264chroma_mips.h index 314e8a3..7a373b8 100644 --- a/libavcodec/mips/h264chroma_mips.h +++ b/libavcodec/mips/h264chroma_mips.h @@ -22,6 +22,18 @@ #define H264_CHROMA_MIPS_H #include "libavcodec/h264.h" +void ff_put_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); +void ff_put_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); +void ff_put_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); +void ff_avg_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); +void ff_avg_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); +void ff_avg_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride, +int height, int x, int y); void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int 
stride, int h, int x, int y); diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c new file mode 100644 index 000..67d0bc1 --- /dev/null +++ b/libavcodec/mips/h264chroma_msa.c @@ -0,0 +1,2003 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even th
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions
ffmpeg | branch: master | Shivraj Patil | Thu Jun 4 13:49:49 2015 +0530| [d6d98237ed01aec7d79e7724d43004c8b9c8d383] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions in new file hevcpred_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d6d98237ed01aec7d79e7724d43004c8b9c8d383 --- libavcodec/hevcpred.c|3 + libavcodec/hevcpred.h|1 + libavcodec/mips/Makefile |6 +- libavcodec/mips/hevcpred_init_mips.c | 48 + libavcodec/mips/hevcpred_mips.h | 73 + libavcodec/mips/hevcpred_msa.c | 3084 ++ libavutil/mips/generic_macros_msa.h | 46 + 7 files changed, 3259 insertions(+), 2 deletions(-) diff --git a/libavcodec/hevcpred.c b/libavcodec/hevcpred.c index 4598229..02c1766 100644 --- a/libavcodec/hevcpred.c +++ b/libavcodec/hevcpred.c @@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth) HEVC_PRED(8); break; } + +if (ARCH_MIPS) +ff_hevc_pred_init_mips(hpc, bit_depth); } diff --git a/libavcodec/hevcpred.h b/libavcodec/hevcpred.h index 7f14a76..eb17663 100644 --- a/libavcodec/hevcpred.h +++ b/libavcodec/hevcpred.h @@ -41,5 +41,6 @@ typedef struct HEVCPredContext { } HEVCPredContext; void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth); +void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth); #endif /* AVCODEC_HEVCPRED_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index b8bb1fe..82d1d70 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER)+= mips/aacdec_mips.o\ mips/aacpsdsp_mips.o MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER) += mips/aaccoder_mips.o MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o -OBJS-$(CONFIG_HEVC_DECODER) += 
mips/hevcdsp_init_mips.o +OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \ + mips/hevcpred_init_mips.o OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ @@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_bi_msa.o \ mips/hevc_mc_biw_msa.o\ mips/hevc_idct_msa.o \ - mips/hevc_lpf_sao_msa.o + mips/hevc_lpf_sao_msa.o \ + mips/hevcpred_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/hevcpred_init_mips.c b/libavcodec/mips/hevcpred_init_mips.c new file mode 100644 index 000..331cfac --- /dev/null +++ b/libavcodec/mips/hevcpred_init_mips.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/hevc.h" +#include "libavcodec/mips/hevcpred_mips.h" + +#if HAVE_MSA +static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth) +{ +if (8 == bit_depth) { +c->intra_pred[2] = ff_intra_pred_8_16x16_msa; +c->intra_pred[3] = ff_intra_pred_8_32x32_msa; +c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa; +c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa; +c->pred_planar[2] = ff_hevc_intra_pred_p
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao functions
ffmpeg | branch: master | Shivraj Patil | Tue Jun 9 21:08:26 2015 +0530| [271195f85bbce284ac80ed31c62fba9b7e74e99d] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC loop filter and sao functions in new file hevc_lpf_sao_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h In this patch, in comparison with the previous patch, duplicated C functions are removed. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=271195f85bbce284ac80ed31c62fba9b7e74e99d --- libavcodec/mips/Makefile|3 +- libavcodec/mips/hevc_lpf_sao_msa.c | 2088 +++ libavcodec/mips/hevcdsp_init_mips.c | 26 + libavcodec/mips/hevcdsp_mips.h | 30 + libavutil/mips/generic_macros_msa.h | 111 +- 5 files changed, 2256 insertions(+), 2 deletions(-) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 463072a..b8bb1fe 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -26,7 +26,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uniw_msa.o \ mips/hevc_mc_bi_msa.o \ mips/hevc_mc_biw_msa.o\ - mips/hevc_idct_msa.o + mips/hevc_idct_msa.o \ + mips/hevc_lpf_sao_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c b/libavcodec/mips/hevc_lpf_sao_msa.c new file mode 100644 index 000..da1db51 --- /dev/null +++ b/libavcodec/mips/hevc_lpf_sao_msa.c @@ -0,0 +1,2088 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mips/generic_macros_msa.h" +#include "libavcodec/mips/hevcdsp_mips.h" + +static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride, + int32_t beta, int32_t *tc, + uint8_t *p_is_pcm, uint8_t *q_is_pcm) +{ +uint8_t *p3 = src - (stride << 2); +uint8_t *p2 = src - ((stride << 1) + stride); +uint8_t *p1 = src - (stride << 1); +uint8_t *p0 = src - stride; +uint8_t *q0 = src; +uint8_t *q1 = src + stride; +uint8_t *q2 = src + (stride << 1); +uint8_t *q3 = src + (stride << 1) + stride; +uint8_t flag0, flag1; +int32_t dp00, dq00, dp30, dq30, d00, d30; +int32_t dp04, dq04, dp34, dq34, d04, d34; +int32_t tc0, p_is_pcm0, q_is_pcm0, beta30, beta20, tc250; +int32_t tc4, p_is_pcm4, q_is_pcm4, tc254, tmp; +uint64_t dst_val0, dst_val1; +v16u8 dst0, dst1, dst2, dst3, dst4, dst5; +v2i64 cmp0, cmp1, cmp2, p_is_pcm_vec, q_is_pcm_vec; +v8u16 temp0, temp1; +v8i16 temp2; +v8i16 tc_pos, tc_neg; +v8i16 diff0, diff1, delta0, delta1, delta2, abs_delta0; +v16i8 zero = { 0 }; +v8u16 p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src, q3_src; + +dp00 = abs(p2[0] - (p1[0] << 1) + p0[0]); +dq00 = abs(q2[0] - (q1[0] << 1) + q0[0]); +dp30 = abs(p2[3] - (p1[3] << 1) + p0[3]); +dq30 = abs(q2[3] - (q1[3] << 1) + q0[3]); +d00 = dp00 + 
dq00; +d30 = dp30 + dq30; +p_is_pcm0 = p_is_pcm[0]; +q_is_pcm0 = q_is_pcm[0]; +dp04 = abs(p2[4] - (p1[4] << 1) + p0[4]); +dq04 = abs(q2[4] - (q1[4] << 1) + q0[4]); +dp34 = abs(p2[7] - (p1[7] << 1) + p0[7]); +dq34 = abs(q2[7] - (q1[7] << 1) + q0[7]); +d04 = dp04 + dq04; +d34 = dp34 + dq34; +p_is_pcm4 = p_is_pcm[1]; +q_is_pcm4 = q_is_pcm[1]; + +if (!p_is_pcm0 || !p_is_pcm4 || !q_is_pcm0 || !q_i
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions
ffmpeg | branch: master | Shivraj Patil | Thu Jun 4 13:49:47 2015 +0530| [a34d902325895a1cecd322cbe94915225c91017a] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC idct functions in new file hevc_idct_msa.c Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a34d902325895a1cecd322cbe94915225c91017a --- libavcodec/mips/Makefile|3 +- libavcodec/mips/hevc_idct_msa.c | 939 +++ libavcodec/mips/hevcdsp_init_mips.c | 14 + libavcodec/mips/hevcdsp_mips.h | 23 + libavutil/mips/generic_macros_msa.h | 195 5 files changed, 1173 insertions(+), 1 deletion(-) diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 691ea35..463072a 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -25,7 +25,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o\ mips/hevc_mc_uni_msa.o\ mips/hevc_mc_uniw_msa.o \ mips/hevc_mc_bi_msa.o \ - mips/hevc_mc_biw_msa.o + mips/hevc_mc_biw_msa.o\ + mips/hevc_idct_msa.o MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o diff --git a/libavcodec/mips/hevc_idct_msa.c b/libavcodec/mips/hevc_idct_msa.c new file mode 100644 index 000..b5a4c5a --- /dev/null +++ b/libavcodec/mips/hevc_idct_msa.c @@ -0,0 +1,939 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mips/generic_macros_msa.h" +#include "libavcodec/mips/hevcdsp_mips.h" + +static int16_t gt8x8_cnst[16] = { +64, 64, 83, 36, 89, 50, 18, 75, 64, -64, 36, -83, 75, -89, -50, -18 +}; + +static int16_t gt16x16_cnst[64] = { +64, 83, 64, 36, 89, 75, 50, 18, 90, 80, 57, 25, 70, 87, 9, 43, +64, 36, -64, -83, 75, -18, -89, -50, 87, 9, -80, -70, -43, 57, -25, -90, +64, -36, -64, 83, 50, -89, 18, 75, 80, -70, -25, 90, -87, 9, 43, 57, +64, -83, 64, -36, 18, -50, 75, -89, 70, -87, 90, -80, 9, -43, -57, 25 +}; + +static int16_t gt32x32_cnst0[256] = { +90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, +90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13, +88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22, +85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31, +82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38, +78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46, +73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54, +67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61, +61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67, +54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73, +46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78, +38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82, +31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85, +22, -61, 85, -90, 73, 
-38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88, +13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90, +4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90 +}; + +static int16_t gt32x32_cnst1[64] = { +90, 87, 80, 70, 57, 43, 25, 9, 87, 57, 9, -43, -80, -90, -70, -25, +80, 9, -70, -87, -25, 57, 90, 43, 70, -43, -87, 9, 90, 25, -80, -57, +57, -80, -25, 90, -9, -87, 43, 70, 43, -90, 57, 25, -87, 70, 9, -80, +25, -70, 90, -80, 43, 9, -57, 87, 9, -2
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions
ffmpeg | branch: master | Shivraj Patil | Tue Jun 2 14:26:11 2015 +0530| [c96c73b0b0520fc734554d34b4693ca7af4edd02] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC mc epel functions. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c96c73b0b0520fc734554d34b4693ca7af4edd02 --- libavcodec/mips/hevcdsp_init_mips.c | 32 + libavcodec/mips/hevcdsp_mips.h | 30 + libavcodec/mips/hevcdsp_msa.c | 2265 +++ 3 files changed, 2327 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 5dc13fb..837c046 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -62,6 +62,38 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; +c->put_hevc_epel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa; +c->put_hevc_epel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa; +c->put_hevc_epel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa; +c->put_hevc_epel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa; +c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa; +c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa; +c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa; + +c->put_hevc_epel[1][0][1] = ff_hevc_put_hevc_epel_h4_8_msa; +c->put_hevc_epel[2][0][1] = ff_hevc_put_hevc_epel_h6_8_msa; +c->put_hevc_epel[3][0][1] = ff_hevc_put_hevc_epel_h8_8_msa; +c->put_hevc_epel[4][0][1] = ff_hevc_put_hevc_epel_h12_8_msa; +c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_8_msa; +c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_8_msa; +c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_msa; + +c->put_hevc_epel[1][1][0] = ff_hevc_put_hevc_epel_v4_8_msa; 
+c->put_hevc_epel[2][1][0] = ff_hevc_put_hevc_epel_v6_8_msa; +c->put_hevc_epel[3][1][0] = ff_hevc_put_hevc_epel_v8_8_msa; +c->put_hevc_epel[4][1][0] = ff_hevc_put_hevc_epel_v12_8_msa; +c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_8_msa; +c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_8_msa; +c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_msa; + +c->put_hevc_epel[1][1][1] = ff_hevc_put_hevc_epel_hv4_8_msa; +c->put_hevc_epel[2][1][1] = ff_hevc_put_hevc_epel_hv6_8_msa; +c->put_hevc_epel[3][1][1] = ff_hevc_put_hevc_epel_hv8_8_msa; +c->put_hevc_epel[4][1][1] = ff_hevc_put_hevc_epel_hv12_8_msa; +c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_8_msa; +c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_8_msa; +c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_msa; + c->put_hevc_qpel_uni[3][0][0] = ff_hevc_put_hevc_uni_pel_pixels8_8_msa; c->put_hevc_qpel_uni[4][0][0] = ff_hevc_put_hevc_uni_pel_pixels12_8_msa; c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels16_8_msa; diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 64605e6..389f025 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -66,6 +66,36 @@ MC(qpel, hv, 32); MC(qpel, hv, 48); MC(qpel, hv, 64); +MC(epel, h, 4); +MC(epel, h, 6); +MC(epel, h, 8); +MC(epel, h, 12); +MC(epel, h, 16); +MC(epel, h, 24); +MC(epel, h, 32); +MC(epel, h, 48); +MC(epel, h, 64); + +MC(epel, v, 4); +MC(epel, v, 6); +MC(epel, v, 8); +MC(epel, v, 12); +MC(epel, v, 16); +MC(epel, v, 24); +MC(epel, v, 32); +MC(epel, v, 48); +MC(epel, v, 64); + +MC(epel, hv, 4); +MC(epel, hv, 6); +MC(epel, hv, 8); +MC(epel, hv, 12); +MC(epel, hv, 16); +MC(epel, hv, 24); +MC(epel, hv, 32); +MC(epel, hv, 48); +MC(epel, hv, 64); + #undef MC #define UNI_MC(PEL, DIR, WIDTH) \ diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index 1ecef0a..ed3acbb 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ 
-1526,6 +1526,2247 @@ static void hevc_hv_8t_64w_msa(uint8_t *src, int32_t src_stride, filter_x, filter_y, height, 64); } +static void hevc_hz_4t_4x2_msa(uint8_t *src, + int32_t src_stride, + int16_t *dst, + int32_t dst_stride, + const int8_t *filter) +{ +v8i16 filt0, filt1; +v16i8 src0, src1; +v16i8 mask1, vec0, vec1; +v8i16 dst0; +v8
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions
ffmpeg | branch: master | Shivraj Patil | Tue Jun 2 14:26:12 2015 +0530| [aef34ab95048aade062d4c669ea272e0f08b81a4] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC uni mc epel functions. Adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aef34ab95048aade062d4c669ea272e0f08b81a4 --- libavcodec/mips/hevc_mc_uni_msa.c | 2331 +-- libavcodec/mips/hevcdsp_init_mips.c | 30 + libavcodec/mips/hevcdsp_mips.h | 30 + libavutil/mips/generic_macros_msa.h | 51 + 4 files changed, 2364 insertions(+), 78 deletions(-) diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c index 09179d2..7d02ce8 100644 --- a/libavcodec/mips/hevc_mc_uni_msa.c +++ b/libavcodec/mips/hevc_mc_uni_msa.c @@ -319,6 +319,44 @@ uint8_t mc_filt_mask_arr[16 * 3] = { res7_m, out0, out1, out2, out3); \ } +#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \ +( { \ +v8i16 tmp0; \ +\ +tmp0 = __msa_dotp_s_h((v16i8) vec0, (v16i8) filt0); \ +tmp0 = __msa_dpadd_s_h(tmp0, (v16i8) vec1, (v16i8) filt1); \ +\ +tmp0; \ +} ) + +#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \ + mask0, mask1, filt0, filt1, \ + out0, out1) \ +{ \ +v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ + \ +VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ +DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \ +VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ +DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);\ +} + +#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3,\ + mask0, mask1, filt0, filt1,\ + out0, out1, out2, out3)\ +{ \ +v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ + \ +VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ +VSHF_B2_SB(src2, src2, src3, src3, mask0, 
mask0, vec2_m, vec3_m); \ +DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ +out0, out1, out2, out3); \ +VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \ +VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \ +DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \ + out0, out1, out2, out3); \ +} + static void common_hz_8t_4x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int8_t *filter, uint8_t rnd_val) @@ -1696,94 +1734,2231 @@ static void hevc_hv_uni_8t_64w_msa(uint8_t *src, filter_x, filter_y, height, 64); } -#define UNI_MC_COPY(WIDTH) \ -void ff_hevc_put_hevc_uni_pel_pixels##WIDTH##_8_msa(uint8_t *dst, \ -ptrdiff_t dst_stride, \ -uint8_t *src, \ -ptrdiff_t src_stride, \ -int height,\ -intptr_t mx, \ -intptr_t my, \ -int width) \ -{ \ -copy_width##WIDTH##_msa(src, src_stride, dst, dst_stride, height); \ +static void common_hz_4t_4x2_msa(uint8_t *src, int32_t src_stride, + uint8_t *dst, int32_t dst_str
[FFmpeg-cvslog] avcodec/mips/hevcdsp_msa: Restructure as per avutil/mips/ generic_macros_msa.h
ffmpeg | branch: master | Shivraj Patil | Thu May 28 20:32:42 2015 +0530| [7b45790771c0db8b5039ff54ee3cfe68d40e1302] | committer: Michael Niedermayer avcodec/mips/hevcdsp_msa: Restructure as per avutil/mips/generic_macros_msa.h This patch modifies HEVC mc MIPS-SIMD optimized code according to improved version of generic macros. Overall, this patch is just upgrading the code with styling changes and will bring it in sync with MIPS-SIMD optimized latest codebase at our end. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b45790771c0db8b5039ff54ee3cfe68d40e1302 --- libavcodec/mips/hevcdsp_msa.c | 2428 ++--- 1 file changed, 842 insertions(+), 1586 deletions(-) diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index fcc344b..1ecef0a 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -20,405 +20,181 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" +#include "libavcodec/mips/hevc_macros_msa.h" -#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\ - filt0, filt1, filt2, filt3)\ -( { \ -v4i32 out;\ - \ -out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\ -out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1)); \ -out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2)); \ -out = __msa_dpadd_s_w(out, (v8i16) (vec3), (v8i16) (filt3)); \ -out; \ -} ) - -#define HEVC_FILT_8TAP_DPADD_H(vec0, vec1, vec2, vec3, \ - filt0, filt1, filt2, filt3, \ - var_in) \ -( { \ -v8i16 out; \ - \ -out = __msa_dpadd_s_h((v8i16) (var_in), (v16i8) (vec0), (v16i8) (filt0)); \ -out = __msa_dpadd_s_h(out, (v16i8) (vec1), (v16i8) (filt1)); \ -out = __msa_dpadd_s_h(out, (v16i8) (vec2), (v16i8) (filt2)); \ -out = __msa_dpadd_s_h(out, (v16i8) (vec3), (v16i8) (filt3)); \ -out; \ -} ) - -static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride, - int16_t * __restrict dst, int32_t dst_stride, +static void 
hevc_copy_4w_msa(uint8_t *src, int32_t src_stride, + int16_t *dst, int32_t dst_stride, int32_t height) { v16i8 zero = { 0 }; if (2 == height) { -uint64_t out0, out1; v16i8 src0, src1; -v8i16 input0; +v8i16 in0; -LOAD_2VECS_SB(src, src_stride, src0, src1); +LD_SB2(src, src_stride, src0, src1); src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0); - -input0 = (v8i16) __msa_ilvr_b(zero, src0); - -input0 <<= 6; - -out0 = __msa_copy_u_d((v2i64) input0, 0); -out1 = __msa_copy_u_d((v2i64) input0, 1); - -STORE_DWORD(dst, out0); -dst += dst_stride; -STORE_DWORD(dst, out1); +in0 = (v8i16) __msa_ilvr_b(zero, src0); +in0 <<= 6; +ST8x2_UB(in0, dst, 2 * dst_stride); } else if (4 == height) { -uint64_t out0, out1, out2, out3; v16i8 src0, src1, src2, src3; -v8i16 input0, input1; +v8i16 in0, in1; -LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3); +LD_SB4(src, src_stride, src0, src1, src2, src3); -src0 = (v16i8) __msa_ilvr_w((v4i32) src1, (v4i32) src0); -src1 = (v16i8) __msa_ilvr_w((v4i32) src3, (v4i32) src2); - -input0 = (v8i16) __msa_ilvr_b(zero, src0); -input1 = (v8i16) __msa_ilvr_b(zero, src1); - -input0 <<= 6; -input1 <<= 6; - -out0 = __msa_copy_u_d((v2i64) input0, 0); -out1 = __msa_copy_u_d((v2i64) input0, 1); -out2 = __msa_copy_u_d((v2i64) input1, 0); -out3 = __msa_copy_u_d((v2i64) input1, 1); - -STORE_DWORD(dst, out0); -dst += dst_stride; -STORE_DWORD(dst, out1); -dst += dst_stride; -STORE_DWORD(dst, out2); -dst += dst_stride; -STORE_DWORD(dst, out3); +ILVR_W2_SB(src1, src0, src3, src2, src0, src1); +ILVR_B2_SH(zero, src0, zero, src1, in0, in1); +in0 <<= 6; +
[FFmpeg-cvslog] avcodec/mips: Restructure as per avutil/mips/generic_macros_msa.h
ffmpeg | branch: master | Shivraj Patil | Thu May 28 15:01:25 2015 +0530| [bcd7bf7eeb09a395cc01698842d1b8be9af483fc] | committer: Michael Niedermayer avcodec/mips: Restructure as per avutil/mips/generic_macros_msa.h This patch modifies H264 loopfilter, weighted & bi-weighted prediction MIPS-SIMD optimized code according to improved version of generic macros. Also there are minor code alignment changes. Overall, this patch is just upgrading the code with styling changes and will bring it in sync with MIPS-SIMD optimized latest codebase at our end. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bcd7bf7eeb09a395cc01698842d1b8be9af483fc --- libavcodec/mips/h264dsp_msa.c | 1758 +++-- 1 file changed, 634 insertions(+), 1124 deletions(-) diff --git a/libavcodec/mips/h264dsp_msa.c b/libavcodec/mips/h264dsp_msa.c index f728fcc..fce01ac 100644 --- a/libavcodec/mips/h264dsp_msa.c +++ b/libavcodec/mips/h264dsp_msa.c @@ -21,19 +21,16 @@ #include "libavutil/mips/generic_macros_msa.h" #include "h264dsp_mips.h" -static void avc_wgt_4x2_msa(uint8_t *data, -int32_t stride, -int32_t log2_denom, -int32_t src_weight, +static void avc_wgt_4x2_msa(uint8_t *data, int32_t stride, +int32_t log2_denom, int32_t src_weight, int32_t offset_in) { uint32_t data0, data1; v16u8 zero = { 0 }; v16u8 src0, src1; v4i32 res0, res1; -v8i16 temp0, temp1; -v16u8 vec0, vec1; -v8i16 wgt, denom, offset; +v8i16 temp0, temp1, vec0, vec1, wgt, denom, offset; +v8u16 out0, out1; offset_in <<= (log2_denom); @@ -45,53 +42,40 @@ static void avc_wgt_4x2_msa(uint8_t *data, offset = __msa_fill_h(offset_in); denom = __msa_fill_h(log2_denom); -data0 = LOAD_WORD(data); -data1 = LOAD_WORD(data + stride); +data0 = LW(data); +data1 = LW(data + stride); src0 = (v16u8) __msa_fill_w(data0); src1 = (v16u8) __msa_fill_w(data1); -ILVR_B_2VECS_UB(src0, src1, zero, zero, vec0, vec1); +ILVR_B2_SH(zero, src0, zero, src1, vec0, vec1); +MUL2(wgt, vec0, wgt, 
vec1, temp0, temp1); +ADDS_SH2_SH(temp0, offset, temp1, offset, temp0, temp1); +MAXI_SH2_SH(temp0, temp1, 0); -temp0 = wgt * (v8i16) vec0; -temp1 = wgt * (v8i16) vec1; +out0 = (v8u16) __msa_srl_h(temp0, denom); +out1 = (v8u16) __msa_srl_h(temp1, denom); -temp0 = __msa_adds_s_h(temp0, offset); -temp1 = __msa_adds_s_h(temp1, offset); - -temp0 = __msa_maxi_s_h(temp0, 0); -temp1 = __msa_maxi_s_h(temp1, 0); - -temp0 = __msa_srl_h(temp0, denom); -temp1 = __msa_srl_h(temp1, denom); - -temp0 = (v8i16) __msa_sat_u_h((v8u16) temp0, 7); -temp1 = (v8i16) __msa_sat_u_h((v8u16) temp1, 7); - -res0 = (v4i32) __msa_pckev_b((v16i8) temp0, (v16i8) temp0); -res1 = (v4i32) __msa_pckev_b((v16i8) temp1, (v16i8) temp1); +SAT_UH2_UH(out0, out1, 7); +PCKEV_B2_SW(out0, out0, out1, out1, res0, res1); data0 = __msa_copy_u_w(res0, 0); data1 = __msa_copy_u_w(res1, 0); - -STORE_WORD(data, data0); +SW(data0, data); data += stride; -STORE_WORD(data, data1); +SW(data1, data); } -static void avc_wgt_4x4multiple_msa(uint8_t *data, -int32_t stride, -int32_t height, -int32_t log2_denom, -int32_t src_weight, -int32_t offset_in) +static void avc_wgt_4x4multiple_msa(uint8_t *data, int32_t stride, +int32_t height, int32_t log2_denom, +int32_t src_weight, int32_t offset_in) { uint8_t cnt; uint32_t data0, data1, data2, data3; v16u8 zero = { 0 }; v16u8 src0, src1, src2, src3; -v8u16 temp0, temp1, temp2, temp3; -v8i16 wgt, denom, offset; +v8u16 temp0, temp1, temp2, temp3, wgt; +v8i16 denom, offset; offset_in <<= (log2_denom); @@ -99,63 +83,47 @@ static void avc_wgt_4x4multiple_msa(uint8_t *data, offset_in += (1 << (log2_denom - 1)); } -wgt = __msa_fill_h(src_weight); +wgt = (v8u16) __msa_fill_h(src_weight); offset = __msa_fill_h(offset_in); denom = __msa_fill_h(log2_denom); for (cnt = height / 4; cnt--;) { -LOAD_4WORDS_WITH_STRIDE(data, stride, data0, data1, data2, data3); +LW4(data, stride, data0, data1, data2, data3); src0 = (v16u8) __msa_fill_w(data0); src1 = (v16u8) __msa_fill_w(data1); src2 = (v16u8) 
__msa_fill_w(data2); src3 = (v16u8) __msa_fill_w(data3); -ILVR_B_4VECS_UH(src0, src1, src2, src3, zero, zero, zero, zero, -
[FFmpeg-cvslog] avutil/mips: Restructure of generic macros
ffmpeg | branch: master | Shivraj Patil | Thu May 28 14:54:53 2015 +0530| [02a49912301fa6eac68fb790255275897fc8a971] | committer: Michael Niedermayer avutil/mips: Restructure of generic macros This patch includes restructuring of existing macros and addition of more generic macros. This change was necessary to avoid repeated review comments in remaining patches which we were about to submit. Also this patch reduces number of code lines due to maximum use of generic macros, allows better code alignment & readability etc. These modifications in the commonly used "libavutil/mips/generic_macros_msa.h" impact the already accepted code, hence re-submitting it in 2/4, 3/4 & 4/4. Overall, this patch set is just upgrading the code with styling changes and will bring it in sync with MIPS-SIMD optimized latest codebase at our end. Signed-off-by: Shivraj Patil Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=02a49912301fa6eac68fb790255275897fc8a971 --- libavutil/mips/generic_macros_msa.h | 2570 +-- 1 file changed, 1279 insertions(+), 1291 deletions(-) diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index 48dc78e..fbe7abf 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -24,1403 +24,1391 @@ #include #include -#define LOAD_UB(psrc) \ -( { \ -v16u8 out_m; \ -out_m = *((v16u8 *) (psrc)); \ -out_m;\ -} ) - -#define LOAD_SB(psrc) \ -( { \ -v16i8 out_m; \ -out_m = *((v16i8 *) (psrc)); \ -out_m;\ -} ) +#define LD_B(RTYPE, psrc) *((RTYPE *)(psrc)) +#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) +#define LD_SB(...) LD_B(v16i8, __VA_ARGS__) -#define LOAD_UH(psrc) *((const v8u16 *)(psrc)) +#define LD_H(RTYPE, psrc) *((RTYPE *)(psrc)) +#define LD_SH(...) LD_H(v8i16, __VA_ARGS__) -#define LOAD_SH(psrc) \ -( { \ -v8i16 out_m; \ -out_m = *((v8i16 *) (psrc)); \ -out_m;\ -} ) +#define LD_W(RTYPE, psrc) *((RTYPE *)(psrc)) +#define LD_SW(...) 
LD_W(v4i32, __VA_ARGS__) -#define LOAD_SW(psrc) *((const v4i32 *)(psrc)) +#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_UB(...) ST_B(v16u8, __VA_ARGS__) -#define STORE_UB(vec, pdest) *((v16u8 *)(pdest)) = (vec) -#define STORE_SB(vec, pdest) *((v16i8 *)(pdest)) = (vec) +#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_SH(...) ST_H(v8i16, __VA_ARGS__) -#define STORE_SH(vec, pdest) \ -{ \ -*((v8i16 *) (pdest)) = (vec); \ -} - -#define STORE_SW(vec, pdest) \ -{ \ -*((v4i32 *) (pdest)) = (vec); \ -} +#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in) +#define ST_SW(...) ST_W(v4i32, __VA_ARGS__) #if (__mips_isa_rev >= 6) -#define LOAD_WORD(psrc) \ -( { \ -uint8_t *src_m = (uint8_t *) (psrc); \ -uint32_t val_m; \ - \ -__asm__ volatile (\ -"lw %[val_m], %[src_m] \n\t" \ - \ -: [val_m] "=r" (val_m)\ -: [src_m] "m" (*src_m)\ -);\ - \ -val_m;\ +#define LW(psrc) \ +( {\ +uint8_t *psrc_m = (uint8_t *) (psrc); \ +uint32_t val_m;\ + \ +__asm__ volatile ( \ +"lw %[val_m], %[psrc_m] \n\t" \ + \ +: [val_m] "=r" (val_m) \ +: [psrc_m] "m" (*psrc_m) \ +); \ + \ +val_m; \ } ) #if (__mips == 64) -#define LOAD_DWORD(psrc) \ -( { \ -uint8_t *src_m = (uint8_t *) (psrc); \ -uint64_t val_m = 0; \ - \ -__asm__ volatile (\ -"ld %[val_m], %[src_m] \n\t" \ - \ -
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni hv mc functions
ffmpeg | branch: master | Shivraj Patil | Fri May 8 13:50:01 2015 +0530| [8252f63d1b982fb8adeb3ac3a79406e3cb422650] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni hv mc functions Signed-off-by: Shivraj Patil Reviewed-by: Nedeljko Babic Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8252f63d1b982fb8adeb3ac3a79406e3cb422650 --- libavcodec/mips/hevcdsp_init_mips.c |9 + libavcodec/mips/hevcdsp_mips.h |9 + libavcodec/mips/hevcdsp_msa.c | 512 +++ 3 files changed, 530 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 1e22f35..d2e3c60 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -87,6 +87,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_msa; c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_msa; c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_msa; + +c->put_hevc_qpel_uni[1][1][1] = ff_hevc_put_hevc_uni_qpel_hv4_8_msa; +c->put_hevc_qpel_uni[3][1][1] = ff_hevc_put_hevc_uni_qpel_hv8_8_msa; +c->put_hevc_qpel_uni[4][1][1] = ff_hevc_put_hevc_uni_qpel_hv12_8_msa; +c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_8_msa; +c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_8_msa; +c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_8_msa; +c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_8_msa; +c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 76a6784..a8c8848 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -106,4 +106,13 @@ UNI_MC(qpel, v, 32); UNI_MC(qpel, v, 48); UNI_MC(qpel, v, 64); +UNI_MC(qpel, hv, 4); +UNI_MC(qpel, hv, 8); +UNI_MC(qpel, hv, 12); 
+UNI_MC(qpel, hv, 16); +UNI_MC(qpel, hv, 24); +UNI_MC(qpel, hv, 32); +UNI_MC(qpel, hv, 48); +UNI_MC(qpel, hv, 64); + #undef UNI_MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index d0e6f64..781264d 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -46,6 +46,24 @@ out; \ } ) +#define HEVC_RND_W_CLIP_UNSIGNED_CHAR_W_VEC2(vec0_r, vec0_l, \ + vec1_r, vec1_l, \ + out0, out1) \ +{ \ +(vec0_r) = __msa_srari_w((vec0_r), 6); \ +(vec0_l) = __msa_srari_w((vec0_l), 6); \ +(vec1_r) = __msa_srari_w((vec1_r), 6); \ +(vec1_l) = __msa_srari_w((vec1_l), 6); \ + \ +(vec0_r) = CLIP_UNSIGNED_CHAR_W((vec0_r)); \ +(vec0_l) = CLIP_UNSIGNED_CHAR_W((vec0_l)); \ +(vec1_r) = CLIP_UNSIGNED_CHAR_W((vec1_r)); \ +(vec1_l) = CLIP_UNSIGNED_CHAR_W((vec1_l)); \ + \ +out0 = (v4i32) __msa_pckev_h((v8i16) (vec0_l), (v8i16) (vec0_r)); \ +out1 = (v4i32) __msa_pckev_h((v8i16) (vec1_l), (v8i16) (vec1_r)); \ +} + static void hevc_copy_4w_msa(uint8_t * __restrict src, int32_t src_stride, int16_t * __restrict dst, int32_t dst_stride, int32_t height) @@ -2270,6 +2288,469 @@ static void hevc_hv_8t_64w_msa(uint8_t * __restrict src, int32_t src_stride, filter_x, filter_y, height, 64); } +static void hevc_hv_uni_8t_4w_msa(uint8_t * __restrict src, + int32_t src_stride, + uint8_t * __restrict dst, + int32_t dst_stride, + const int8_t * __restrict filter_x, + const int8_t * __restrict filter_y, + int32_t height) +{ +uint32_t loop_cnt; +uint32_t out0, out1; +v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; +v8i16 filt0, filt1, filt2, filt3, filter_vec; +v4i32 filt_h0, filt_h1, filt_h2, filt_h3; +v16i8 mask1, mask2, mask3; +v8u16 const_vec; +v16i8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; +v16i8 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; +
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni copy, uni horizontal and uni vertical mc functions
ffmpeg | branch: master | Shivraj Patil | Mon May 4 18:09:34 2015 +0530| [7174df44fe7b27c85637438ee0052d9d9ff8f382] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC uni copy, uni horizontal and uni vertical mc functions Signed-off-by: Shivraj Patil Reviewed-by: Nedeljko Babic Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7174df44fe7b27c85637438ee0052d9d9ff8f382 --- libavcodec/mips/hevcdsp_init_mips.c | 26 + libavcodec/mips/hevcdsp_mips.h | 40 + libavcodec/mips/hevcdsp_msa.c | 1822 +++ libavutil/mips/generic_macros_msa.h | 533 ++ 4 files changed, 2421 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 4fec336..1e22f35 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -61,6 +61,32 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa; c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; + +c->put_hevc_qpel_uni[3][0][0] = ff_hevc_put_hevc_uni_pel_pixels8_8_msa; +c->put_hevc_qpel_uni[4][0][0] = ff_hevc_put_hevc_uni_pel_pixels12_8_msa; +c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels16_8_msa; +c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels24_8_msa; +c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_msa; +c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_msa; +c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_msa; + +c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_uni_qpel_h4_8_msa; +c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_uni_qpel_h8_8_msa; +c->put_hevc_qpel_uni[4][0][1] = ff_hevc_put_hevc_uni_qpel_h12_8_msa; +c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_8_msa; +c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_8_msa; 
+c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_msa; +c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_msa; +c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_msa; + +c->put_hevc_qpel_uni[1][1][0] = ff_hevc_put_hevc_uni_qpel_v4_8_msa; +c->put_hevc_qpel_uni[3][1][0] = ff_hevc_put_hevc_uni_qpel_v8_8_msa; +c->put_hevc_qpel_uni[4][1][0] = ff_hevc_put_hevc_uni_qpel_v12_8_msa; +c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_8_msa; +c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_8_msa; +c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_msa; +c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_msa; +c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 4f7f273..76a6784 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -67,3 +67,43 @@ MC(qpel, hv, 48); MC(qpel, hv, 64); #undef MC + +#define UNI_MC(PEL, DIR, WIDTH) \ +void ff_hevc_put_hevc_uni_##PEL##_##DIRWIDTH##_8_msa(uint8_t *dst, \ + ptrdiff_t dst_stride, \ + uint8_t *src, \ + ptrdiff_t src_stride, \ + int height, \ + intptr_t mx, \ + intptr_t my, \ + int width) + +UNI_MC(pel, pixels, 4); +UNI_MC(pel, pixels, 6); +UNI_MC(pel, pixels, 8); +UNI_MC(pel, pixels, 12); +UNI_MC(pel, pixels, 16); +UNI_MC(pel, pixels, 24); +UNI_MC(pel, pixels, 32); +UNI_MC(pel, pixels, 48); +UNI_MC(pel, pixels, 64); + +UNI_MC(qpel, h, 4); +UNI_MC(qpel, h, 8); +UNI_MC(qpel, h, 12); +UNI_MC(qpel, h, 16); +UNI_MC(qpel, h, 24); +UNI_MC(qpel, h, 32); +UNI_MC(qpel, h, 48); +UNI_MC(qpel, h, 64); + +UNI_MC(qpel, v, 4); +UNI_MC(qpel, v, 8); +UNI_MC(qpel, v, 12); +UNI_MC(qpel, v, 16); +UNI_MC(qpel, v, 24); +UNI_MC(qpel, v, 32); +UNI_MC(qpel, v, 48); +UNI_MC(qpel, v, 64); + +#undef UNI_MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index fcc344b..d0e6f64 100644 --- 
a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -2270,6 +2270,1767 @@ static void hevc_hv_8t_64w_msa(uint8_
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions
ffmpeg | branch: master | Shivraj Patil | Wed Apr 22 14:52:44 2015 +0530| [97f074f134048276db2f2c552e6e6b24fe0a6894] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC copy and hv mc functions Incorporated review comment. Removed "__" from volatile. Signed-off-by: Shivraj Patil Reviewed-by: Nedeljko Babic Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=97f074f134048276db2f2c552e6e6b24fe0a6894 --- libavcodec/mips/hevcdsp_init_mips.c | 19 + libavcodec/mips/hevcdsp_mips.h | 20 + libavcodec/mips/hevcdsp_msa.c | 1098 +++ libavutil/mips/generic_macros_msa.h | 133 + 4 files changed, 1270 insertions(+) diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c index 05ed81f..4fec336 100644 --- a/libavcodec/mips/hevcdsp_init_mips.c +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -25,6 +25,16 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, const int bit_depth) { if (8 == bit_depth) { +c->put_hevc_qpel[1][0][0] = ff_hevc_put_hevc_pel_pixels4_8_msa; +c->put_hevc_qpel[2][0][0] = ff_hevc_put_hevc_pel_pixels6_8_msa; +c->put_hevc_qpel[3][0][0] = ff_hevc_put_hevc_pel_pixels8_8_msa; +c->put_hevc_qpel[4][0][0] = ff_hevc_put_hevc_pel_pixels12_8_msa; +c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_8_msa; +c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_8_msa; +c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_msa; +c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_msa; +c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_msa; + c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa; c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa; c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa; @@ -42,6 +52,15 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa; c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa; 
c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa; + +c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_msa; +c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_msa; +c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_msa; +c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_msa; +c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_msa; +c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_msa; +c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_msa; +c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_msa; } } #endif // #if HAVE_MSA diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h index 13cdb5b..4f7f273 100644 --- a/libavcodec/mips/hevcdsp_mips.h +++ b/libavcodec/mips/hevcdsp_mips.h @@ -29,6 +29,16 @@ void ff_hevc_put_hevc_##PEL##_##DIRWIDTH##_8_msa(int16_t *dst, \ intptr_t my, \ int width) +MC(pel, pixels, 4); +MC(pel, pixels, 6); +MC(pel, pixels, 8); +MC(pel, pixels, 12); +MC(pel, pixels, 16); +MC(pel, pixels, 24); +MC(pel, pixels, 32); +MC(pel, pixels, 48); +MC(pel, pixels, 64); + MC(qpel, h, 4); MC(qpel, h, 8); MC(qpel, h, 12); @@ -46,4 +56,14 @@ MC(qpel, v, 24); MC(qpel, v, 32); MC(qpel, v, 48); MC(qpel, v, 64); + +MC(qpel, hv, 4); +MC(qpel, hv, 8); +MC(qpel, hv, 12); +MC(qpel, hv, 16); +MC(qpel, hv, 24); +MC(qpel, hv, 32); +MC(qpel, hv, 48); +MC(qpel, hv, 64); + #undef MC diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c index 88e97d6..fcc344b 100644 --- a/libavcodec/mips/hevcdsp_msa.c +++ b/libavcodec/mips/hevcdsp_msa.c @@ -21,6 +21,18 @@ #include "libavutil/mips/generic_macros_msa.h" #include "libavcodec/mips/hevcdsp_mips.h" +#define HEVC_FILT_8TAP_DPADD_W(vec0, vec1, vec2, vec3,\ + filt0, filt1, filt2, filt3)\ +( { \ +v4i32 out;\ + \ +out = __msa_dotp_s_w((v8i16) (vec0), (v8i16) (filt0));\ +out = __msa_dpadd_s_w(out, (v8i16) (vec1), (v8i16) (filt1)); \ +out = __msa_dpadd_s_w(out, (v8i16) (vec2), (v8i16) (filt2)); \ +out = __msa_dpadd_s_w(out, (v8i16) 
(vec3), (v8i16) (filt3)); \ +out; \ +} ) + #define HEVC_FILT_8TAP_DPADD_H(vec0, v
[FFmpeg-cvslog] Makefile: Add support for MSA (MIPS-SIMD-Arch)
ffmpeg | branch: master | Shivraj Patil | Fri Apr 17 19:00:27 2015 +0530| [35a7170e69b31f0299a85261bf48babdc5d4883f] | committer: Michael Niedermayer Makefile: Add support for MSA (MIPS-SIMD-Arch) Signed-off-by: Shivraj Patil Reviewed-by: Nedeljko Babic Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=35a7170e69b31f0299a85261bf48babdc5d4883f --- Makefile |2 +- arch.mak |1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ca2ce59..fe0e02f 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \ HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS\ ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ ALTIVEC-OBJS MMX-OBJS YASM-OBJS \ - MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS\ + MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MSA-OBJS \ OBJS SLIBOBJS HOSTOBJS TESTOBJS define RESET diff --git a/arch.mak b/arch.mak index 48bc2d3..39f2917 100644 --- a/arch.mak +++ b/arch.mak @@ -7,6 +7,7 @@ OBJS-$(HAVE_NEON)+= $(NEON-OBJS)$(NEON-OBJS-yes) OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS)$(MIPSFPU-OBJS-yes) OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS) $(MIPSDSPR1-OBJS-yes) OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes) +OBJS-$(HAVE_MSA) += $(MSA-OBJS)$(MSA-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC horizontal and vertical mc functions
ffmpeg | branch: master | Shivraj Patil | Fri Apr 17 19:00:28 2015 +0530| [4efc0e6451fa77e1e1d5b4b3873917c1916765f1] | committer: Michael Niedermayer avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC horizontal and vertical mc functions Signed-off-by: Shivraj Patil Reviewed-by: Nedeljko Babic Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4efc0e6451fa77e1e1d5b4b3873917c1916765f1 --- libavcodec/hevcdsp.c|2 + libavcodec/hevcdsp.h|1 + libavcodec/mips/Makefile|2 + libavcodec/mips/hevcdsp_init_mips.c | 54 ++ libavcodec/mips/hevcdsp_mips.h | 49 ++ libavcodec/mips/hevcdsp_msa.c | 1259 +++ libavutil/mips/generic_macros_msa.h | 285 7 files changed, 1652 insertions(+) diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index 04af178..be01e92 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -261,4 +261,6 @@ int i = 0; ff_hevc_dsp_init_x86(hevcdsp, bit_depth); if (ARCH_ARM) ff_hevcdsp_init_arm(hevcdsp, bit_depth); +if (ARCH_MIPS) +ff_hevc_dsp_init_mips(hevcdsp, bit_depth); } diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index a891ea7..d2ea867 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -129,4 +129,5 @@ extern const int8_t ff_hevc_qpel_filters[3][16]; void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth); +void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth); #endif /* AVCODEC_HEVCDSP_H */ diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 8e2459f..b41d8c7 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -18,3 +18,5 @@ OBJS-$(CONFIG_AAC_DECODER)+= mips/aacdec_mips.o\ mips/aacpsdsp_mips.o MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER) += mips/aaccoder_mips.o MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o +OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o +MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o diff --git 
a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c new file mode 100644 index 000..05ed81f --- /dev/null +++ b/libavcodec/mips/hevcdsp_init_mips.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/mips/hevcdsp_mips.h" + +#if HAVE_MSA +static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c, + const int bit_depth) +{ +if (8 == bit_depth) { +c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_msa; +c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_msa; +c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_msa; +c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_msa; +c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_8_msa; +c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_msa; +c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_msa; +c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_msa; + +c->put_hevc_qpel[1][1][0] = ff_hevc_put_hevc_qpel_v4_8_msa; +c->put_hevc_qpel[3][1][0] = ff_hevc_put_hevc_qpel_v8_8_msa; +c->put_hevc_qpel[4][1][0] = ff_hevc_put_hevc_qpel_v12_8_msa; +c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_8_msa; +c->put_hevc_qpel[6][1][0] = 
ff_hevc_put_hevc_qpel_v24_8_msa; +c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_msa; +c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_msa; +c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_msa; +} +} +#endif // #if HAVE_MSA + +void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth) +{ +#if HAVE_MSA +hevc_dsp_init_msa(c, bit_depth); +#endif // #if HAVE_MSA +} diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h new file mode 1006