[FFmpeg-devel] 回复: Re: [PATCH 1/7] configure: loongson disable mipsfpu and mipsdsp optimizations

2016-10-10 Thread
Hi,
1. The mipsdsp optimizations are not supported by all Loongson CPUs.
2. The mipsfpu and mipsdspr2 optimizations may be supported by
3A2000/3A3000/3A4000, but this has not been tested yet.
3. Loongson currently only supports the MMI (loongSIMD) optimizations.






在2016年10月10日 19:20 ,Michael Niedermayer写道:

On Mon, Oct 10, 2016 at 04:07:20PM +0800, 周晓勇 wrote:
> From efccaec2b5b543eb98f0400fdaef88d9147d08c1 Mon Sep 17 00:00:00 2001
> From: Zhou Xiaoyong 
> Date: Mon, 10 Oct 2016 14:19:58 +0800
> Subject: [PATCH 1/7] configure: loongson disable mipsfpu and mipsdsp
>  optimizations

the commit message is missing an explanation for "why?"


[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The real ebay dictionary, page 2
"100% positive feedback" - "All either got their money back or didnt complain"
"Best seller ever, very honest" - "Seller refunded buyer after failed scam"
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 6/7] avcodec/mips: version 1 of wmv2dsp optimizations for loongson mmi

2016-10-10 Thread
From 9b19ea3364d96a6eb0f3441f549c8d20ede36592 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Mon, 10 Oct 2016 15:04:35 +0800
Subject: [PATCH 6/7] avcodec/mips: version 1 of wmv2dsp optimizations for
 loongson mmi


---
 libavcodec/mips/Makefile|   2 +
 libavcodec/mips/wmv2dsp_init_mips.c |  38 +
 libavcodec/mips/wmv2dsp_mips.h  |  29 
 libavcodec/mips/wmv2dsp_mmi.c   | 278 
 libavcodec/wmv2dsp.c|   3 +
 libavcodec/wmv2dsp.h|   1 +
 6 files changed, 351 insertions(+)
 create mode 100644 libavcodec/mips/wmv2dsp_init_mips.c
 create mode 100644 libavcodec/mips/wmv2dsp_mips.h
 create mode 100644 libavcodec/mips/wmv2dsp_mmi.c


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index cf54f38..2b6f37e 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -36,6 +36,7 @@ OBJS-$(CONFIG_MPEGVIDEO)  += 
mips/mpegvideo_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_init_mips.o
 OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
 OBJS-$(CONFIG_MPEG4_DECODER)  += mips/xvididct_init_mips.o
+OBJS-$(CONFIG_WMV2DSP)+= mips/wmv2dsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -79,3 +80,4 @@ MMI-OBJS-$(CONFIG_PIXBLOCKDSP)+= 
mips/pixblockdsp_mmi.o
 MMI-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_mmi.o
 MMI-OBJS-$(CONFIG_VP8_DECODER)+= mips/vp8dsp_mmi.o
 MMI-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_mmi.o
+MMI-OBJS-$(CONFIG_WMV2DSP)+= mips/wmv2dsp_mmi.o
diff --git a/libavcodec/mips/wmv2dsp_init_mips.c 
b/libavcodec/mips/wmv2dsp_init_mips.c
new file mode 100644
index 000..51dd207
--- /dev/null
+++ b/libavcodec/mips/wmv2dsp_init_mips.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "wmv2dsp_mips.h"
+
+#if HAVE_MMI
+static av_cold void wmv2dsp_init_mmi(WMV2DSPContext *c)
+{
+c->idct_add  = ff_wmv2_idct_add_mmi;
+c->idct_put  = ff_wmv2_idct_put_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_wmv2dsp_init_mips(WMV2DSPContext *c)
+{
+#if HAVE_MMI
+wmv2dsp_init_mmi(c);
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/wmv2dsp_mips.h b/libavcodec/mips/wmv2dsp_mips.h
new file mode 100644
index 000..22894c5
--- /dev/null
+++ b/libavcodec/mips/wmv2dsp_mips.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_WMV2DSP_MIPS_H
+#define AVCODEC_MIPS_WMV2DSP_MIPS_H
+
+#include "libavcodec/wmv2dsp.h"
+
+void ff_wmv2_idct_add_mmi(uint8_t *dest, int line_size, int16_t *block);
+void ff_wmv2_idct_put_mmi(uint8_t *dest, int line_size, int16_t *block);
+
+#endif /* AVCODEC_MIPS_WMV2DSP_MIPS_H */
diff --git a/libavcodec/mips/wmv2dsp_mmi.c b/libavcodec/mips/wmv2dsp_mmi.c
new file mode 100644
index 000..1f6ccb2
--- /dev/null
+++ b/libavcodec/mips/wmv2dsp_mmi.c
@@ -0,0 +1,278 @@
+/*
+ * WMV2 - DSP functions Loongson MMI-optimized
+ *
+ * Copyright (c) 2016 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the te

[FFmpeg-devel] [PATCH 4/7] avcodec/mips: loongson set xvid as default idct algorithm

2016-10-10 Thread
From e4d648fd34601cbe23a54af5d9489537896c6478 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Mon, 10 Oct 2016 14:42:58 +0800
Subject: [PATCH 4/7] avcodec/mips: loongson set xvid as default idct algorithm


---
 libavcodec/mips/idctdsp_init_mips.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)


diff --git a/libavcodec/mips/idctdsp_init_mips.c 
b/libavcodec/mips/idctdsp_init_mips.c
index 8c26bca..7beb1f6 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -20,6 +20,7 @@
  */
 
 #include "idctdsp_mips.h"
+#include "xvididct_mips.h"
 
 #if HAVE_MSA
 static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
@@ -49,7 +50,9 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, 
AVCodecContext *avctx,
 (avctx->bits_per_raw_sample != 10) &&
 (avctx->bits_per_raw_sample != 12) &&
 (avctx->idct_algo == FF_IDCT_AUTO)) {
-c->idct = ff_simple_idct_mmi;
+c->idct_put = ff_xvid_idct_put_mmi;
+c->idct_add = ff_xvid_idct_add_mmi;
+c->idct = ff_xvid_idct_mmi;
 c->perm_type = FF_IDCT_PERM_NONE;
 }
 
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/7] avutil/mips: loongson add mmi utils header file

2016-10-10 Thread
From 51abc8e9981b99d23c7d253bfb603b25aaabf4aa Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Mon, 10 Oct 2016 14:21:55 +0800
Subject: [PATCH 2/7] avutil/mips: loongson add mmi utils header file


1. mmiutils.h defines MMI_ load/store macros for loongson2e/2f/3a
2. mmiutils.h defines some MMI assembly macros
---
 libavutil/mips/mmiutils.h | 241 ++
 1 file changed, 241 insertions(+)
 create mode 100644 libavutil/mips/mmiutils.h


diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
new file mode 100644
index 000..491579e
--- /dev/null
+++ b/libavutil/mips/mmiutils.h
@@ -0,0 +1,241 @@
+/*
+ * Loongson SIMD utils
+ *
+ * Copyright (c) 2016 Loongson Technology Corporation Limited
+ * Copyright (c) 2016 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MIPS_MMIUTILS_H
+#define AVUTIL_MIPS_MMIUTILS_H
+
+#include "config.h"
+#include "libavutil/mips/asmdefs.h"
+
+#if HAVE_LOONGSON2
+
+#define DECLARE_VAR_LOW32   int32_t low32
+#define RESTRICT_ASM_LOW32  [low32]"=&r"(low32),
+#define DECLARE_VAR_ALL64   int64_t all64
+#define RESTRICT_ASM_ALL64  [all64]"=&r"(all64),
+#define DECLARE_VAR_ADDRT   mips_reg addrt
+#define RESTRICT_ASM_ADDRT  [addrt]"=&r"(addrt),
+
+#define MMI_LWX(reg, addr, stride, bias)\
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+"lw "#reg", "#bias"(%[addrt])   \n\t"
+
+#define MMI_SWX(reg, addr, stride, bias)\
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+"sw "#reg", "#bias"(%[addrt])   \n\t"
+
+#define MMI_LDX(reg, addr, stride, bias)\
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+"ld "#reg", "#bias"(%[addrt])   \n\t"
+
+#define MMI_SDX(reg, addr, stride, bias)\
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+"sd "#reg", "#bias"(%[addrt])   \n\t"
+
+#define MMI_LWC1(fp, addr, bias)\
+"lwc1   "#fp",  "#bias"("#addr")\n\t"
+
+#define MMI_ULWC1(fp, addr, bias)   \
+"ulw%[low32],   "#bias"("#addr")\n\t"   \
+"mtc1   %[low32],   "#fp"   \n\t"
+
+#define MMI_LWXC1(fp, addr, stride, bias)   \
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+MMI_LWC1(fp, %[addrt], bias)
+
+#define MMI_SWC1(fp, addr, bias)\
+"swc1   "#fp",  "#bias"("#addr")\n\t"
+
+#define MMI_USWC1(fp, addr, bias)   \
+"mfc1   %[low32],   "#fp"   \n\t"   \
+"usw%[low32],   "#bias"("#addr")\n\t"
+
+#define MMI_SWXC1(fp, addr, stride, bias)   \
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+MMI_SWC1(fp, %[addrt], bias)
+
+#define MMI_LDC1(fp, addr, bias)\
+"ldc1   "#fp",  "#bias"("#addr")\n\t"
+
+#define MMI_ULDC1(fp, addr, bias)   \
+"uld%[all64],   "#bias"("#addr")\n\t"   \
+"dmtc1  %[all64],   "#fp"   \n\t"
+
+#define MMI_LDXC1(fp, addr, stride, bias)   \
+PTR_ADDU"%[addrt],  "#addr","#stride"   \n\t"   \
+MMI_LDC1(fp, %[addrt], bias)
+
+#define MMI_SDC1(fp, addr, bias)\
+"sdc1   "#fp",  "#bias"("#addr")\n\t"
+
+#define MMI_USDC1(fp, addr, bias)   \
+"dmfc1  %[all64],   "#fp"

[FFmpeg-devel] [PATCH 1/7] configure: loongson disable mipsfpu and mipsdsp optimizations

2016-10-10 Thread
From efccaec2b5b543eb98f0400fdaef88d9147d08c1 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Mon, 10 Oct 2016 14:19:58 +0800
Subject: [PATCH 1/7] configure: loongson disable mipsfpu and mipsdsp
 optimizations


---
 configure | 3 +++
 1 file changed, 3 insertions(+)


diff --git a/configure b/configure
index e014615..8fc71fb 100755
--- a/configure
+++ b/configure
@@ -4380,6 +4380,9 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
+disable mipsfpu
+disable mipsdsp
+disable mipsdspr2
 case $cpu in
 loongson3*)
 cpuflags="-march=loongson3a -mhard-float 
-fno-expensive-optimizations"
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] 回复: Re:Re:Re: [PATCH 07/11] avcodec/mips: loongson optimize h264qpel with mmi v2

2016-05-30 Thread
please review this patch again, thank you


在2016年05月25日 17:40 ,周晓勇写道:

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 07/11] avcodec/mips: loongson optimize h264qpel with mmi v2

2016-05-24 Thread
These functions could not pass the fate-h264 test under either the O32 or the
N64 ABI, but the earlier optimization (version 1) has too many bugs under the
O32 ABI. I will fix the bugs in the functions put_h264_qpel16_hv_lowpass_mmi
and avg_h264_qpel16_hv_lowpass_mmi in the future.


Have you installed the latest Fedora 21 for Loongson?
http://mirror.lemote.com/fedora/live/Fedora-MATE-Live-2.iso
http://mirror.lemote.com/fedora/live/Fedora-MATE-Live-2.iso.md5
use this script to make live-usb installer:
http://mirror.lemote.com/fedora/live/makeliveusb
Tip: make sure /dev/sda1 is ext2 or ext3, as PMON does not support booting the
kernel from ext4.








>
>why do these functions not work ?
>
>
>[...]
>-- 
>Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>The misfortune of the wise is better than the prosperity of the fool.
>-- Epicurus
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 07/11] avcodec/mips: loongson optimize h264qpel with mmi v2

2016-05-23 Thread
that is my fault and thank you for pointing out the mistake, it should be:


diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
index d641a51..737c68c 100644
--- a/libavcodec/mips/h264qpel_mmi.c
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -1901,9 +1901,9 @@ static void put_pixels8_l2_shift5_mmi(uint8_t *dst, 
int16_t *src16,
 : "memory"
 );
 
-src8  += 2L * src8Stride;
+src8  += 2 * src8Stride;
 src16 += 48;
-dst   += 2L * dstStride;
+dst   += 2 * dstStride;
 } while (h -= 2);
 }
 
@@ -2260,9 +2260,9 @@ static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, 
int16_t *src16,
 : "memory"
 );
 
-src8  += 2L * src8Stride;
+src8  += 2 * src8Stride;
 src16 += 48;
-dst   += 2L * dstStride;
+dst   += 2 * dstStride;
 } while (b -= 2);
 }










At 2016-05-24 03:47:30, "Michael Niedermayer"  wrote:
>On Tue, May 17, 2016 at 03:08:13PM +0800, 周晓勇 wrote:
>> avcodec/mips/h264qpel_mmi: Version 2 of the optimizations for loongson mmi
>> 
>> 1. no longer use the register names directly and optimized code format
>> 2. to be compatible with O32, specify type of address variable with 
>> mips_reg and handle the address variable with PTR_ operator
>> 3. temporarily annotated func put_(avg_)h264_qpel16_hv_lowpass_mmi and 
>> related funcs which couldn't pass fate testing in O32 ABI
>> 4. use uld and mtc1 to workaround cpu 3A2000 gslwlc1 bug (gslwlc1 
>> instruction extension bug in O32 ABI)
>> 5. put_pixels_ an avg_pixels_ functions use hpeldsp optimizations instead
>
>[...]
>> @@ -1373,161 +1412,589 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t 
>> *dst, const uint8_t *src,
>>  }
>>  }
>>  
>> -static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
>> -int dstStride, int srcStride)
>> -{
>> -int16_t _tmp[104];
>> -int16_t *tmp = _tmp;
>> -int i;
>> -src -= 2*srcStride;
>> +static inline void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
>> +const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int 
>> size)
>> +{
>> +int w = (size + 8) >> 2;
>> +double ftmp[11];
>> +uint64_t tmp0;
>> +uint64_t low32;
>> +
>> +src -= 2 * srcStride + 2;
>[...]
>
>> +src8  += 2L * src8Stride;
>> +src16 += 48;
>> +dst   += 2L * dstStride;
>
>why does this use long types  instead of ints while other code uses
>ints ?
>
>> +} while (h -= 2);
>> +}
>> +
>> +static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t 
>> *src,
>> +const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
>> +{
>> +put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
>> +put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
>> +src2Stride);
>> +
>> +src += 8 * dstStride;
>> +dst += 8 * dstStride;
>> +src2 += 8 * src2Stride;
>
>
>
>[...]
>
>-- 
>Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>I do not agree with what you have to say, but I'll defend to the death your
>right to say it. -- Voltaire
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 11/11] avcodec/mips: loongson optimize idctdsp with mmi v2

2016-05-17 Thread
avcodec/mips/idctdsp_mmi: Version 2 of the optimizations for loongson mmi

1. no longer use the register names directly and optimized code format
2. to be compatible with O32, specify type of address variable with 
mips_reg and handle the address variable with PTR_ operator






在 2016-05-13 18:07:59,"周晓勇"  写道:

From 37d9b61f529064a6a78c99f86ff371fefa2b357a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 16:51:14 +0800
Subject: [PATCH 11/11] avcodec/mips: loongson optimize idctdsp with mmi v2


---
 libavcodec/mips/idctdsp_mmi.c | 290 ++
 1 file changed, 154 insertions(+), 136 deletions(-)


diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
index 25476f3..24beb62 100644
--- a/libavcodec/mips/idctdsp_mmi.c
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -23,63 +23,75 @@
 
 #include "idctdsp_mips.h"
 #include "constants.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_put_pixels_clamped_mmi(const int16_t *block,
 uint8_t *av_restrict pixels, ptrdiff_t line_size)
 {
-const int16_t *p;
-uint8_t *pix;
-
-p = block;
-pix = pixels;
+double ftmp[8];
+mips_reg addr[1];
 
 __asm__ volatile (
-"ldc1 $f0, 0+%3 \r\n"
-"ldc1 $f2, 8+%3 \r\n"
-"ldc1 $f4, 16+%3\r\n"
-"ldc1 $f6, 24+%3\r\n"
-"ldc1 $f8, 32+%3\r\n"
-"ldc1 $f10, 40+%3   \r\n"
-"ldc1 $f12, 48+%3   \r\n"
-"ldc1 $f14, 56+%3   \r\n"
-"dadd $10, %0, %1   \r\n"
-"packushb $f0, $f0, $f2 \r\n"
-"packushb $f4, $f4, $f6 \r\n"
-"packushb $f8, $f8, $f10\r\n"
-"packushb $f12, $f12, $f14  \r\n"
-"sdc1 $f0, 0(%0)\r\n"
-"sdc1 $f4, 0($10)   \r\n"
-"gssdxc1 $f8, 0($10, %1)\r\n"
-"gssdxc1 $f12, 0(%0, %2)\r\n"
-::"r"(pix),"r"((int)line_size),
-  "r"((int)line_size*3),"m"(*p)
-: "$10","memory"
+"ldc1   %[ftmp0],   0x00(%[block])  \n\t"
+"ldc1   %[ftmp1],   0x08(%[block])  \n\t"
+"ldc1   %[ftmp2],   0x10(%[block])  \n\t"
+"ldc1   %[ftmp3],   0x18(%[block])  \n\t"
+"ldc1   %[ftmp4],   0x20(%[block])  \n\t"
+"ldc1   %[ftmp5],   0x28(%[block])  \n\t"
+"ldc1   %[ftmp6],   0x30(%[block])  \n\t"
+"ldc1   %[ftmp7],   0x38(%[block])  \n\t"
+PTR_ADDU   "%[addr0],   %[pixels],  %[line_size]\n\t"
+"packushb   %[ftmp0],   %[ftmp0],   %[ftmp1]\n\t"
+"packushb   %[ftmp2],   %[ftmp2],   %[ftmp3]\n\t"
+"packushb   %[ftmp4],   %[ftmp4],   %[ftmp5]\n\t"
+"packushb   %[ftmp6],   %[ftmp6],   %[ftmp7]\n\t"
+"sdc1   %[ftmp0],   0x00(%[pixels]) \n\t"
+"sdc1   %[ftmp2],   0x00(%[addr0])  \n\t"
+"gssdxc1%[ftmp4],   0x00(%[addr0],  %[line_size])   \n\t"
+"gssdxc1%[ftmp6],   0x00(%[pixels], %[line_sizex3]) \n\t"
+: [ftmp0]"=&f"(ftmp[0]),[ftmp1]"=&f"(ftmp[1]),
+  [ftmp2]"=&f"(ftmp[2]),[ftmp3]"=&f"(ftmp[3]),
+  [ftmp4]"=&f"(ftmp[4]),[ftmp5]"=&f"(ftmp[5]),
+  [ftmp6]"=&f"(ftmp[6]),[ftmp7]"=&f"(ftmp[7]),
+  [addr0]"=&r"(addr[0]),
+  [pixels]"+&r"(pixels)
+: [line_size]"r"((mips_reg)line_size),
+  [line_sizex3]"r"((mips_reg)(line_size*3)),
+  [block]"r"(block)
+: "memory"
 );
 
-pix += line_size*4;
-p += 32;
+pixels += line_size*4;
+block += 32;
 
 __asm__ volatile (
-"ldc1 $f0, 0+%3 \r\n"
-"ldc1 $f2, 8+%3 \r\n"
-"ldc1 $f4, 16+%3\r\n"
-"ldc1 $f6, 24+%3\r\n"
-"ldc1 $f8, 32+%3\r\n"

Re: [FFmpeg-devel] [PATCH 10/11] avcodec/mips: loongson optimize pixblockdsp with mmi v2

2016-05-17 Thread
avcodec/mips/pixblockdsp_mmi: Version 2 of the optimizations for loongson mmi

1. no longer use the register names directly and optimized code format
2. to be compatible with O32, specify type of address variable with 
mips_reg and handle the address variable with PTR_ operator






在 2016-05-13 18:07:28,"周晓勇"  写道:

From f5f4ee7744c5354e2f76743d22ac81b1341bb7fb Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:41:33 +0800
Subject: [PATCH 10/11] avcodec/mips: loongson optimize pixblockdsp with mmi v2


---
 libavcodec/mips/pixblockdsp_mmi.c | 101 ++
 1 file changed, 60 insertions(+), 41 deletions(-)


diff --git a/libavcodec/mips/pixblockdsp_mmi.c 
b/libavcodec/mips/pixblockdsp_mmi.c
index 30631d8..3ff84c0 100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -22,58 +22,77 @@
  */
 
 #include "pixblockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
 ptrdiff_t line_size)
 {
+double ftmp[6];
+mips_reg tmp[2];
+
 __asm__ volatile (
-"move $8, $0\n\t"
-"xor $f0, $f0, $f0  \n\t"
-"1: \n\t"
-"gsldlc1 $f2, 7(%1) \n\t"
-"gsldrc1 $f2, 0(%1) \n\t"
-"punpcklbh $f4, $f2, $f0\n\t"
-"punpckhbh $f6, $f2, $f0\n\t"
-"gssdxc1 $f4, 0(%0, $8) \n\t"
-"gssdxc1 $f6, 8(%0, $8) \n\t"
-"daddiu $8, $8, 16  \n\t"
-"daddu %1, %1, %2   \n\t"
-"daddi %3, %3, -1   \n\t"
-"bnez %3, 1b\n\t"
-::"r"((uint8_t *)block),"r"(pixels),"r"(line_size),"r"(8)
-: "$8","memory"
+"li %[tmp1],0x08\n\t"
+"move   %[tmp0],$0  \n\t"
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gsldlc1%[ftmp1],   0x07(%[pixels]) \n\t"
+"gsldrc1%[ftmp1],   0x00(%[pixels]) \n\t"
+"punpcklbh  %[ftmp2],   %[ftmp1],   %[ftmp0]\n\t"
+"punpckhbh  %[ftmp5],   %[ftmp1],   %[ftmp0]\n\t"
+"gssdxc1%[ftmp2],   0x00(%[block],  %[tmp0])\n\t"
+"gssdxc1%[ftmp5],   0x08(%[block],  %[tmp0])\n\t"
+PTR_ADDI   "%[tmp1],%[tmp1],   -0x01\n\t"
+PTR_ADDIU  "%[tmp0],%[tmp0],0x10\n\t"
+PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]\n\t"
+"bnez   %[tmp1],1b  \n\t"
+: [ftmp0]"=&f"(ftmp[0]),[ftmp1]"=&f"(ftmp[1]),
+  [ftmp2]"=&f"(ftmp[2]),[ftmp3]"=&f"(ftmp[3]),
+  [ftmp4]"=&f"(ftmp[4]),[ftmp5]"=&f"(ftmp[5]),
+  [tmp0]"=&r"(tmp[0]),  [tmp1]"=&r"(tmp[1]),
+  [pixels]"+&r"(pixels)
+: [block]"r"((mips_reg)block),  [line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
 const uint8_t *src2, int stride)
 {
+double ftmp[5];
+mips_reg tmp[1];
+
 __asm__ volatile (
-"dli $2, 8 \n\t"
-"xor $f14, $f14, $f14  \n\t"
-"1:\n\t"
-"gsldlc1 $f0, 7(%1)\n\t"
-"gsldrc1 $f0, 0(%1)\n\t"
-"or $f2, $f0, $f0  \n\t"
-"gsldlc1 $f4, 7(%2)\n\t"
-"gsldrc1 $f4, 0(%2)\n\t"
-"or $f6, $f4, $f4  \n\t"
-"punpcklbh $f0, $f0, $f14  \n\t"
-"punpckhbh $f2, $f2, $f14  \n\t"
-"punpcklbh $f4, $f4, $f14  \n\t"
-"punpckhbh $f6, $f6, $f14  \n\t"
-"psubh $f0, $f0, $f4   \n\t"
-"psubh $f2, $f2, $f6   \n\t"
-"gssdlc1 $f0, 7(%0)\n\t"
- 

Re: [FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2

2016-05-17 Thread
avcodec/mips/blockdsp_mmi: Version 2 of the optimizations for loongson mmi

1. no longer use the register names directly and optimized code format
2. to be compatible with O32, specify type of address variable with 
mips_reg and handle the address variable with PTR_ operator







在 2016-05-13 18:06:56,"周晓勇"  写道:

From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:30:10 +0800
Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2


---
 libavcodec/mips/blockdsp_mmi.c | 195 ++---
 1 file changed, 103 insertions(+), 92 deletions(-)


diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
index 63eaf69..6eb2bd7 100644
--- a/libavcodec/mips/blockdsp_mmi.c
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -22,126 +22,137 @@
  */
 
 #include "blockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp[1];
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"gssdlc1 $f2, 15($9)\r\n"
-"gssdrc1 $f2, 8($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI"%[h],  %[h],   -0x01   \n\t"
+"gssdlc1%[ftmp0],   0x0f(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x08(%[block])  \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp[0])
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp0;
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI   "%[h],   %[h],   -0x01   \n\t"
+PTR_ADDU   "%[block],   %[block],   

Re: [FFmpeg-devel] [PATCH 08/11] avcodec/mips: loongson optimize h264pred with mmi v3

2016-05-17 Thread
avcodec/mips: loongson optimize h264pred with mmi v3

1. no longer use the register names directly and optimized code format
2. to be compatible with O32, specify type of address variable with 
mips_reg and handle the address variable with PTR_ operator
3. ff_pred16x16_plane_ functions only support N64 ABI now








在 2016-05-13 18:06:23,"周晓勇"  写道:

From 0a8c479860dad3220eb00e057f200e21c0521899 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:45:34 +0800
Subject: [PATCH 08/11] avcodec/mips: loongson optimize h264pred with mmi v3


---
 libavcodec/mips/constants.c  |1 +
 libavcodec/mips/constants.h  |1 +
 libavcodec/mips/h264pred_init_mips.c |5 +-
 libavcodec/mips/h264pred_mmi.c   | 1498 +++---
 4 files changed, 859 insertions(+), 646 deletions(-)


diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index f8130d9..3503fad 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -24,6 +24,7 @@
 #include "constants.h"
 
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) =   {0x0001000100010001ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) =   {0x0002000200020002ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) =   {0x0003000300030003ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) =   {0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) =   {0x0005000500050005ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
index 0a4effd..19d2d73 100644
--- a/libavcodec/mips/constants.h
+++ b/libavcodec/mips/constants.h
@@ -25,6 +25,7 @@
 #include 
 
 extern const uint64_t ff_pw_1;
+extern const uint64_t ff_pw_2;
 extern const uint64_t ff_pw_3;
 extern const uint64_t ff_pw_4;
 extern const uint64_t ff_pw_5;
diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index 93a2409..c33d8f7 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -115,23 +115,22 @@ static av_cold void h264_pred_init_mmi(H264PredContext 
*h, int codec_id,
 h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
 h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
 
+#if ARCH_MIPS64
 switch (codec_id) {
 case AV_CODEC_ID_SVQ3:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmi;
-;
 break;
 case AV_CODEC_ID_RV40:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmi;
-;
 break;
 case AV_CODEC_ID_VP7:
 case AV_CODEC_ID_VP8:
-;
 break;
 default:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmi;
 break;
 }
+#endif
 
 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
 if (chroma_format_idc == 1) {
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index e949d11..bb795a1 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,87 +23,134 @@
  */
 
 #include "h264pred_mips.h"
+#include "libavcodec/bit_depth_template.c"
+#include "libavutil/mips/asmdefs.h"
+#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
+double ftmp[2];
+uint64_t tmp[1];
+
 __asm__ volatile (
-"dli $8, 16 \r\n"
-"gsldlc1 $f2, 7(%[srcA])\r\n"
-"gsldrc1 $f2, 0(%[srcA])\r\n"
-"gsldlc1 $f4, 15(%[srcA])   \r\n"
-"gsldrc1 $f4, 8(%[srcA])\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7(%[src]) \r\n"
-"gssdrc1 $f2, 0(%[src]) \r\n"
-"gssdlc1 $f4, 15(%[src])\r\n"
-"gssdrc1 $f4, 8(%[src]) \r\n"
-"daddu %[src], %[src], %[stride]\r\n"
-"daddi $8, $8, -1   \r\n"
-"bnez $8, 1b\r\n"
-: [src]"+&r"(src)
-: [stride]"r"(stride),[srcA]"r"(src-stride)
-: "$8","$f2","$f4"
+"dli%[tmp0],0x08\n\t"
+"gsldlc1%[ftmp0],   0x07(%[srcA])   \n\t"
+"gsldrc1%[ftmp0],   0x00(%[srcA])   \n\t"
+"gsldlc1%[ftmp1],   0x0f(%[srcA])   \n\t"
+"gsldrc1%[ftmp1],   0x08(%[srcA])   \n\t"
+"1:   

Re: [FFmpeg-devel] [PATCH 06/11] avcodec/mips: loongson optimize hpeldsp with mmi v1

2016-05-16 Thread
avcodec/mips: loongson optimize hpeldsp with mmi v1
1. The code is compatible with the O32 ABI.
2. Use uld and mtc1 to work around the 3A2000 CPU gslwlc1 bug (a gslwlc1
instruction extension bug in the O32 ABI).






在 2016-05-13 18:05:07,"周晓勇"  写道:

From 8212b9b5beecb6e2ba3f05a2a4c7f1704220c911 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:59:03 +0800
Subject: [PATCH 06/11] avcodec/mips: loongson optimize hpeldsp with mmi v1


---
 libavcodec/mips/Makefile|1 +
 libavcodec/mips/hpeldsp_init_mips.c |   49 ++
 libavcodec/mips/hpeldsp_mips.h  |   87 +++
 libavcodec/mips/hpeldsp_mmi.c   | 1257 +++
 4 files changed, 1394 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index f66017a..3c43600 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -77,3 +77,4 @@ MMI-OBJS-$(CONFIG_MPEG4_DECODER)  += 
mips/xvid_idct_mmi.o
 MMI-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_mmi.o
 MMI-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_mmi.o
 MMI-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_mmi.o
+MMI-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_mmi.o
diff --git a/libavcodec/mips/hpeldsp_init_mips.c 
b/libavcodec/mips/hpeldsp_init_mips.c
index 82f2310..363a045 100644
--- a/libavcodec/mips/hpeldsp_init_mips.c
+++ b/libavcodec/mips/hpeldsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2016 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -65,9 +66,57 @@ static void ff_hpeldsp_init_msa(HpelDSPContext *c, int flags)
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static void ff_hpeldsp_init_mmi(HpelDSPContext *c, int flags)
+{
+c->put_pixels_tab[0][0] = ff_put_pixels16_8_mmi;
+c->put_pixels_tab[0][1] = ff_put_pixels16_x2_8_mmi;
+c->put_pixels_tab[0][2] = ff_put_pixels16_y2_8_mmi;
+c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_8_mmi;
+
+c->put_pixels_tab[1][0] = ff_put_pixels8_8_mmi;
+c->put_pixels_tab[1][1] = ff_put_pixels8_x2_8_mmi;
+c->put_pixels_tab[1][2] = ff_put_pixels8_y2_8_mmi;
+c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_8_mmi;
+
+c->put_pixels_tab[2][0] = ff_put_pixels4_8_mmi;
+c->put_pixels_tab[2][1] = ff_put_pixels4_x2_8_mmi;
+c->put_pixels_tab[2][2] = ff_put_pixels4_y2_8_mmi;
+c->put_pixels_tab[2][3] = ff_put_pixels4_xy2_8_mmi;
+
+c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_8_mmi;
+c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_8_mmi;
+c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_8_mmi;
+c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_8_mmi;
+
+c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_8_mmi;
+c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_8_mmi;
+c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_8_mmi;
+c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_8_mmi;
+
+c->avg_pixels_tab[0][0] = ff_avg_pixels16_8_mmi;
+c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_8_mmi;
+c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_8_mmi;
+c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_8_mmi;
+
+c->avg_pixels_tab[1][0] = ff_avg_pixels8_8_mmi;
+c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_8_mmi;
+c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_8_mmi;
+c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_8_mmi;
+
+c->avg_pixels_tab[2][0] = ff_avg_pixels4_8_mmi;
+c->avg_pixels_tab[2][1] = ff_avg_pixels4_x2_8_mmi;
+c->avg_pixels_tab[2][2] = ff_avg_pixels4_y2_8_mmi;
+c->avg_pixels_tab[2][3] = ff_avg_pixels4_xy2_8_mmi;
+}
+#endif  // #if HAVE_MMI
+
 void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
 {
 #if HAVE_MSA
 ff_hpeldsp_init_msa(c, flags);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+ff_hpeldsp_init_mmi(c, flags);
+#endif  // #if HAVE_MMI
 }
diff --git a/libavcodec/mips/hpeldsp_mips.h b/libavcodec/mips/hpeldsp_mips.h
index f4ab53e..f527c1d 100644
--- a/libavcodec/mips/hpeldsp_mips.h
+++ b/libavcodec/mips/hpeldsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2016 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -84,4 +85,90 @@ void ff_avg_pixels4_y2_msa(uint8_t *block, const uint8_t 
*pixels,
 void ff_avg_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
 ptrdiff_t line_size, int32_t h);
 
+void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst

Re: [FFmpeg-devel] [PATCH 05/11] avcodec/mips: loongson optimize mpegvideo with mmi v2

2016-05-16 Thread
avcodec/mips/mpegvideo_mmi: Version 2 of the optimizations for loongson mmi

1. No longer use register names directly, and clean up the code formatting.
2. To be compatible with O32, declare address variables as mips_reg and
operate on them with the PTR_ macros.








在 2016-05-13 18:04:34,"周晓勇"  写道:

From cb8887caf25b300ef2f307f930593e9edf394977 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:48:03 +0800
Subject: [PATCH 05/11] avcodec/mips: loongson optimize mpegvideo with mmi v2


---
 libavcodec/mips/mpegvideo_mmi.c | 667 +---
 1 file changed, 358 insertions(+), 309 deletions(-)


diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
index 94781e6..450a18c 100644
--- a/libavcodec/mips/mpegvideo_mmi.c
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -23,11 +23,14 @@
  */
 
 #include "mpegvideo_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
 int n, int qscale)
 {
 int64_t level, qmul, qadd, nCoeffs;
+double ftmp[6];
+mips_reg addr[1];
 
 qmul = qscale << 1;
 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
@@ -49,48 +52,50 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, 
int16_t *block,
 nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
 
 __asm__ volatile (
-"xor $f12, $f12, $f12   \r\n"
-"lwc1 $f12, %1  \n\r"
-"xor $f10, $f10, $f10   \r\n"
-"lwc1 $f10, %2  \r\n"
-"xor $f14, $f14, $f14   \r\n"
-"packsswh $f12, $f12, $f12  \r\n"
-"packsswh $f12, $f12, $f12  \r\n"
-"packsswh $f10, $f10, $f10  \r\n"
-"packsswh $f10, $f10, $f10  \r\n"
-"psubh $f14, $f14, $f10 \r\n"
-"xor $f8, $f8, $f8  \r\n"
-".p2align 4 \r\n"
-"1: \r\n"
-"daddu $8, %0, %3   \r\n"
-"gsldlc1 $f0, 7($8) \r\n"
-"gsldrc1 $f0, 0($8) \r\n"
-"gsldlc1 $f2, 15($8)\r\n"
-"gsldrc1 $f2, 8($8) \r\n"
-"mov.d $f4, $f0 \r\n"
-"mov.d $f6, $f2 \r\n"
-"pmullh $f0, $f0, $f12  \r\n"
-"pmullh $f2, $f2, $f12  \r\n"
-"pcmpgth $f4, $f4, $f8  \r\n"
-"pcmpgth $f6, $f6, $f8  \r\n"
-"xor $f0, $f0, $f4  \r\n"
-"xor $f2, $f2, $f6  \r\n"
-"paddh $f0, $f0, $f14   \r\n"
-"paddh $f2, $f2, $f14   \r\n"
-"xor $f4, $f4, $f0  \r\n"
-"xor $f6, $f6, $f2  \r\n"
-"pcmpeqh $f0, $f0, $f14 \r\n"
-"pcmpeqh $f2, $f2, $f14 \r\n"
-"pandn $f0, $f0, $f4\r\n"
-"pandn $f2, $f2, $f6\r\n"
-"gssdlc1 $f0, 7($8) \r\n"
-"gssdrc1 $f0, 0($8) \r\n"
-"gssdlc1 $f2, 15($8)\r\n"
-"gssdrc1 $f2, 8($8) \r\n"
-"addi %3, %3, 16\r\n"
-"blez %3, 1b\r\n"
-::"r"(block+nCoeffs),"m"(qmul),"m"(qadd),"r"(2*(-nCoeffs))
-:"$8","memory"
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"packsswh   %[qmul],%[qmul],%[qmul] \n\t"
+"packsswh   %[qmul],%[qmul],%[qmul] \n\t"
+"packsswh   %[qadd],%[qadd],%[qadd] \n\t"
+"packsswh   %[qadd],%[qadd],%[qadd] \n\t"
+"psubh  %[ftmp0],   %[ftmp0],   %[qadd] \n\t"
+"xor%[ftmp5],   %[ftmp5],   %[ftmp5]\n\t"
+".p2align   4   \n\t"
+"1: \n\t"
+PTR_ADDU   "%[addr0],   %[block],   %[nCoeffs]  \n\t"
+"gsldlc1%[ftmp1],   0x07(%[addr0])  \n\t"
+"gsldrc1%[ftmp1],   0x00(%[addr0])  \n\t"
+"

Re: [FFmpeg-devel] [PATCH 04/11] avcodec/mips: loongson optimize h264chroma with mmi v2

2016-05-16 Thread
avcodec/mips/h264chroma_mmi: Version 2 of the optimizations for loongson mmi

1. No longer use register names directly, and clean up the code formatting.
2. To be compatible with O32, declare address variables as mips_reg and
operate on them with the PTR_ macros.
3. Use uld and mtc1 to work around the 3A2000 CPU's gslwlc1 bug (a gslwlc1
instruction extension bug in the O32 ABI).








在 2016-05-13 18:04:02,"周晓勇"  写道:

From 157e001724cdb1461ecfff2f02d0a7b0d6335943 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Sat, 7 May 2016 14:20:49 +0800
Subject: [PATCH 04/11] avcodec/mips: loongson optimize h264chroma with mmi v2


---
 libavcodec/mips/h264chroma_mmi.c | 1123 +-
 1 file changed, 629 insertions(+), 494 deletions(-)


diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index ef29476..3dd123d 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c
@@ -23,6 +23,8 @@
  */
 
 #include "h264chroma_mips.h"
+#include "constants.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
 int h, int x, int y)
@@ -32,171 +34,177 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t 
*src, int stride,
 const int C = (8 - x) * y;
 const int D = x * y;
 const int E = B + C;
-int i;
-
-av_assert2(x<8 && y<8 && x>=0 && y>=0);
+double ftmp[10];
+uint64_t tmp[1];
+mips_reg addr[1];
 
 if (D) {
-for (i=0; i=0 && y>=0);
+double ftmp[10];
+uint64_t tmp[1];
+mips_reg addr[1];
 
 if (D) {
-for (i=0; i=0 && y>=0);
+double ftmp[8];
+uint64_t tmp[1];
+mips_reg addr[1];
+uint64_t low32;
 
 if (D) {
-for (i=0; i=0 && y>=0);
+const int E = B + C;
+double ftmp[8];
+uint64_t tmp[1];
+mips_reg addr[1];
+uint64_t low32;
 
 if (D) {
-for (i=0; ihttp://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] 回复: Re: [PATCH 03/11] avcodec/mips: loongson optimize h264dsp with mmi v2

2016-05-16 Thread
It applies to most of the optimization patches, so should I resubmit all of the
patches with an explanation in each commit message?





在2016年05月16日 22:46 ,Michael Niedermayer写道:

On Mon, May 16, 2016 at 12:50:15PM +0800, 周晓勇 wrote:
> 1. no longer use register names directly and optimized code format
> 2. to be compatible with O32, specify type of address variable with mips_reg 
> and handle the address variable with PTR_ macros
> 3. address some unaligned-access faults in loads and stores
> 4. use uld and mtc1 to work around cpu 3A2000 gslwlc1 bug (gslwlc1 instruction 
> extension bug in O32 ABI)

Does this apply to just this patch or to all patches, or what should
be done with the others?
Every patch needs a good commit message.

[...]

--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I have often repented speaking, but never of holding my tongue.
-- Xenocrates
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 03/11] avcodec/mips: loongson optimize h264dsp with mmi v2

2016-05-15 Thread
1. No longer use register names directly, and clean up the code formatting.
2. To be compatible with O32, declare address variables as mips_reg and
operate on them with the PTR_ macros.
3. Address some unaligned-access faults in loads and stores.
4. Use uld and mtc1 to work around the 3A2000 CPU's gslwlc1 bug (a gslwlc1
instruction extension bug in the O32 ABI).







At 2016-05-15 01:56:34, "Michael Niedermayer"  wrote:
>On Fri, May 13, 2016 at 06:03:27PM +0800, 周晓勇 wrote:
>> From 4adf70c0eb9a85fe6cbedb043ed8ce08024c48dc Mon Sep 17 00:00:00 2001
>> From: ZhouXiaoyong 
>> Date: Sat, 7 May 2016 14:16:28 +0800
>
>> Subject: [PATCH 03/11] avcodec/mips: loongson optimize h264dsp with mmi v2
>
>please provide more verbose commit messages
>A commit message should state
>what is changed
>why it is changed
>how it is changed
>as well as what effects that has on user, compatibility, performance, ...
>
>[...]
>-- 
>Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>Those who are best at talking, realize last or never when they are wrong.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 11/11] avcodec/mips: loongson optimize idctdsp with mmi v2

2016-05-13 Thread
From 37d9b61f529064a6a78c99f86ff371fefa2b357a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 16:51:14 +0800
Subject: [PATCH 11/11] avcodec/mips: loongson optimize idctdsp with mmi v2


---
 libavcodec/mips/idctdsp_mmi.c | 290 ++
 1 file changed, 154 insertions(+), 136 deletions(-)


diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
index 25476f3..24beb62 100644
--- a/libavcodec/mips/idctdsp_mmi.c
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -23,63 +23,75 @@
 
 #include "idctdsp_mips.h"
 #include "constants.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_put_pixels_clamped_mmi(const int16_t *block,
 uint8_t *av_restrict pixels, ptrdiff_t line_size)
 {
-const int16_t *p;
-uint8_t *pix;
-
-p = block;
-pix = pixels;
+double ftmp[8];
+mips_reg addr[1];
 
 __asm__ volatile (
-"ldc1 $f0, 0+%3 \r\n"
-"ldc1 $f2, 8+%3 \r\n"
-"ldc1 $f4, 16+%3\r\n"
-"ldc1 $f6, 24+%3\r\n"
-"ldc1 $f8, 32+%3\r\n"
-"ldc1 $f10, 40+%3   \r\n"
-"ldc1 $f12, 48+%3   \r\n"
-"ldc1 $f14, 56+%3   \r\n"
-"dadd $10, %0, %1   \r\n"
-"packushb $f0, $f0, $f2 \r\n"
-"packushb $f4, $f4, $f6 \r\n"
-"packushb $f8, $f8, $f10\r\n"
-"packushb $f12, $f12, $f14  \r\n"
-"sdc1 $f0, 0(%0)\r\n"
-"sdc1 $f4, 0($10)   \r\n"
-"gssdxc1 $f8, 0($10, %1)\r\n"
-"gssdxc1 $f12, 0(%0, %2)\r\n"
-::"r"(pix),"r"((int)line_size),
-  "r"((int)line_size*3),"m"(*p)
-: "$10","memory"
+"ldc1   %[ftmp0],   0x00(%[block])  \n\t"
+"ldc1   %[ftmp1],   0x08(%[block])  \n\t"
+"ldc1   %[ftmp2],   0x10(%[block])  \n\t"
+"ldc1   %[ftmp3],   0x18(%[block])  \n\t"
+"ldc1   %[ftmp4],   0x20(%[block])  \n\t"
+"ldc1   %[ftmp5],   0x28(%[block])  \n\t"
+"ldc1   %[ftmp6],   0x30(%[block])  \n\t"
+"ldc1   %[ftmp7],   0x38(%[block])  \n\t"
+PTR_ADDU   "%[addr0],   %[pixels],  %[line_size]\n\t"
+"packushb   %[ftmp0],   %[ftmp0],   %[ftmp1]\n\t"
+"packushb   %[ftmp2],   %[ftmp2],   %[ftmp3]\n\t"
+"packushb   %[ftmp4],   %[ftmp4],   %[ftmp5]\n\t"
+"packushb   %[ftmp6],   %[ftmp6],   %[ftmp7]\n\t"
+"sdc1   %[ftmp0],   0x00(%[pixels]) \n\t"
+"sdc1   %[ftmp2],   0x00(%[addr0])  \n\t"
+"gssdxc1%[ftmp4],   0x00(%[addr0],  %[line_size])   \n\t"
+"gssdxc1%[ftmp6],   0x00(%[pixels], %[line_sizex3]) \n\t"
+: [ftmp0]"=&f"(ftmp[0]),[ftmp1]"=&f"(ftmp[1]),
+  [ftmp2]"=&f"(ftmp[2]),[ftmp3]"=&f"(ftmp[3]),
+  [ftmp4]"=&f"(ftmp[4]),[ftmp5]"=&f"(ftmp[5]),
+  [ftmp6]"=&f"(ftmp[6]),[ftmp7]"=&f"(ftmp[7]),
+  [addr0]"=&r"(addr[0]),
+  [pixels]"+&r"(pixels)
+: [line_size]"r"((mips_reg)line_size),
+  [line_sizex3]"r"((mips_reg)(line_size*3)),
+  [block]"r"(block)
+: "memory"
 );
 
-pix += line_size*4;
-p += 32;
+pixels += line_size*4;
+block += 32;
 
 __asm__ volatile (
-"ldc1 $f0, 0+%3 \r\n"
-"ldc1 $f2, 8+%3 \r\n"
-"ldc1 $f4, 16+%3\r\n"
-"ldc1 $f6, 24+%3\r\n"
-"ldc1 $f8, 32+%3\r\n"
-"ldc1 $f10, 40+%3   \r\n"
-"ldc1 $f12, 48+%3   \r\n"
-"ldc1 $f14, 56+%3   \r\n"
-"dadd $10, %0, %1   \r\n"
-"packushb $f0, $f0, $f2 \r\n"
-"packushb $f4, $f4, $f6 \r\n"
-"packushb $f8, $f8, $f10\r\n"
-"packushb $f12, $f12, $f14  \r\n"
-"sdc1 $f0, 0(%0)\r\n"
-"sdc1 $f4, 0($10)   \r\n"
-"gssdxc1 $f8, 0($10, %1)\r\n"
-"gssdxc1 $f12, 0(%0, %2)\r\n"
-::"r"(pix),"r"((int)line_size),
-  "r"((int)line_size*3),"m"(*p)
-: "$10","memory"
+"ldc1   %[ftmp0],   0x00(%[block])  \n\t"
+"ldc1   %[ftmp1],   0x08(%[block])  \n\t"
+"ldc1   %[ftmp2],   0x10(%[block])  \n\t"
+"ldc1   %[ftmp3],   0x18(%[block])  \n\t"
+"ldc1  

[FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2

2016-05-13 Thread
From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:30:10 +0800
Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2


---
 libavcodec/mips/blockdsp_mmi.c | 195 ++---
 1 file changed, 103 insertions(+), 92 deletions(-)


diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
index 63eaf69..6eb2bd7 100644
--- a/libavcodec/mips/blockdsp_mmi.c
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -22,126 +22,137 @@
  */
 
 #include "blockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp[1];
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"gssdlc1 $f2, 15($9)\r\n"
-"gssdrc1 $f2, 8($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI"%[h],  %[h],   -0x01   \n\t"
+"gssdlc1%[ftmp0],   0x0f(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x08(%[block])  \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp[0])
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp0;
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI   "%[h],   %[h],   -0x01   \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp0)
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_clear_block_mmi(int16_t *block)
 {
+double ftmp[2];
+
 __asm__ volatile (
-"xor $f0, $f0, $f0  \r\n"
-"xor $f2, $f2, $f2  \r\n"
-"gssqc1 $f0, $f2,   0(%0)   \r\n"
-"gssqc1 $f0, $f2,  16(%0)   \r\n"
-"gssqc1 $f0, $f2,  32(%0)   \r\n"
-"gssqc1 $f0, $f2,  48(%0)   \r\n"
-"gssqc1 $f0, $f2,  64(%0)   \r\n"
-"gssqc1 $f0, $f2,  80(%0)   \r\n"
-"gssqc1 $f0, $f2,  96(%0)   \r\n"
-"gssqc1 $f0, $f2, 112(%0)   \r\n"
-::"r"(block)
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"xor%[ftmp1],   %[ftmp1],  

[FFmpeg-devel] [PATCH 10/11] avcodec/mips: loongson optimize pixblockdsp with mmi v2

2016-05-13 Thread
From f5f4ee7744c5354e2f76743d22ac81b1341bb7fb Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:41:33 +0800
Subject: [PATCH 10/11] avcodec/mips: loongson optimize pixblockdsp with mmi v2


---
 libavcodec/mips/pixblockdsp_mmi.c | 101 ++
 1 file changed, 60 insertions(+), 41 deletions(-)


diff --git a/libavcodec/mips/pixblockdsp_mmi.c 
b/libavcodec/mips/pixblockdsp_mmi.c
index 30631d8..3ff84c0 100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -22,58 +22,77 @@
  */
 
 #include "pixblockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
 ptrdiff_t line_size)
 {
+double ftmp[6];
+mips_reg tmp[2];
+
 __asm__ volatile (
-"move $8, $0\n\t"
-"xor $f0, $f0, $f0  \n\t"
-"1: \n\t"
-"gsldlc1 $f2, 7(%1) \n\t"
-"gsldrc1 $f2, 0(%1) \n\t"
-"punpcklbh $f4, $f2, $f0\n\t"
-"punpckhbh $f6, $f2, $f0\n\t"
-"gssdxc1 $f4, 0(%0, $8) \n\t"
-"gssdxc1 $f6, 8(%0, $8) \n\t"
-"daddiu $8, $8, 16  \n\t"
-"daddu %1, %1, %2   \n\t"
-"daddi %3, %3, -1   \n\t"
-"bnez %3, 1b\n\t"
-::"r"((uint8_t *)block),"r"(pixels),"r"(line_size),"r"(8)
-: "$8","memory"
+"li %[tmp1],0x08\n\t"
+"move   %[tmp0],$0  \n\t"
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gsldlc1%[ftmp1],   0x07(%[pixels]) \n\t"
+"gsldrc1%[ftmp1],   0x00(%[pixels]) \n\t"
+"punpcklbh  %[ftmp2],   %[ftmp1],   %[ftmp0]\n\t"
+"punpckhbh  %[ftmp5],   %[ftmp1],   %[ftmp0]\n\t"
+"gssdxc1%[ftmp2],   0x00(%[block],  %[tmp0])\n\t"
+"gssdxc1%[ftmp5],   0x08(%[block],  %[tmp0])\n\t"
+PTR_ADDI   "%[tmp1],%[tmp1],   -0x01\n\t"
+PTR_ADDIU  "%[tmp0],%[tmp0],0x10\n\t"
+PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]\n\t"
+"bnez   %[tmp1],1b  \n\t"
+: [ftmp0]"=&f"(ftmp[0]),[ftmp1]"=&f"(ftmp[1]),
+  [ftmp2]"=&f"(ftmp[2]),[ftmp3]"=&f"(ftmp[3]),
+  [ftmp4]"=&f"(ftmp[4]),[ftmp5]"=&f"(ftmp[5]),
+  [tmp0]"=&r"(tmp[0]),  [tmp1]"=&r"(tmp[1]),
+  [pixels]"+&r"(pixels)
+: [block]"r"((mips_reg)block),  [line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
 const uint8_t *src2, int stride)
 {
+double ftmp[5];
+mips_reg tmp[1];
+
 __asm__ volatile (
-"dli $2, 8 \n\t"
-"xor $f14, $f14, $f14  \n\t"
-"1:\n\t"
-"gsldlc1 $f0, 7(%1)\n\t"
-"gsldrc1 $f0, 0(%1)\n\t"
-"or $f2, $f0, $f0  \n\t"
-"gsldlc1 $f4, 7(%2)\n\t"
-"gsldrc1 $f4, 0(%2)\n\t"
-"or $f6, $f4, $f4  \n\t"
-"punpcklbh $f0, $f0, $f14  \n\t"
-"punpckhbh $f2, $f2, $f14  \n\t"
-"punpcklbh $f4, $f4, $f14  \n\t"
-"punpckhbh $f6, $f6, $f14  \n\t"
-"psubh $f0, $f0, $f4   \n\t"
-"psubh $f2, $f2, $f6   \n\t"
-"gssdlc1 $f0, 7(%0)\n\t"
-"gssdrc1 $f0, 0(%0)\n\t"
-"gssdlc1 $f2, 15(%0)   \n\t"
-"gssdrc1 $f2, 8(%0)\n\t"
-"daddi %0, %0, 16  \n\t"
-"daddu %1, %1, %3  \n\t"
-"daddu %2, %2, %3  \n\t"
-"daddi $2, $2, -1  \n\t"
-"bgtz $2, 1b   \n\t"
-::"r"(block),"r"(src1),"r"(src2),"r"(stride)
-: "$2","memory"
+"li %[tmp0],0x08\n\t"
+"xor%[ftmp4],   %[ftmp4],   %[ftmp4]\n\t"
+"1: \n\t"
+"gsldlc1%[ftmp0],   0x07(%[src1])   \n\t"
+"gsldrc1%[ftmp0],   0x00(%[src1])   \n\t"
+"or %[ftmp1],   %[ftmp0],   %[ftmp0]\n\t"
+"gsldlc1%[ftmp2],   0x07(%[src2])   \n\t"
+"gsldrc1%[ftmp2],  

[FFmpeg-devel] [PATCH 08/11] avcodec/mips: loongson optimize h264pred with mmi v3

2016-05-13 Thread
From 0a8c479860dad3220eb00e057f200e21c0521899 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:45:34 +0800
Subject: [PATCH 08/11] avcodec/mips: loongson optimize h264pred with mmi v3


---
 libavcodec/mips/constants.c  |1 +
 libavcodec/mips/constants.h  |1 +
 libavcodec/mips/h264pred_init_mips.c |5 +-
 libavcodec/mips/h264pred_mmi.c   | 1498 +++---
 4 files changed, 859 insertions(+), 646 deletions(-)


diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index f8130d9..3503fad 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -24,6 +24,7 @@
 #include "constants.h"
 
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) =   {0x0001000100010001ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) =   {0x0002000200020002ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) =   {0x0003000300030003ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) =   {0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) =   {0x0005000500050005ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
index 0a4effd..19d2d73 100644
--- a/libavcodec/mips/constants.h
+++ b/libavcodec/mips/constants.h
@@ -25,6 +25,7 @@
 #include 
 
 extern const uint64_t ff_pw_1;
+extern const uint64_t ff_pw_2;
 extern const uint64_t ff_pw_3;
 extern const uint64_t ff_pw_4;
 extern const uint64_t ff_pw_5;
diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index 93a2409..c33d8f7 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -115,23 +115,22 @@ static av_cold void h264_pred_init_mmi(H264PredContext 
*h, int codec_id,
 h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
 h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
 
+#if ARCH_MIPS64
 switch (codec_id) {
 case AV_CODEC_ID_SVQ3:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmi;
-;
 break;
 case AV_CODEC_ID_RV40:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmi;
-;
 break;
 case AV_CODEC_ID_VP7:
 case AV_CODEC_ID_VP8:
-;
 break;
 default:
 h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmi;
 break;
 }
+#endif
 
 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
 if (chroma_format_idc == 1) {
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index e949d11..bb795a1 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,87 +23,134 @@
  */
 
 #include "h264pred_mips.h"
+#include "libavcodec/bit_depth_template.c"
+#include "libavutil/mips/asmdefs.h"
+#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
+double ftmp[2];
+uint64_t tmp[1];
+
 __asm__ volatile (
-"dli $8, 16 \r\n"
-"gsldlc1 $f2, 7(%[srcA])\r\n"
-"gsldrc1 $f2, 0(%[srcA])\r\n"
-"gsldlc1 $f4, 15(%[srcA])   \r\n"
-"gsldrc1 $f4, 8(%[srcA])\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7(%[src]) \r\n"
-"gssdrc1 $f2, 0(%[src]) \r\n"
-"gssdlc1 $f4, 15(%[src])\r\n"
-"gssdrc1 $f4, 8(%[src]) \r\n"
-"daddu %[src], %[src], %[stride]\r\n"
-"daddi $8, $8, -1   \r\n"
-"bnez $8, 1b\r\n"
-: [src]"+&r"(src)
-: [stride]"r"(stride),[srcA]"r"(src-stride)
-: "$8","$f2","$f4"
+"dli%[tmp0],0x08\n\t"
+"gsldlc1%[ftmp0],   0x07(%[srcA])   \n\t"
+"gsldrc1%[ftmp0],   0x00(%[srcA])   \n\t"
+"gsldlc1%[ftmp1],   0x0f(%[srcA])   \n\t"
+"gsldrc1%[ftmp1],   0x08(%[srcA])   \n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[src])\n\t"
+"gssdrc1%[ftmp0],   0x00(%[src])\n\t"
+"gssdlc1%[ftmp1],   0x0f(%[src])\n\t"
+"gssdrc1%[ftmp1],   0x08(%[src])\n\t"
+PTR_ADDU   "%[src], %[src], %[stride]   \n\t"
+"gssdlc1%[ftmp0],   0x07(%[src])\n\t"
+"gssdrc1%[ftmp0],   0x00(%[src])\n\t"
+"gssdlc1%[ftmp1],   0x0f(%[src])\n\t"
+"gssdrc1%[ftmp1],   0x08(%[s

[FFmpeg-devel] [PATCH 06/11] avcodec/mips: loongson optimize hpeldsp with mmi v1

2016-05-13 Thread
From 8212b9b5beecb6e2ba3f05a2a4c7f1704220c911 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:59:03 +0800
Subject: [PATCH 06/11] avcodec/mips: loongson optimize hpeldsp with mmi v1


---
 libavcodec/mips/Makefile|1 +
 libavcodec/mips/hpeldsp_init_mips.c |   49 ++
 libavcodec/mips/hpeldsp_mips.h  |   87 +++
 libavcodec/mips/hpeldsp_mmi.c   | 1257 +++
 4 files changed, 1394 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index f66017a..3c43600 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -77,3 +77,4 @@ MMI-OBJS-$(CONFIG_MPEG4_DECODER)  += 
mips/xvid_idct_mmi.o
 MMI-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_mmi.o
 MMI-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_mmi.o
 MMI-OBJS-$(CONFIG_H264QPEL)   += mips/h264qpel_mmi.o
+MMI-OBJS-$(CONFIG_HPELDSP)+= mips/hpeldsp_mmi.o
diff --git a/libavcodec/mips/hpeldsp_init_mips.c 
b/libavcodec/mips/hpeldsp_init_mips.c
index 82f2310..363a045 100644
--- a/libavcodec/mips/hpeldsp_init_mips.c
+++ b/libavcodec/mips/hpeldsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2016 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -65,9 +66,57 @@ static void ff_hpeldsp_init_msa(HpelDSPContext *c, int flags)
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static void ff_hpeldsp_init_mmi(HpelDSPContext *c, int flags)
+{
+c->put_pixels_tab[0][0] = ff_put_pixels16_8_mmi;
+c->put_pixels_tab[0][1] = ff_put_pixels16_x2_8_mmi;
+c->put_pixels_tab[0][2] = ff_put_pixels16_y2_8_mmi;
+c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_8_mmi;
+
+c->put_pixels_tab[1][0] = ff_put_pixels8_8_mmi;
+c->put_pixels_tab[1][1] = ff_put_pixels8_x2_8_mmi;
+c->put_pixels_tab[1][2] = ff_put_pixels8_y2_8_mmi;
+c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_8_mmi;
+
+c->put_pixels_tab[2][0] = ff_put_pixels4_8_mmi;
+c->put_pixels_tab[2][1] = ff_put_pixels4_x2_8_mmi;
+c->put_pixels_tab[2][2] = ff_put_pixels4_y2_8_mmi;
+c->put_pixels_tab[2][3] = ff_put_pixels4_xy2_8_mmi;
+
+c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_8_mmi;
+c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_8_mmi;
+c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_8_mmi;
+c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_8_mmi;
+
+c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_8_mmi;
+c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_8_mmi;
+c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_8_mmi;
+c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_8_mmi;
+
+c->avg_pixels_tab[0][0] = ff_avg_pixels16_8_mmi;
+c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_8_mmi;
+c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_8_mmi;
+c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_8_mmi;
+
+c->avg_pixels_tab[1][0] = ff_avg_pixels8_8_mmi;
+c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_8_mmi;
+c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_8_mmi;
+c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_8_mmi;
+
+c->avg_pixels_tab[2][0] = ff_avg_pixels4_8_mmi;
+c->avg_pixels_tab[2][1] = ff_avg_pixels4_x2_8_mmi;
+c->avg_pixels_tab[2][2] = ff_avg_pixels4_y2_8_mmi;
+c->avg_pixels_tab[2][3] = ff_avg_pixels4_xy2_8_mmi;
+}
+#endif  // #if HAVE_MMI
+
 void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
 {
 #if HAVE_MSA
 ff_hpeldsp_init_msa(c, flags);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+ff_hpeldsp_init_mmi(c, flags);
+#endif  // #if HAVE_MMI
 }
diff --git a/libavcodec/mips/hpeldsp_mips.h b/libavcodec/mips/hpeldsp_mips.h
index f4ab53e..f527c1d 100644
--- a/libavcodec/mips/hpeldsp_mips.h
+++ b/libavcodec/mips/hpeldsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2016 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -84,4 +85,90 @@ void ff_avg_pixels4_y2_msa(uint8_t *block, const uint8_t 
*pixels,
 void ff_avg_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
 ptrdiff_t line_size, int32_t h);
 
+void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+int h);
+void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
+const uint8_t *src2, int dst_stride, int src_stride1, int src_s

[FFmpeg-devel] [PATCH 05/11] avcodec/mips: loongson optimize mpegvideo with mmi v2

2016-05-13 Thread
From cb8887caf25b300ef2f307f930593e9edf394977 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 01:48:03 +0800
Subject: [PATCH 05/11] avcodec/mips: loongson optimize mpegvideo with mmi v2


---
 libavcodec/mips/mpegvideo_mmi.c | 667 +---
 1 file changed, 358 insertions(+), 309 deletions(-)


diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
index 94781e6..450a18c 100644
--- a/libavcodec/mips/mpegvideo_mmi.c
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -23,11 +23,14 @@
  */
 
 #include "mpegvideo_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
 int n, int qscale)
 {
 int64_t level, qmul, qadd, nCoeffs;
+double ftmp[6];
+mips_reg addr[1];
 
 qmul = qscale << 1;
 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
@@ -49,48 +52,50 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, 
int16_t *block,
 nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
 
 __asm__ volatile (
-"xor $f12, $f12, $f12   \r\n"
-"lwc1 $f12, %1  \n\r"
-"xor $f10, $f10, $f10   \r\n"
-"lwc1 $f10, %2  \r\n"
-"xor $f14, $f14, $f14   \r\n"
-"packsswh $f12, $f12, $f12  \r\n"
-"packsswh $f12, $f12, $f12  \r\n"
-"packsswh $f10, $f10, $f10  \r\n"
-"packsswh $f10, $f10, $f10  \r\n"
-"psubh $f14, $f14, $f10 \r\n"
-"xor $f8, $f8, $f8  \r\n"
-".p2align 4 \r\n"
-"1: \r\n"
-"daddu $8, %0, %3   \r\n"
-"gsldlc1 $f0, 7($8) \r\n"
-"gsldrc1 $f0, 0($8) \r\n"
-"gsldlc1 $f2, 15($8)\r\n"
-"gsldrc1 $f2, 8($8) \r\n"
-"mov.d $f4, $f0 \r\n"
-"mov.d $f6, $f2 \r\n"
-"pmullh $f0, $f0, $f12  \r\n"
-"pmullh $f2, $f2, $f12  \r\n"
-"pcmpgth $f4, $f4, $f8  \r\n"
-"pcmpgth $f6, $f6, $f8  \r\n"
-"xor $f0, $f0, $f4  \r\n"
-"xor $f2, $f2, $f6  \r\n"
-"paddh $f0, $f0, $f14   \r\n"
-"paddh $f2, $f2, $f14   \r\n"
-"xor $f4, $f4, $f0  \r\n"
-"xor $f6, $f6, $f2  \r\n"
-"pcmpeqh $f0, $f0, $f14 \r\n"
-"pcmpeqh $f2, $f2, $f14 \r\n"
-"pandn $f0, $f0, $f4\r\n"
-"pandn $f2, $f2, $f6\r\n"
-"gssdlc1 $f0, 7($8) \r\n"
-"gssdrc1 $f0, 0($8) \r\n"
-"gssdlc1 $f2, 15($8)\r\n"
-"gssdrc1 $f2, 8($8) \r\n"
-"addi %3, %3, 16\r\n"
-"blez %3, 1b\r\n"
-::"r"(block+nCoeffs),"m"(qmul),"m"(qadd),"r"(2*(-nCoeffs))
-:"$8","memory"
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"packsswh   %[qmul],%[qmul],%[qmul] \n\t"
+"packsswh   %[qmul],%[qmul],%[qmul] \n\t"
+"packsswh   %[qadd],%[qadd],%[qadd] \n\t"
+"packsswh   %[qadd],%[qadd],%[qadd] \n\t"
+"psubh  %[ftmp0],   %[ftmp0],   %[qadd] \n\t"
+"xor%[ftmp5],   %[ftmp5],   %[ftmp5]\n\t"
+".p2align   4   \n\t"
+"1: \n\t"
+PTR_ADDU   "%[addr0],   %[block],   %[nCoeffs]  \n\t"
+"gsldlc1%[ftmp1],   0x07(%[addr0])  \n\t"
+"gsldrc1%[ftmp1],   0x00(%[addr0])  \n\t"
+"gsldlc1%[ftmp2],   0x0f(%[addr0])  \n\t"
+"gsldrc1%[ftmp2],   0x08(%[addr0])  \n\t"
+"mov.d  %[ftmp3],   %[ftmp1]\n\t"
+"mov.d  %[ftmp4],   %[ftmp2]\n\t"
+"pmullh %[ftmp1],   %[ftmp1],   %[qmul] \n\t"
+"pmullh %[ftmp2],   %[ftmp2],   %[qmul] \n\t"
+"pcmpgth%[ftmp3],   %[ftmp3],   %[ftmp5]\n\t"
+"pcmpgth%[ftmp4],   %[ftmp4],   %[ftmp5]\n\t"
+"xor%[ftmp1],   %[ftmp1],   %[ftmp3]\n\t"
+"xor%[ftmp2],   %[ftmp2],   %[ftmp4]\n\t"
+"paddh  %[ftmp1],   %[ftmp1],   %[ftmp0]\n\t"
+"paddh  %[ftmp2],   %[ftmp2],   %[ftmp0]\n\t"
+"xor%[ftmp3],   

[FFmpeg-devel] [PATCH 04/11] avcodec/mips: loongson optimize h264chroma with mmi v2

2016-05-13 Thread
From 157e001724cdb1461ecfff2f02d0a7b0d6335943 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Sat, 7 May 2016 14:20:49 +0800
Subject: [PATCH 04/11] avcodec/mips: loongson optimize h264chroma with mmi v2


---
 libavcodec/mips/h264chroma_mmi.c | 1123 +-
 1 file changed, 629 insertions(+), 494 deletions(-)


diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index ef29476..3dd123d 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c
@@ -23,6 +23,8 @@
  */
 
 #include "h264chroma_mips.h"
+#include "constants.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
 int h, int x, int y)
@@ -32,171 +34,177 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t 
*src, int stride,
 const int C = (8 - x) * y;
 const int D = x * y;
 const int E = B + C;
-int i;
-
-av_assert2(x<8 && y<8 && x>=0 && y>=0);
+double ftmp[10];
+uint64_t tmp[1];
+mips_reg addr[1];
 
 if (D) {
-for (i=0; i=0 && y>=0);
+double ftmp[10];
+uint64_t tmp[1];
+mips_reg addr[1];
 
 if (D) {
-for (i=0; i=0 && y>=0);
+double ftmp[8];
+uint64_t tmp[1];
+mips_reg addr[1];
+uint64_t low32;
 
 if (D) {
-for (i=0; i=0 && y>=0);
+const int E = B + C;
+double ftmp[8];
+uint64_t tmp[1];
+mips_reg addr[1];
+uint64_t low32;
 
 if (D) {
-for (i=0; ihttp://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 02/11] avutil/mips: header asmdefs.h add some PTR_ macros for loongson

2016-05-13 Thread
From f4c3f97c7dc130433b7acf674ff03d7500c7236a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Sat, 7 May 2016 13:56:17 +0800
Subject: [PATCH 02/11] avutil/mips: header asmdefs.h add some PTR_ macros for
 loongson


---
 libavutil/mips/asmdefs.h | 12 
 1 file changed, 12 insertions(+)


diff --git a/libavutil/mips/asmdefs.h b/libavutil/mips/asmdefs.h
index fdf82a0..7481199 100644
--- a/libavutil/mips/asmdefs.h
+++ b/libavutil/mips/asmdefs.h
@@ -28,19 +28,31 @@
 #define AVUTIL_MIPS_ASMDEFS_H
 
 #if defined(_ABI64) && _MIPS_SIM == _ABI64
+# define mips_reg   int64_t
 # define PTRSIZE" 8 "
 # define PTRLOG " 3 "
 # define PTR_ADDU   "daddu "
 # define PTR_ADDIU  "daddiu "
+# define PTR_ADDI   "daddi "
 # define PTR_SUBU   "dsubu "
 # define PTR_L  "ld "
+# define PTR_S  "sd "
+# define PTR_SRA"dsra "
+# define PTR_SRL"dsrl "
+# define PTR_SLL"dsll "
 #else
+# define mips_reg   int32_t
 # define PTRSIZE" 4 "
 # define PTRLOG " 2 "
 # define PTR_ADDU   "addu "
 # define PTR_ADDIU  "addiu "
+# define PTR_ADDI   "addi "
 # define PTR_SUBU   "subu "
 # define PTR_L  "lw "
+# define PTR_S  "sw "
+# define PTR_SRA"sra "
+# define PTR_SRL"srl "
+# define PTR_SLL"sll "
 #endif
 
 #endif /* AVCODEC_MIPS_ASMDEFS_H */
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 01/11] configure: remove option -fno-expensive-optimizations for loongson

2016-05-13 Thread
From c0f8c4e1bbe39216ba8f6df118703ad2c72a45e1 Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Thu, 12 May 2016 17:45:08 +0800
Subject: [PATCH 01/11] configure: remove option -fno-expensive-optimizations
 for loongson


1. gcc-4.9.3-5.fc21.loongson.2.mips64el in Fedora21 for loongson has
fix the bug in commit 2c34389551b6ba3d90f8783806c337e387e603cd.
2. the latest release version of Fedora for loongson is
http://mirror.lemote.com/fedora/live/Fedora-MATE-Live-2.iso
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)


diff --git a/configure b/configure
index df82eb2..0b9fdf3 100755
--- a/configure
+++ b/configure
@@ -4292,13 +4292,13 @@ elif enabled mips; then
 disable aligned_stack
 case $cpu in
 loongson3*)
-cpuflags="-march=loongson3a -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson3a -mhard-float"
 ;;
 loongson2e)
-cpuflags="-march=loongson2e -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson2e -mhard-float"
 ;;
 loongson2f)
-cpuflags="-march=loongson2f -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson2f -mhard-float"
 ;;
 esac
 ;;
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: loongson optimize h264dsp idct and loop filter with mmi

2015-10-15 Thread
Hi,
I am building an RPM package on fedora21-loongson
and found that master contains this patch, but neither release/2.8 nor n2.8.1 does.


git log master:


commit f7e9b9d260f9c7f065791c70d9ca6b89b5164742
Author: 周晓勇 
Date:   Wed Sep 2 18:02:24 2015 +0800


avcodec: loongson optimize h264dsp idct and loop filter with mmi <---

Change-Id: Ic87fb8f5cd22a502ff9dbbc5a5a8ea97cfc8a1dd
Signed-off-by: ZhouXiaoyong 
Signed-off-by: Michael Niedermayer 



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] configure: loongson revert expensive optimizations in gcc O3 optimization

2015-09-09 Thread
Sorry, there is no need to test and verify this patch.
My colleague ran the test with a wrong configuration.


> -原始邮件-
> 发件人: "周晓勇" 
> 发送时间: 2015年9月9日 星期三
> 收件人: ffmpeg-devel 
> 抄送: 
> 主题: [FFmpeg-devel] [PATCH] configure: loongson revert expensive optimizations 
> in gcc O3 optimization
> 
> Hi, Michael
> 
> 
> the yum repo maybe changed since last time i offered the fedora21.
> you could get some resource you may need from this address:
> http://mirror.lemote.com/fedora/
> http://www.loongnix.org/cgit
> 
> 
> ---
> From f751d5e88a3f1dac956983d4df49e6797bd33e73 Mon Sep 17 00:00:00 2001
> From: ZhouXiaoyong 
> Date: Wed, 9 Sep 2015 16:00:23 +0800
> Subject: [PATCH] configure: loongson revert expensive optimizations in gcc O3
>  optimization
> 
> 
> The failure of fate-acodec-dca or dca2 tests are caused by ccache.
> At the same time enable ccache and expensive-optimizations, dca parser
> would be compiled incorrectly. So there is no bug in loongson gcc-4.9.x.
> 

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] configure: loongson revert expensive optimizations in gcc O3 optimization

2015-09-09 Thread
Hi, Michael


The yum repo may have changed since the last time I offered Fedora 21.
You can get the resources you may need from these addresses:
http://mirror.lemote.com/fedora/
http://www.loongnix.org/cgit


---
From f751d5e88a3f1dac956983d4df49e6797bd33e73 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 9 Sep 2015 16:00:23 +0800
Subject: [PATCH] configure: loongson revert expensive optimizations in gcc O3
 optimization


The failures of the fate-acodec-dca and dca2 tests are caused by ccache.
When ccache and expensive-optimizations are enabled at the same time, the dca
parser is compiled incorrectly. So there is no bug in loongson gcc-4.9.x.


Signed-off-by: ZhouXiaoyong 
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)


diff --git a/configure b/configure
index cd0c22a..7d5dae7 100755
--- a/configure
+++ b/configure
@@ -4070,13 +4070,13 @@ elif enabled mips; then
 disable aligned_stack
 case $cpu in
 loongson3*)
-cpuflags="-march=loongson3a -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson3a -mhard-float"
 ;;
 loongson2e)
-cpuflags="-march=loongson2e -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson2e -mhard-float"
 ;;
 loongson2f)
-cpuflags="-march=loongson2f -mhard-float 
-fno-expensive-optimizations"
+cpuflags="-march=loongson2f -mhard-float"
 ;;
 esac
 ;;
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple idct put and add optimization

2015-09-05 Thread



> -原始邮件-
> 发件人: "周晓勇" 
> 发送时间: 2015年9月3日 星期四
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple 
> idct put and add optimization
> 
> 
> 
> 
> > -原始邮件-
> > 发件人: "Michael Niedermayer" 
> > 发送时间: 2015年9月2日 星期三
> > 收件人: "FFmpeg development discussions and patches" 
> > 抄送: 
> > 主题: Re: [FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple 
> > idct put and add optimization
> > 
> > On Wed, Sep 02, 2015 at 06:00:24PM +0800, 周晓勇 wrote:
> > > From e7dc62a7edbf985268908adf942e838548c09ef0 Mon Sep 17 00:00:00 2001
> > > From: ZhouXiaoyong 
> > > Date: Tue, 1 Sep 2015 09:34:18 +0800
> > > Subject: [PATCH 1/3] avcodec: loongson delete invalid simple idct put and 
> > > add
> > >  optimization
> > 
> > please document in the commit message why they are invalid
> > 
> > [...]
> > -- 

The default idct method is simple, so decoding with -idct simple gives wrong output, while
xvid decodes correctly.
Test video address:
http://loongnix.org/ftp/multimedia/testing/nanocore_720p_24fps_mpeg4_ac3_short.avi

Please review the third patch, which optimizes h264 idct and the loop filter. Test results
on loongson-3A1000 (which you have now):

1.no patch
frame= 1253 fps= 24 q=-0.0 Lsize= 3805988kB time=00:00:52.20 
bitrate=597196.8kbits/s
video:3805988kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB 
muxing overhead: 0.00%
real0m53.112s
user3m16.336s
sys 0m1.453s

2.with patch
frame= 1253 fps= 27 q=-0.0 Lsize= 3805988kB time=00:00:52.20 
bitrate=597196.8kbits/s
video:3805988kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB 
muxing overhead: 0.00%
real0m47.923s
user2m53.797s
sys 0m1.398s
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple idct put and add optimization

2015-09-02 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年9月2日 星期三
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple 
> idct put and add optimization
> 
> On Wed, Sep 02, 2015 at 06:00:24PM +0800, 周晓勇 wrote:
> > From e7dc62a7edbf985268908adf942e838548c09ef0 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Tue, 1 Sep 2015 09:34:18 +0800
> > Subject: [PATCH 1/3] avcodec: loongson delete invalid simple idct put and 
> > add
> >  optimization
> 
> please document in the commit message why they are invalid
> 
> [...]
> -- 

I found an mpeg4 video that is decoded incorrectly when using simple idct,
but the fate-mpeg4 test does not catch the error.
I don't know how to add the video to the FATE tests,
so I am disabling the invalid functions first, and I will provide the FTP download address of
the video later.
Beijing is holding a parade these days and the company is on holiday,
so I will upload the video in three days.
You can check out the other two patches in the meantime.

i feel so sad about your decision to resign, Sir.
please don't care about the criticism, history will prove everything !
loongson support you, Sir, and support FFmpeg !
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/3] avcodec: loongson delete invalid simple idct put and add optimization

2015-09-02 Thread
From e7dc62a7edbf985268908adf942e838548c09ef0 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 1 Sep 2015 09:34:18 +0800
Subject: [PATCH 1/3] avcodec: loongson delete invalid simple idct put and add
 optimization


Change-Id: I23a36c65915f01a1cf20e317c14b8eaaa62958b4
Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/idctdsp_init_mips.c |  2 --
 libavcodec/mips/simple_idct_mmi.c   | 19 +--
 2 files changed, 1 insertion(+), 20 deletions(-)


diff --git a/libavcodec/mips/idctdsp_init_mips.c 
b/libavcodec/mips/idctdsp_init_mips.c
index ac21669..8c26bca 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -49,8 +49,6 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, 
AVCodecContext *avctx,
 (avctx->bits_per_raw_sample != 10) &&
 (avctx->bits_per_raw_sample != 12) &&
 (avctx->idct_algo == FF_IDCT_AUTO)) {
-c->idct_put = ff_simple_idct_put_mmi;
-c->idct_add = ff_simple_idct_add_mmi;
 c->idct = ff_simple_idct_mmi;
 c->perm_type = FF_IDCT_PERM_NONE;
 }
diff --git a/libavcodec/mips/simple_idct_mmi.c 
b/libavcodec/mips/simple_idct_mmi.c
index 3c1a4f7..628e13f 100644
--- a/libavcodec/mips/simple_idct_mmi.c
+++ b/libavcodec/mips/simple_idct_mmi.c
@@ -54,7 +54,7 @@ DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
   C3, -C1,   C3, -C1
 };
 
-static void simple_idct_mmi(int16_t *block)
+void ff_simple_idct_mmi(int16_t *block)
 {
 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
 int16_t * const temp= (int16_t*)align_tmp;
@@ -814,20 +814,3 @@ static void simple_idct_mmi(int16_t *block)
 : "$10","$11"
 );
 }
-
-void ff_simple_idct_mmi(int16_t *block)
-{
-simple_idct_mmi(block);
-}
-
-void ff_simple_idct_put_mmi(uint8_t *dest, int32_t line_size, int16_t *block)
-{
-simple_idct_mmi(block);
-ff_put_pixels_clamped_mmi(block, dest, line_size);
-}
-
-void ff_simple_idct_add_mmi(uint8_t *dest, int32_t line_size, int16_t *block)
-{
-simple_idct_mmi(block);
-ff_add_pixels_clamped_mmi(block, dest, line_size);
-}
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/3] avcodec: loongson optimize mpeg2 dct unquantize intra and denoise dct

2015-09-02 Thread
From b34ee3cd2569c0982af7bad704352c6db89f00ec Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 1 Sep 2015 20:26:53 +0800
Subject: [PATCH 2/3] avcodec: loongson optimize mpeg2 dct unquantize intra and
 denoise dct


Change-Id: I2f391ae912a079fb32f0703841dca86358aac72a
Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/mpegvideo_init_mips.c |   5 ++
 libavcodec/mips/mpegvideo_mips.h  |   3 +
 libavcodec/mips/mpegvideo_mmi.c   | 140 ++
 3 files changed, 148 insertions(+)


diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
index 85a833c..e83aec5 100644
--- a/libavcodec/mips/mpegvideo_init_mips.c
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -37,6 +37,11 @@ static av_cold void dct_unquantize_init_mmi(MpegEncContext 
*s)
 s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
 s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
 s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+
+if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT))
+  s->dct_unquantize_mpeg2_intra = ff_dct_unquantize_mpeg2_intra_mmi;
+
+s->denoise_dct= ff_denoise_dct_mmi;
 }
 #endif /* HAVE_MMI */
 
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
index dbcea6a..decacd4c 100644
--- a/libavcodec/mips/mpegvideo_mips.h
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -31,5 +31,8 @@ void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, 
int16_t *block,
 int n, int qscale);
 void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
 int n, int qscale);
+void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block);
 
 #endif /* MPEGVIDEO_MIPS_H */
diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
index c8b1e16..94781e6 100644
--- a/libavcodec/mips/mpegvideo_mmi.c
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -301,3 +301,143 @@ void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, 
int16_t *block,
 :"$8","$10","memory"
 );
 }
+
+void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
+{
+const int intra = s->mb_intra;
+int *sum = s->dct_error_sum[intra];
+uint16_t *offset = s->dct_offset[intra];
+
+s->dct_count[intra]++;
+
+__asm__ volatile(
+"xor $f14, $f14, $f14   \r\n"
+"1: \r\n"
+"ldc1 $f4, 0(%[block])  \r\n"
+"xor $f0, $f0, $f0  \r\n"
+"ldc1 $f6, 8(%[block])  \r\n"
+"xor $f2, $f2, $f2  \r\n"
+"pcmpgth $f0, $f0, $f4  \r\n"
+"pcmpgth $f2, $f2, $f6  \r\n"
+"xor $f4, $f4, $f0  \r\n"
+"xor $f6, $f6, $f2  \r\n"
+"psubh $f4, $f4, $f0\r\n"
+"psubh $f6, $f6, $f2\r\n"
+"ldc1 $f12, 0(%[offset])\r\n"
+"mov.d $f8, $f4 \r\n"
+"psubush $f4, $f4, $f12 \r\n"
+"ldc1 $f12, 8(%[offset])\r\n"
+"mov.d $f10, $f6\r\n"
+"psubush $f6, $f6, $f12 \r\n"
+"xor $f4, $f4, $f0  \r\n"
+"xor $f6, $f6, $f2  \r\n"
+"psubh $f4, $f4, $f0\r\n"
+"psubh $f6, $f6, $f2\r\n"
+"sdc1 $f4, 0(%[block])  \r\n"
+"sdc1 $f6, 8(%[block])  \r\n"
+"mov.d $f4, $f8 \r\n"
+"mov.d $f6, $f10\r\n"
+"punpcklhw $f8, $f8, $f14   \r\n"
+"punpckhhw $f4, $f4, $f14   \r\n"
+"punpcklhw $f10, $f10, $f14 \r\n"
+"punpckhhw $f6, $f6, $f14   \r\n"
+"ldc1 $f0, 0(%[sum])\r\n"
+"paddw $f8, $f8, $f0\r\n"
+"ldc1 $f0, 8(%[sum])\r\n"
+"paddw $f4, $f4, $f0\r\n"
+"ldc1 $f0, 16(%[sum])   \r\n"
+"paddw $f10, $f10, $f0  \r\n"
+"ldc1 $f0, 24(%[sum])   \r\n"
+"paddw $f6, $f6, $f0\r\n"
+"sdc1 $f8, 0(%[sum])\r\n"
+"sdc1 $f4, 8(%[sum])\r\n"
+"sdc1 $f10, 16(%[sum])  \r\n"
+"sdc1 $f6, 24(%[sum])   \r\n"
+"daddiu %[block], %[block], 16  \r\n"
+"daddiu %[sum], %[sum], 32  \r\n"
+"daddiu %[offset], %[offset], 16\r\n"
+"dsubu $8, %[block1], %[block]  \r\n"
+"bgtz $8, 1b\r\n"
+: [block]"+r"(block),[sum]"+r"(sum),[offset]"+r"(offset)
+: [block1]"r"(block+64)
+: "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14"
+);
+}
+
+void ff_dct_u

Re: [FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when do clip

2015-08-11 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年8月11日 星期二
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when 
> do clip
> 
> On Tue, Aug 11, 2015 at 10:12:25AM +0800, 周晓勇 wrote:
> > 
> > 
> > 
> > > -原始邮件-
> > > 发件人: "Michael Niedermayer" 
> > > 发送时间: 2015年8月11日 星期二
> > > 收件人: "FFmpeg development discussions and patches" 
> > > 
> > > 抄送: 
> > > 主题: Re: [FFmpeg-devel] [PATCH] avcodec: use looking up crop table method 
> > > when do clip
> > > 
> > > On Fri, Aug 07, 2015 at 05:30:01PM +0800, 周晓勇 wrote:
> > > > based on last h264qpel optimization patch i have pushed
> > > > do i need separate this patch to double? cause one file to change 
> > > > loongson arch
> > > > use looking up crop table method may boost up decode on loongson, and 
> > > > with this patch x86 pass fate too
> > > 
> > > > but i have not tested on other arch
> > > 
> > > as this also changes x86, benchmarks on x86 are required
> > 
> > test on Ubuntu 15.04 and cpu:
> > time ./ffmepg -i 1280x720.mp4 -f rawvideo -an -vframes 4096 -y /dev/null
> 
> where can i find the 1280x720.mp4 file ?

movie download addr:
http://mirror.lemote.com/archls/testing/movies/h264/1280x720.mp4
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when do clip

2015-08-10 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年8月11日 星期二
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when 
> do clip
> 
> On Fri, Aug 07, 2015 at 05:30:01PM +0800, 周晓勇 wrote:
> > based on last h264qpel optimization patch i have pushed
> > do i need separate this patch to double? cause one file to change loongson 
> > arch
> > use looking up crop table method may boost up decode on loongson, and with 
> > this patch x86 pass fate too
> 
> > but i have not tested on other arch
> 
> as this also changes x86, benchmarks on x86 are required

Tested on Ubuntu 15.04 with the following command and CPU:
time ./ffmpeg -i 1280x720.mp4 -f rawvideo -an -vframes 4096 -y /dev/null

Architecture:  x86_64
CPU 运行模式:32-bit, 64-bit
Byte Order:Little Endian
CPU(s):4
On-line CPU(s) list:   0-3
每个核的线程数:2
每个座的核数:  2
Socket(s): 1
NUMA 节点: 1
厂商 ID:   GenuineIntel
CPU 系列:  6
型号:  58
Model name:Intel(R) Core(TM) i3-3217U CPU @ 1.80GHz
步进:  9
CPU MHz: 1285.804
CPU max MHz:   1800.
CPU min MHz:   800.
BogoMIPS:  3591.56
虚拟化:   VT-x
L1d 缓存:  32K
L1i 缓存:  32K
L2 缓存:   256K
L3 缓存:   3072K
NUMA node0 CPU(s): 0-3

no patch:
real0m14.362s
user0m48.380s
sys 0m0.416s

with patch:
real0m13.536s
user0m48.484s
sys 0m0.476s

Tested 3 times; the results are almost the same.

Has the out-of-array read error been fixed?
How can it be reproduced?
Could I try to fix the out-of-array read error, or redefine CLIP() in the loongson
branch?

> 
> 
> > 
> > 
> > test on loongson-3b
> > time ./ffmepg -i 1280x720.mp4 -f rawvideo -an -vframes 4096 -y /dev/null
> > 
> > 
> > no patch:
> > 
> > 
> > real 0m58.2s
> > user 4m59.1s
> > sys 0m5.8s
> > 
> > 
> > with patch:
> > 
> > 
> > real 0m53.9s
> > user 4m33.2s
> > sys 0m6.2s
> > 
> > 


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when do clip

2015-08-07 Thread
This is based on the last h264qpel optimization patch I pushed.
Do I need to split this patch in two, since one of the changed files is loongson-specific?
Using the crop-table lookup method may speed up decoding on loongson, and with this
patch x86 passes FATE too,
but I have not tested on other architectures.


Tested on loongson-3b:
time ./ffmpeg -i 1280x720.mp4 -f rawvideo -an -vframes 4096 -y /dev/null


no patch:


real 0m58.2s
user 4m59.1s
sys 0m5.8s


with patch:


real 0m53.9s
user 4m33.2s
sys 0m6.2s


---
From 68e88b17d113875d829a9936284d3551fd499139 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 7 Aug 2015 16:33:10 +0800
Subject: [PATCH] avcodec: use looking up crop table method when do clip


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/bit_depth_template.c | 4 ++--
 libavcodec/mips/h264qpel_mmi.c  | 4 
 2 files changed, 6 insertions(+), 2 deletions(-)


diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
index 8018489..759cd30 100644
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c
@@ -72,7 +72,7 @@
 #   define pixel4 uint32_t
 #   define dctcoef int16_t
 
-#   define INIT_CLIP
+#   define INIT_CLIP const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
 #   define no_rnd_avg_pixel4 no_rnd_avg32
 #   definernd_avg_pixel4rnd_avg32
 #   define AV_RN2P  AV_RN16
@@ -84,7 +84,7 @@
 #   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
 
 #   define av_clip_pixel(a) av_clip_uint8(a)
-#   define CLIP(a) av_clip_uint8(a)
+#   define CLIP(a) cm[a]
 #endif
 
 #define FUNC3(a, b, c)  a ## _ ## b ## c
diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
index e04a2d5..ebb21c7 100644
--- a/libavcodec/mips/h264qpel_mmi.c
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -1308,6 +1308,7 @@ static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, 
const uint8_t *src,
 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 int dstStride, int srcStride)
 {
+INIT_CLIP
 int i;
 int16_t _tmp[36];
 int16_t *tmp = _tmp;
@@ -1376,6 +1377,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, 
const uint8_t *src,
 static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 int dstStride, int srcStride)
 {
+INIT_CLIP
 int16_t _tmp[104];
 int16_t *tmp = _tmp;
 int i;
@@ -1479,6 +1481,7 @@ static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, 
const uint8_t *src,
 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 int dstStride, int srcStride)
 {
+INIT_CLIP
 int i;
 int16_t _tmp[36];
 int16_t *tmp = _tmp;
@@ -1549,6 +1552,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, 
const uint8_t *src,
 static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 int dstStride, int srcStride)
 {
+INIT_CLIP
 int16_t _tmp[104];
 int16_t *tmp = _tmp;
 int i;
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] avcodec: loongson optimized h264pred with mmi v2

2015-08-05 Thread
This is just another implementation using the C1 floating-point registers, and the patch makes
the functions more readable.
I think using C1 registers may reduce the load on the general-purpose registers.
gsldlc1 and gsldrc1 are similar to ldl and ldr; they differ only in which
registers they use.


在2015-08-06 05:29:58,周晓勇写道:
> Hi,
> 
> On Tue, Aug 4, 2015 at 8:05 AM, 周晓勇  wrote:
> 
> > From 71478e642fac00b12b313723ee83acdfef732fd1 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Tue, 4 Aug 2015 16:28:02 +0800
> > Subject: [PATCH 1/2] avcodec: loongson optimized h264pred with mmi v2
> >
> >
> > Signed-off-by: ZhouXiaoyong 
> > ---
> >  libavcodec/mips/h264pred_init_mips.c |   1 -
> >  libavcodec/mips/h264pred_mips.h  |   7 +-
> >  libavcodec/mips/h264pred_mmi.c   | 459
> > +--
> >  3 files changed, 226 insertions(+), 241 deletions(-)
> 
>  [..]
> 
> > void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
> >  {
> >  __asm__ volatile (
> > -"dsubu $2, %0, %1   \r\n"
> > -"daddu $3, %0, $0   \r\n"
> > -"ldl $4, 7($2)  \r\n"
> > -"ldr $4, 0($2)  \r\n"
> > -"ldl $5, 15($2) \r\n"
> > -"ldr $5, 8($2)  \r\n"
> > -"dli $6, 0x10   \r\n"
> > +"dli $8, 16 \r\n"
> > +"gsldlc1 $f2, 7(%[srcA])\r\n"
> > +"gsldrc1 $f2, 0(%[srcA])\r\n"
> > +"gsldlc1 $f4, 15(%[srcA])   \r\n"
> > +"gsldrc1 $f4, 8(%[srcA])\r\n"
> >  "1: \r\n"
> > -"sdl $4, 7($3)  \r\n"
> > -"sdr $4, 0($3)  \r\n"
> > -"sdl $5, 15($3) \r\n"
> > -"sdr $5, 8($3)  \r\n"
> > -"daddu $3, %1   \r\n"
> > -"daddiu $6, -1  \r\n"
> > -"bnez $6, 1b\r\n"
> > -::"r"(src),"r"(stride)
> > -: "$2","$3","$4","$5","$6","memory"
> > +"gssdlc1 $f2, 7(%[src]) \r\n"
> > +"gssdrc1 $f2, 0(%[src]) \r\n"
> > +"gssdlc1 $f4, 15(%[src])\r\n"
> > +"gssdrc1 $f4, 8(%[src]) \r\n"
> > +"daddu %[src], %[src], %[stride]\r\n"
> > +"daddi $8, $8, -1   \r\n"
> > +"bnez $8, 1b\r\n"
> > +: [src]"+&r"(src)
> > +: [stride]"r"(stride),[srcA]"r"(src-stride)
> > +: "$8","$f2","$f4"
> >  );
> >  }
> 
> 
> So... I'm confused. You're replacing one type of optimizations with
> another. What happened? Was the old optimization bad? Was it for an old cpu
> type and is yours for a newer one? Something else?
> 
> Ronald
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] avcodec: loongson optimized h264pred with mmi v2

2015-08-04 Thread
From 71478e642fac00b12b313723ee83acdfef732fd1 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 4 Aug 2015 16:28:02 +0800
Subject: [PATCH 1/2] avcodec: loongson optimized h264pred with mmi v2


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/h264pred_init_mips.c |   1 -
 libavcodec/mips/h264pred_mips.h  |   7 +-
 libavcodec/mips/h264pred_mmi.c   | 459 +--
 3 files changed, 226 insertions(+), 241 deletions(-)


diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index a2124ec..93a2409 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -114,7 +114,6 @@ static av_cold void h264_pred_init_mmi(H264PredContext *h, 
int codec_id,
 h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_mmi;
 h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
 h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
-h->pred8x8l [HOR_PRED   ] = ff_pred8x8l_horizontal_8_mmi;
 
 switch (codec_id) {
 case AV_CODEC_ID_SVQ3:
diff --git a/libavcodec/mips/h264pred_mips.h b/libavcodec/mips/h264pred_mips.h
index 16bf6fc..d7d12c5 100644
--- a/libavcodec/mips/h264pred_mips.h
+++ b/libavcodec/mips/h264pred_mips.h
@@ -21,11 +21,8 @@
 #ifndef H264_PRED_MIPS_H
 #define H264_PRED_MIPS_H
 
-#include "libavutil/attributes.h"
-#include "libavutil/avassert.h"
-#include "libavcodec/avcodec.h"
+#include "constants.h"
 #include "libavcodec/h264pred.h"
-#include "libavcodec/bit_depth_template.c"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride);
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride);
@@ -34,8 +31,6 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, 
int has_topright,
 ptrdiff_t stride);
 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
 ptrdiff_t stride);
-void ff_pred8x8l_horizontal_8_mmi(uint8_t *src, int has_topleft,
-int has_topright, ptrdiff_t stride);
 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
 int has_topright, ptrdiff_t stride);
 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index c5ae796..e949d11 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,68 +23,66 @@
  */
 
 #include "h264pred_mips.h"
-#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-"dsubu $2, %0, %1   \r\n"
-"daddu $3, %0, $0   \r\n"
-"ldl $4, 7($2)  \r\n"
-"ldr $4, 0($2)  \r\n"
-"ldl $5, 15($2) \r\n"
-"ldr $5, 8($2)  \r\n"
-"dli $6, 0x10   \r\n"
+"dli $8, 16 \r\n"
+"gsldlc1 $f2, 7(%[srcA])\r\n"
+"gsldrc1 $f2, 0(%[srcA])\r\n"
+"gsldlc1 $f4, 15(%[srcA])   \r\n"
+"gsldrc1 $f4, 8(%[srcA])\r\n"
 "1: \r\n"
-"sdl $4, 7($3)  \r\n"
-"sdr $4, 0($3)  \r\n"
-"sdl $5, 15($3) \r\n"
-"sdr $5, 8($3)  \r\n"
-"daddu $3, %1   \r\n"
-"daddiu $6, -1  \r\n"
-"bnez $6, 1b\r\n"
-::"r"(src),"r"(stride)
-: "$2","$3","$4","$5","$6","memory"
+"gssdlc1 $f2, 7(%[src]) \r\n"
+"gssdrc1 $f2, 0(%[src]) \r\n"
+"gssdlc1 $f4, 15(%[src])\r\n"
+"gssdrc1 $f4, 8(%[src]) \r\n"
+"daddu %[src], %[src], %[stride]\r\n"
+"daddi $8, $8, -1   \r\n"
+"bnez $8, 1b\r\n"
+: [src]"+&r"(src)
+: [stride]"r"(stride),[srcA]"r"(src-stride)
+: "$8","$f2","$f4"
 );
 }
 
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-"daddiu $2, %0, -1  \r\n"
-"daddu $3, %0, $0   \r\n"
+"daddiu $2, %[src], -1  \r\n"
+"daddu $3, %[src], $0   \r\n"
 "dli $6, 0x10   \r\n"
 "1: \r\n"
 "lbu $4, 0($2)  \r\n"
-"dmul $5, $4, %2\r\n"
+"dmul $5, $4, %[ff_pb_1]\r\n"
 "sdl $5, 7($3)  \r\n"
 "sdr $5, 0($3)  \r\n"
 "sdl $5, 15($3) \r\n"
 "sdr $5, 8($3)  \r\n"
-"daddu $2, %1   \r\n"
-"dadd

[FFmpeg-devel] [PATCH 2/2] avcodec/mips: h264qpel init add missing mc00 msa optimization

2015-08-04 Thread
From 734eabc92df1b6ca26a943f9723e47a838d859f7 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 4 Aug 2015 19:39:51 +0800
Subject: [PATCH 2/2] avcodec/mips: h264qpel init add missing mc00 msa
 optimization


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/h264qpel_init_mips.c | 1 +
 1 file changed, 1 insertion(+)


diff --git a/libavcodec/mips/h264qpel_init_mips.c 
b/libavcodec/mips/h264qpel_init_mips.c
index cfa5854..72797f1 100644
--- a/libavcodec/mips/h264qpel_init_mips.c
+++ b/libavcodec/mips/h264qpel_init_mips.c
@@ -59,6 +59,7 @@ static av_cold void h264qpel_init_msa(H264QpelContext *c, int 
bit_depth)
 c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_msa;
 c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_msa;
 
+c->put_h264_qpel_pixels_tab[2][0] = ff_put_h264_qpel4_mc00_msa;
 c->put_h264_qpel_pixels_tab[2][1] = ff_put_h264_qpel4_mc10_msa;
 c->put_h264_qpel_pixels_tab[2][2] = ff_put_h264_qpel4_mc20_msa;
 c->put_h264_qpel_pixels_tab[2][3] = ff_put_h264_qpel4_mc30_msa;
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: loongson optimize pixblockdsp with mmi

2015-07-22 Thread
From acd96895d5f5a67a7883f5e48b56ac905ade268b Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 22 Jul 2015 14:29:39 +0800
Subject: [PATCH] avcodec: loongson optimize pixblockdsp with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|  1 +
 libavcodec/mips/pixblockdsp_init_mips.c | 16 +++
 libavcodec/mips/pixblockdsp_mips.h  |  6 +++
 libavcodec/mips/pixblockdsp_mmi.c   | 79 +
 4 files changed, 102 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index da91608..1c4991a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -68,3 +68,4 @@ MMI-OBJS-$(CONFIG_IDCTDSP)+= 
mips/idctdsp_mmi.o   \
  mips/simple_idct_mmi.o
 MMI-OBJS-$(CONFIG_MPEG4_DECODER)  += mips/xvid_idct_mmi.o
 MMI-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_mmi.o
+MMI-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_mmi.o
diff --git a/libavcodec/mips/pixblockdsp_init_mips.c 
b/libavcodec/mips/pixblockdsp_init_mips.c
index 0f2fb15..1b3741e 100644
--- a/libavcodec/mips/pixblockdsp_init_mips.c
+++ b/libavcodec/mips/pixblockdsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -44,10 +45,25 @@ static av_cold void pixblockdsp_init_msa(PixblockDSPContext 
*c,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void pixblockdsp_init_mmi(PixblockDSPContext *c,
+AVCodecContext *avctx, unsigned high_bit_depth)
+{
+c->diff_pixels = ff_diff_pixels_mmi;
+
+if (!high_bit_depth || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+c->get_pixels = ff_get_pixels_8_mmi;
+}
+}
+#endif /* HAVE_MMI */
+
 void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
   unsigned high_bit_depth)
 {
 #if HAVE_MSA
 pixblockdsp_init_msa(c, avctx, high_bit_depth);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+pixblockdsp_init_mmi(c, avctx, high_bit_depth);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/pixblockdsp_mips.h 
b/libavcodec/mips/pixblockdsp_mips.h
index 3eee6e0..7f8cc96 100644
--- a/libavcodec/mips/pixblockdsp_mips.h
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -30,4 +31,9 @@ void ff_get_pixels_16_msa(int16_t *restrict dst, const 
uint8_t *src,
 void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
  ptrdiff_t stride);
 
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
+ptrdiff_t line_size);
+void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
+const uint8_t *src2, int stride);
+
 #endif  // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/pixblockdsp_mmi.c 
b/libavcodec/mips/pixblockdsp_mmi.c
new file mode 100644
index 0000000..30631d8
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -0,0 +1,79 @@
+/*
+ * Loongson SIMD optimized pixblockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixblockdsp_mips.h"
+
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
+ptrdiff_t line_size)
+{
+__asm__ volatile (
+"move $8, $0\n\t"
+"xor $f0, $f0, $f0  \n\t"
+"1: \n\t"
+"gsldlc1 $f2, 7(%1) \n\t"
+"gsldrc1 $f2, 0(%1) \n\t"
+"punpcklbh $f4, $f2, $f0\n\t"
+"punpckhbh $f6, $f2, $f0\n\t"
+"gssdxc1 $f4, 0(%0, $8) \n\t"
+"gssdxc1 $f6, 8(%0, $8) \n\t"
+"daddiu $8, $8, 16  \n\t"
+"daddu %1, %1, %2   \n\t"
+"daddi %3, %3, -1   \n\t"
+"bnez %3, 1b\n\t"
+::"r"((uint8_t *)block),"r"(pixels),"r"(line_size),"r"(8)
+ 

[FFmpeg-devel] [PATCH] avcodec: loongson optimize blockdsp with mmi

2015-07-21 Thread
From 431c8fe5d418d79d5c7cb137499a26e88e6c84dc Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 21 Jul 2015 20:55:51 +0800
Subject: [PATCH] avcodec: loongson optimize blockdsp with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |   1 +
 libavcodec/mips/blockdsp_init_mips.c |  16 
 libavcodec/mips/blockdsp_mips.h  |   6 ++
 libavcodec/mips/blockdsp_mmi.c   | 147 +++
 4 files changed, 170 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index a105661..da91608 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -67,3 +67,4 @@ MMI-OBJS-$(CONFIG_MPEGVIDEO)  += 
mips/mpegvideo_mmi.o
 MMI-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_mmi.o   \
  mips/simple_idct_mmi.o
 MMI-OBJS-$(CONFIG_MPEG4_DECODER)  += mips/xvid_idct_mmi.o
+MMI-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_mmi.o
diff --git a/libavcodec/mips/blockdsp_init_mips.c 
b/libavcodec/mips/blockdsp_init_mips.c
index 99ae316..2278613 100644
--- a/libavcodec/mips/blockdsp_init_mips.c
+++ b/libavcodec/mips/blockdsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -32,9 +33,24 @@ static av_cold void blockdsp_init_msa(BlockDSPContext *c,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void blockdsp_init_mmi(BlockDSPContext *c,
+unsigned high_bit_depth)
+{
+c->clear_block = ff_clear_block_mmi;
+c->clear_blocks = ff_clear_blocks_mmi;
+
+c->fill_block_tab[0] = ff_fill_block16_mmi;
+c->fill_block_tab[1] = ff_fill_block8_mmi;
+}
+#endif /* HAVE_MMI */
+
 void ff_blockdsp_init_mips(BlockDSPContext *c, unsigned high_bit_depth)
 {
 #if HAVE_MSA
 blockdsp_init_msa(c, high_bit_depth);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+blockdsp_init_mmi(c, high_bit_depth);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/blockdsp_mips.h b/libavcodec/mips/blockdsp_mips.h
index 0b6bb67..9559d40 100644
--- a/libavcodec/mips/blockdsp_mips.h
+++ b/libavcodec/mips/blockdsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -28,4 +29,9 @@ void ff_fill_block8_msa(uint8_t *src, uint8_t val, int 
stride, int height);
 void ff_clear_block_msa(int16_t *block);
 void ff_clear_blocks_msa(int16_t *block);
 
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_clear_block_mmi(int16_t *block);
+void ff_clear_blocks_mmi(int16_t *block);
+
 #endif  // #ifndef AVCODEC_MIPS_BLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
new file mode 100644
index 0000000..63eaf69
--- /dev/null
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -0,0 +1,147 @@
+/*
+ * Loongson SIMD optimized blockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "blockdsp_mips.h"
+
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+__asm__ volatile (
+"move $8, %3\r\n"
+"move $9, %0\r\n"
+"dmtc1 %1, $f2  \r\n"
+"punpcklbh $f2, $f2, $f2\r\n"
+"punpcklbh $f2, $f2, $f2\r\n"
+"punpcklbh $f2, $f2, $f2\r\n"
+"1: \r\n"
+"gssdlc1 $f2, 7($9) \r\n"
+"gssdrc1 $f2, 0($9) \r\n"
+"gssdlc1 $f2, 15($9)\r\n"
+"gssdrc1 $f2, 8($9) \r\n"
+"daddi $8, $8, -1   \r\n"
+"daddu $9, $9, %2   \r\n"
+"bnez $8, 1b\r\n"
+::"r"(block),"r"(value),"r"(line_size),"r"(h)
+: "$8","$9"
+);
+}
+
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+__asm__ volatile (
+"move $8, %3\r\n"
+"move $9, %0 

[FFmpeg-devel] [PATCH 1/2] avcodec: loongson move simple idct functions to a separate file

2015-07-20 Thread
From f90a2009bd7fc6832cd9c1df174e52e7a1431c0e Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 21 Jul 2015 10:08:21 +0800
Subject: [PATCH 1/2] avcodec: loongson move simple idct functions to a
 separate file


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|   3 +-
 libavcodec/mips/idctdsp_init_mips.c |   4 +-
 libavcodec/mips/idctdsp_mmi.c   | 811 ---
 libavcodec/mips/simple_idct_mmi.c   | 833 
 4 files changed, 837 insertions(+), 814 deletions(-)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index c2996c2..1d27edd 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -63,4 +63,5 @@ MMI-OBJS-$(CONFIG_H264DSP)+= 
mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
 MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
 MMI-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_mmi.o
-MMI-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_mmi.o
+MMI-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_mmi.o   \
+ mips/simple_idct_mmi.o
diff --git a/libavcodec/mips/idctdsp_init_mips.c 
b/libavcodec/mips/idctdsp_init_mips.c
index 3d2192e..ac21669 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -43,7 +43,7 @@ static av_cold void idctdsp_init_msa(IDCTDSPContext *c, 
AVCodecContext *avctx,
 
 #if HAVE_MMI
 static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
- unsigned high_bit_depth)
+unsigned high_bit_depth)
 {
 if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
 (avctx->bits_per_raw_sample != 10) &&
@@ -61,7 +61,7 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, 
AVCodecContext *avctx,
 }
 #endif /* HAVE_MMI */
 
-void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+av_cold void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
   unsigned high_bit_depth)
 {
 #if HAVE_MSA
diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
index 83afb8a..25476f3 100644
--- a/libavcodec/mips/idctdsp_mmi.c
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -24,800 +24,6 @@
 #include "idctdsp_mips.h"
 #include "constants.h"
 
-#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
-#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C6 8867  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C7 4520  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-
-#define ROW_SHIFT 11
-#define COL_SHIFT 20
-
-DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
-1<<(ROW_SHIFT-1),   0, 1<<(ROW_SHIFT-1),   0,
-1<<(ROW_SHIFT-1),   1, 1<<(ROW_SHIFT-1),   0,
-  C4,  C4,   C4,  C4,
-  C4, -C4,   C4, -C4,
-  C2,  C6,   C2,  C6,
-  C6, -C2,   C6, -C2,
-  C1,  C3,   C1,  C3,
-  C5,  C7,   C5,  C7,
-  C3, -C7,   C3, -C7,
- -C1, -C5,  -C1, -C5,
-  C5, -C1,   C5, -C1,
-  C7,  C3,   C7,  C3,
-  C7, -C5,   C7, -C5,
-  C3, -C1,   C3, -C1
-};
-
-static void simple_idct_mmi(int16_t *block)
-{
-DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
-int16_t * const temp= (int16_t*)align_tmp;
-
-__asm__ volatile(
-#undef  DC_COND_IDCT
-#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift) \
-"ldc1 $f0, " #src0 "\n\t" /* R4 R0  r4  r0 */\
-"ldc1 $f2, " #src4 "\n\t" /* R6 R2  r6  r2 */\
-"ldc1 $f4, " #src1 "\n\t" /* R3 R1  r3  r1 */\
-"ldc1 $f6, " #src5 "\n\t" /* R7 R5  r7  r5 */\
-"ldc1 $f8, %3   \n\t"\
-"and  $f8, $f8, $f0 \n\t"\
-"or $f8, $f8, $f2   \n\t"\
-"or $f8, $f8, $f4   \n\t"\
-"or $f8, $f8, $f6   \n\t"\
-"packsswh $f8, $f8, $f8 \n\t"\
-"li $11, " #shift " \n\t"\
-"mfc1 $10, $f8  \n\t"\
-"mtc1 $11, $f18 \n\t"\
-"beqz $10, 1f   \n\t"\
-"ldc1 $f8, 16(%2)   \n\t" /* C4 C4  C4  C4 */\
-"pmaddhw $f8, $f8, $f0  \n\t" /* C4R4+C4R0  C4r4+C4r0 */\
-"ldc1 $f10, 24(%2)  \n\t" /* -C4C4

[FFmpeg-devel] [PATCH 2/2] avcodec: loongson optimize xvid idct with mmi

2015-07-20 Thread
From 0e387e3057deb1390adc1d12e738d7c91b59be18 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 21 Jul 2015 10:14:40 +0800
Subject: [PATCH 2/2] avcodec: loongson optimize xvid idct with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |   2 +
 libavcodec/mips/xvid_idct_mmi.c  | 253 +++
 libavcodec/mips/xvididct_init_mips.c |  45 +++
 libavcodec/mips/xvididct_mips.h  |  30 +
 libavcodec/xvididct.c|   2 +
 libavcodec/xvididct.h|   2 +
 6 files changed, 334 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 1d27edd..a105661 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -34,6 +34,7 @@ OBJS-$(CONFIG_IDCTDSP)+= 
mips/idctdsp_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_init_mips.o
 OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
+OBJS-$(CONFIG_MPEG4_DECODER)  += mips/xvididct_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_uniw_msa.o   \
@@ -65,3 +66,4 @@ MMI-OBJS-$(CONFIG_H264PRED)   += 
mips/h264pred_mmi.o
 MMI-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_mmi.o
 MMI-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_mmi.o   \
  mips/simple_idct_mmi.o
+MMI-OBJS-$(CONFIG_MPEG4_DECODER)  += mips/xvid_idct_mmi.o
diff --git a/libavcodec/mips/xvid_idct_mmi.c b/libavcodec/mips/xvid_idct_mmi.c
new file mode 100644
index 0000000..d3f9acb
--- /dev/null
+++ b/libavcodec/mips/xvid_idct_mmi.c
@@ -0,0 +1,253 @@
+/*
+ * Loongson SIMD optimized xvid idct
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+#include "xvididct_mips.h"
+
+#define BITS_INV_ACC    5   // 4 or 5 for IEEE
+#define SHIFT_INV_ROW   (16 - BITS_INV_ACC) //11
+#define SHIFT_INV_COL   (1 + BITS_INV_ACC)  //6
+#define RND_INV_ROW     (1024 * (6 - BITS_INV_ACC))
+#define RND_INV_COL     (16 * (BITS_INV_ACC - 3))
+#define RND_INV_CORR    (RND_INV_COL - 1)
+
+#define BITS_FRW_ACC    3   // 2 or 3 for accuracy
+#define SHIFT_FRW_COL   BITS_FRW_ACC
+#define SHIFT_FRW_ROW   (BITS_FRW_ACC + 17)
+#define RND_FRW_ROW     (262144*(BITS_FRW_ACC - 1))
+
+DECLARE_ALIGNED(8, static const int16_t, tg_1_16)[4*4] = {
+ 13036, 13036, 13036, 13036,//  tg * (2<<16) + 0.5
+ 27146, 27146, 27146, 27146,//  tg * (2<<16) + 0.5
+-21746,-21746,-21746,-21746,//  tg * (2<<16) + 0.5
+ 23170, 23170, 23170, 23170 // cos * (2<<15) + 0.5
+};
+
+DECLARE_ALIGNED(8, static const int32_t, rounder_0)[2*8] = {
+65536,65536,
+ 3597, 3597,
+ 2260, 2260,
+ 1203, 1203,
+0,0,
+  120,  120,
+  512,  512,
+  512,  512
+};
+
+DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmi)[32*4] = {
+ 16384, 21407, 16384,  8867,// w05 w04 w01 w00
+ 16384,  8867,-16384,-21407,// w07 w06 w03 w02
+ 16384, -8867, 16384,-21407,// w13 w12 w09 w08
+-16384, 21407, 16384, -8867,// w15 w14 w11 w10
+ 22725, 19266, 19266, -4520,// w21 w20 w17 w16
+ 12873,  4520,-22725,-12873,// w23 w22 w19 w18
+ 12873,-22725,  4520,-12873,// w29 w28 w25 w24
+  4520, 19266, 19266,-22725,// w31 w30 w27 w26
+
+ 22725, 29692, 22725, 12299,// w05 w04 w01 w00
+ 22725, 12299,-22725,-29692,// w07 w06 w03 w02
+ 22725,-12299, 22725,-29692,// w13 w12 w09 w08
+-22725, 29692, 22725,-12299,// w15 w14 w11 w10
+ 31521, 26722, 26722, -6270,// w21 w20 w17 w16
+ 17855,  6270,-31521,-17855,// w23 w22 w19 w18
+ 17855,-31521,  6270,-17855,// w29 w28 w25 w24
+  6270, 26722, 26722,-31521,// w31 w30 w27 w26
+
+ 21407, 27969, 21407, 11585,// w05 w04 w01 w00
+   

Re: [FFmpeg-devel] [PATCH 2/2] avcodec: loongson relocate constants of idctdsp and h264pred

2015-07-19 Thread
Sorry, the previous patch is withdrawn; please review this one instead.
This version avoids a load in idctdsp_mmi.c by using an immediate value for
ff_pb_80.

---
From 40399677fd67087db950c7f0f8ca382e5bc2cfd2 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 20 Jul 2015 11:07:05 +0800
Subject: [PATCH 2/2] avcodec: loongson relocate constants of idctdsp and
 h264pred

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/constants.c|  5 
 libavcodec/mips/constants.h|  5 
 libavcodec/mips/h264pred_mmi.c | 61 --
 libavcodec/mips/idctdsp_mmi.c  |  8 +++---
 4 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index 84841c2..a25fd24 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -42,6 +42,8 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) =
{0x0004000300020001ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) ={0x0008000700060005ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) ={0x0003000200010000ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) ={0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) ={0x000b000a00090008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) ={0x000f000e000d000cULL};
 
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =   {0x0101010101010101ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =   {0x0303030303030303ULL};
@@ -51,3 +53,6 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =  
{0xA1A1A1A1A1A1A1A1ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd) ={0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =   {0x0040004000400040ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =   {0x0020002000200020ULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
index 8f5292e..571002f 100644
--- a/libavcodec/mips/constants.h
+++ b/libavcodec/mips/constants.h
@@ -43,6 +43,8 @@ extern const uint64_t ff_pw_1to4;
 extern const uint64_t ff_pw_5to8;
 extern const uint64_t ff_pw_0to3;
 extern const uint64_t ff_pw_4to7;
+extern const uint64_t ff_pw_8tob;
+extern const uint64_t ff_pw_ctof;
 
 extern const uint64_t ff_pb_1;
 extern const uint64_t ff_pb_3;
@@ -53,4 +55,7 @@ extern const uint64_t ff_rnd;
 extern const uint64_t ff_rnd2;
 extern const uint64_t ff_rnd3;
 
+extern const uint64_t ff_wm1010;
+extern const uint64_t ff_d40000;
+
 #endif /* AVCODEC_MIPS_CONSTANTS_H */
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index b8c0676..c5ae796 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,6 +23,7 @@
  */
 
 #include "h264pred_mips.h"
+#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
@@ -50,14 +51,12 @@ void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a   \r\n"
 "daddiu $2, %0, -1  \r\n"
 "daddu $3, %0, $0   \r\n"
 "dli $6, 0x10   \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "1: \r\n"
 "lbu $4, 0($2)  \r\n"
-"dmul $5, $4, $7\r\n"
+"dmul $5, $4, %2\r\n"
 "sdl $5, 7($3)  \r\n"
 "sdr $5, 0($3)  \r\n"
 "sdl $5, 15($3) \r\n"
@@ -66,7 +65,7 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 "daddu $3, %1   \r\n"
 "daddiu $6, -1  \r\n"
 "bnez $6, 1b\r\n"
-::"r"(src),"r"(stride)
+::"r"(src),"r"(stride),"r"(ff_pb_1)
 : "$2","$3","$4","$5","$6","memory"
 );
 }
@@ -74,7 +73,6 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a   \r\n"
 "daddiu $2, %0, -1  \r\n"
 "dli $6, 0x10   \r\n"
 "xor $8, $8, $8 \r\n"
@@ -93,10 +91,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 "daddiu $2, $2, 1   \r\n"
 "daddiu $6, $6, -1  \r\n"
 "bnez $6, 2b\r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "daddiu $8, $8, 0x10\r\n"
 "dsra $8, 5 \r\n"
-"dmul $5, $8, $7\r\n"
+"dmul $5, $8, %2\r\n"
 

[FFmpeg-devel] [PATCH 1/2] avcodec: loongson constants redefined with macros

2015-07-19 Thread
From 9c95155e90ff5d083e56cdd2565792c9e314302a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 20 Jul 2015 10:58:30 +0800
Subject: [PATCH 1/2] avcodec: loongson constants redefined with macros


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/constants.c | 52 ++---
 libavcodec/mips/constants.h | 52 ++---
 2 files changed, 52 insertions(+), 52 deletions(-)


diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index 135b9d4..84841c2 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -23,31 +23,31 @@
 #include "libavutil/mem.h"
 #include "constants.h"
 
-const uint64_t __attribute__ ((aligned(8))) ff_pw_1 =   
{0x0001000100010001ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_4 =   
{0x0004000400040004ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_5 =   
{0x0005000500050005ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_3 =   
{0x0003000300030003ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_8 =   
{0x0008000800080008ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_9 =   
{0x0009000900090009ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_16 =  
{0x0010001000100010ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_18 =  
{0x0012001200120012ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_28 =  
{0x001C001C001C001CULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_32 =  
{0x0020002000200020ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_53 =  
{0x0035003500350035ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_64 =  
{0x0040004000400040ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_128 = 
{0x0080008000800080ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_m8tom5 =  
{0xFFFBFFFAFFF9FFF8ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_m4tom1 =  
{0xFFFFFFFEFFFDFFFCULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_1to4 =
{0x0004000300020001ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_5to8 =
{0x0008000700060005ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_0to3 =
{0x0003000200010000ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pw_4to7 =
{0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) =   {0x0001000100010001ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) =   {0x0004000400040004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) =   {0x0005000500050005ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) =   {0x0003000300030003ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) =   {0x0008000800080008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) =   {0x0009000900090009ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) =  {0x0010001000100010ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) =  {0x0012001200120012ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) =  {0x001C001C001C001CULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) =  {0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) =  {0x0035003500350035ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) =  {0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) =  {0xFFFBFFFAFFF9FFF8ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) =  {0xFFFFFFFEFFFDFFFCULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) ={0x0004000300020001ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) ={0x0008000700060005ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) ={0x0003000200010000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) ={0x0007000600050004ULL};
 
-const uint64_t __attribute__ ((aligned(8))) ff_pb_1 =   
{0x0101010101010101ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pb_3 =   
{0x0303030303030303ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pb_80 =  
{0x8080808080808080ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_pb_A1 =  
{0xA1A1A1A1A1A1A1A1ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =   {0x0101010101010101ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =   {0x0303030303030303ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) =  {0x8080808080808080ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =  {0xA1A1A1A1A1A1A1A1ULL};
 
-const uint64_t __attribute__ ((aligned(8))) ff_rnd =
{0x0004000400040004ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_rnd2 =   
{0x0040004000400040ULL};
-const uint64_t __attribute__ ((aligned(8))) ff_rnd3 =   
{0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd) ={0x0004000400040004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =   {0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =   {0x0020002000200020ULL};
diff --git a/

[FFmpeg-devel] [PATCH 2/2] avcodec: loongson relocate constants of idctdsp and h264pred

2015-07-19 Thread
From 9eac81d24472916636f2b0ad21cf8560f0acf20a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 20 Jul 2015 11:07:05 +0800
Subject: [PATCH 2/2] avcodec: loongson relocate constants of idctdsp and
 h264pred


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/constants.c|  5 
 libavcodec/mips/constants.h|  5 
 libavcodec/mips/h264pred_mmi.c | 61 --
 libavcodec/mips/idctdsp_mmi.c  |  4 +--
 4 files changed, 34 insertions(+), 41 deletions(-)


diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index 84841c2..a25fd24 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -42,6 +42,8 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) =
{0x0004000300020001ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) ={0x0008000700060005ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) ={0x0003000200010000ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) ={0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) ={0x000b000a00090008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) ={0x000f000e000d000cULL};
 
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =   {0x0101010101010101ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =   {0x0303030303030303ULL};
@@ -51,3 +53,6 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =  
{0xA1A1A1A1A1A1A1A1ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd) ={0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =   {0x0040004000400040ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =   {0x0020002000200020ULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
index 8f5292e..571002f 100644
--- a/libavcodec/mips/constants.h
+++ b/libavcodec/mips/constants.h
@@ -43,6 +43,8 @@ extern const uint64_t ff_pw_1to4;
 extern const uint64_t ff_pw_5to8;
 extern const uint64_t ff_pw_0to3;
 extern const uint64_t ff_pw_4to7;
+extern const uint64_t ff_pw_8tob;
+extern const uint64_t ff_pw_ctof;
 
 extern const uint64_t ff_pb_1;
 extern const uint64_t ff_pb_3;
@@ -53,4 +55,7 @@ extern const uint64_t ff_rnd;
 extern const uint64_t ff_rnd2;
 extern const uint64_t ff_rnd3;
 
+extern const uint64_t ff_wm1010;
+extern const uint64_t ff_d40000;
+
 #endif /* AVCODEC_MIPS_CONSTANTS_H */
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index b8c0676..c5ae796 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,6 +23,7 @@
  */
 
 #include "h264pred_mips.h"
+#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
@@ -50,14 +51,12 @@ void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a   \r\n"
 "daddiu $2, %0, -1  \r\n"
 "daddu $3, %0, $0   \r\n"
 "dli $6, 0x10   \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "1: \r\n"
 "lbu $4, 0($2)  \r\n"
-"dmul $5, $4, $7\r\n"
+"dmul $5, $4, %2\r\n"
 "sdl $5, 7($3)  \r\n"
 "sdr $5, 0($3)  \r\n"
 "sdl $5, 15($3) \r\n"
@@ -66,7 +65,7 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 "daddu $3, %1   \r\n"
 "daddiu $6, -1  \r\n"
 "bnez $6, 1b\r\n"
-::"r"(src),"r"(stride)
+::"r"(src),"r"(stride),"r"(ff_pb_1)
 : "$2","$3","$4","$5","$6","memory"
 );
 }
@@ -74,7 +73,6 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t 
stride)
 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a   \r\n"
 "daddiu $2, %0, -1  \r\n"
 "dli $6, 0x10   \r\n"
 "xor $8, $8, $8 \r\n"
@@ -93,10 +91,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 "daddiu $2, $2, 1   \r\n"
 "daddiu $6, $6, -1  \r\n"
 "bnez $6, 2b\r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "daddiu $8, $8, 0x10\r\n"
 "dsra $8, 5 \r\n"
-"dmul $5, $8, $7\r\n"
+"dmul $5, $8, %2\r\n"
 "daddu $2, %0, $0   \r\n"
 "dli $6, 0x10   \r\n"
 "3: \r\

[FFmpeg-devel] [PATCH 2/2] avcodec: loongson optimized idctdsp with mmi

2015-07-15 Thread
From ff6de02986fa6693376bee60f9f886e06310d0b6 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 16 Jul 2015 13:23:36 +0800
Subject: [PATCH 2/2] avcodec: loongson optimized idctdsp with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|1 +
 libavcodec/mips/idctdsp_init_mips.c |   24 +
 libavcodec/mips/idctdsp_mips.h  |   11 +
 libavcodec/mips/idctdsp_mmi.c   | 1003 +++
 4 files changed, 1039 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 93ac2dc..480541d 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -61,3 +61,4 @@ MMI-OBJS-$(CONFIG_H264DSP)+= 
mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
 MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
 MMI-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_mmi.o
+MMI-OBJS-$(CONFIG_IDCTDSP)+= mips/idctdsp_mmi.o
diff --git a/libavcodec/mips/idctdsp_init_mips.c 
b/libavcodec/mips/idctdsp_init_mips.c
index c964340..3d2192e 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -40,10 +41,33 @@ static av_cold void idctdsp_init_msa(IDCTDSPContext *c, 
AVCodecContext *avctx,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
+(avctx->bits_per_raw_sample != 10) &&
+(avctx->bits_per_raw_sample != 12) &&
+(avctx->idct_algo == FF_IDCT_AUTO)) {
+c->idct_put = ff_simple_idct_put_mmi;
+c->idct_add = ff_simple_idct_add_mmi;
+c->idct = ff_simple_idct_mmi;
+c->perm_type = FF_IDCT_PERM_NONE;
+}
+
+c->put_pixels_clamped = ff_put_pixels_clamped_mmi;
+c->add_pixels_clamped = ff_add_pixels_clamped_mmi;
+c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmi;
+}
+#endif /* HAVE_MMI */
+
 void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
   unsigned high_bit_depth)
 {
 #if HAVE_MSA
 idctdsp_init_msa(c, avctx, high_bit_depth);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+idctdsp_init_mmi(c, avctx, high_bit_depth);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/idctdsp_mips.h b/libavcodec/mips/idctdsp_mips.h
index 191652e..19267e6 100644
--- a/libavcodec/mips/idctdsp_mips.h
+++ b/libavcodec/mips/idctdsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Manojkumar Bhosale (manojkumar.bhos...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -39,4 +40,14 @@ void ff_simple_idct_msa(int16_t *block);
 void ff_simple_idct_put_msa(uint8_t *dest, int32_t stride_dst, int16_t *block);
 void ff_simple_idct_add_msa(uint8_t *dest, int32_t stride_dst, int16_t *block);
 
+void ff_put_pixels_clamped_mmi(const int16_t *block,
+uint8_t *av_restrict pixels, ptrdiff_t line_size);
+void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
+uint8_t *av_restrict pixels, ptrdiff_t line_size);
+void ff_add_pixels_clamped_mmi(const int16_t *block,
+uint8_t *av_restrict pixels, ptrdiff_t line_size);
+void ff_simple_idct_mmi(int16_t *block);
+void ff_simple_idct_put_mmi(uint8_t *dest, int32_t line_size, int16_t *block);
+void ff_simple_idct_add_mmi(uint8_t *dest, int32_t line_size, int16_t *block);
+
 #endif  // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
new file mode 100644
index 0000000..015032f
--- /dev/null
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -0,0 +1,1003 @@
+/*
+ * Loongson SIMD optimized idctdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+#include "constants.h"
+
+#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(

[FFmpeg-devel] [PATCH 1/2] avcodec: loongson add constant definition

2015-07-15 Thread
From 2e7ab3de30e342a42f00fe85ba3e70c350b05f6f Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 16 Jul 2015 13:22:06 +0800
Subject: [PATCH 1/2] avcodec: loongson add constant definition


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|  1 +
 libavcodec/mips/constants.c | 53 ++
 libavcodec/mips/constants.h | 56 +
 3 files changed, 110 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 9eb815a..93ac2dc 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -56,6 +56,7 @@ MSA-OBJS-$(CONFIG_IDCTDSP)+= 
mips/idctdsp_msa.o   \
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += mips/mpegvideoencdsp_msa.o
 MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
+MMI-OBJS  += mips/constants.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
 MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
new file mode 100644
index 0000000..135b9d4
--- /dev/null
+++ b/libavcodec/mips/constants.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/mem.h"
+#include "constants.h"
+
+const uint64_t __attribute__ ((aligned(8))) ff_pw_1 =   
{0x0001000100010001ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_4 =   
{0x0004000400040004ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_5 =   
{0x0005000500050005ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_3 =   
{0x0003000300030003ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_8 =   
{0x0008000800080008ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_9 =   
{0x0009000900090009ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_16 =  
{0x0010001000100010ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_18 =  
{0x0012001200120012ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_28 =  
{0x001C001C001C001CULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_32 =  
{0x0020002000200020ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_53 =  
{0x0035003500350035ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_64 =  
{0x0040004000400040ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_128 = 
{0x0080008000800080ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_m8tom5 =  
{0xFFFBFFFAFFF9FFF8ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_m4tom1 =  
{0xFFFFFFFEFFFDFFFCULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_1to4 =
{0x0004000300020001ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_5to8 =
{0x0008000700060005ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_0to3 =
{0x0003000200010000ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pw_4to7 =
{0x0007000600050004ULL};
+
+const uint64_t __attribute__ ((aligned(8))) ff_pb_1 =   
{0x0101010101010101ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pb_3 =   
{0x0303030303030303ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pb_80 =  
{0x8080808080808080ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_pb_A1 =  
{0xA1A1A1A1A1A1A1A1ULL};
+
+const uint64_t __attribute__ ((aligned(8))) ff_rnd =
{0x0004000400040004ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_rnd2 =   
{0x0040004000400040ULL};
+const uint64_t __attribute__ ((aligned(8))) ff_rnd3 =   
{0x0020002000200020ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
new file mode 100644
index 0000000..26b19f0
--- /dev/null
+++ b/libavcodec/mips/constants.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms

[FFmpeg-devel] [PATCH] configure: loongson disable expensive optimizations in gcc O3 optimization

2015-07-15 Thread
From 18f5219dd892432c21485f2e26b91c565bde4070 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 14 Jul 2015 13:39:04 +0800
Subject: [PATCH] configure: loongson disable expensive optimizations in gcc O3
 optimization


With gcc-4.9.2, Loongson fails the fate-dca test; this is caused by the
-fexpensive-optimizations option enabled by -O3. Disable it temporarily
until the compiler bug is fixed.


Signed-off-by: ZhouXiaoyong 
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)


diff --git a/configure b/configure
index 88ee936..9e64aba 100755
--- a/configure
+++ b/configure
@@ -3997,13 +3997,13 @@ elif enabled mips; then
 disable aligned_stack
 case $cpu in
 loongson3*)
-cpuflags="-march=loongson3a -mhard-float"
+cpuflags="-march=loongson3a -mhard-float 
-fno-expensive-optimizations"
 ;;
 loongson2e)
-cpuflags="-march=loongson2e -mhard-float"
+cpuflags="-march=loongson2e -mhard-float 
-fno-expensive-optimizations"
 ;;
 loongson2f)
-cpuflags="-march=loongson2f -mhard-float"
+cpuflags="-march=loongson2f -mhard-float 
-fno-expensive-optimizations"
 ;;
 esac
 ;;
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-13 Thread
> > building libavcodec/dca_parser.c with
> > -O0
> > fixes the fate-dca-core and fate-dca-xll failure
> 
> looks like a compiler bug:
> 
> @@ -66,6 +66,7 @@ static int dca_find_frame_end(DCAParseContext *pc1, const 
> uint8_t *buf,
>  for (i = 0; i < buf_size; i++) {
>  state = (state << 8) | buf[i];
>  if (IS_MARKER(state)) {
> +av_log(0,0, "MRK0 %d %LX %X %d %d\n", i, state, 
> pc1->lastmarker,pc1->framesize, pc1->size);
>  if (!pc1->lastmarker ||
>  pc1->lastmarker == CORE_MARKER(state) ||
>  pc1->lastmarker == DCA_SYNCWORD_SUBSTREAM) {
> @@ -75,6 +76,7 @@ static int dca_find_frame_end(DCAParseContext *pc1, const 
> uint8_t *buf,
>  else
>  pc1->lastmarker = CORE_MARKER(state);
>  i++;
> +av_log(0,0, "END0 %d %LX %X %d %d\n", i, state, 
> pc1->lastmarker,pc1->framesize, pc1->size);
>  break;
>  }
>  }
> 
> I get this diff between working and not working:
> 
>  MRK0 5 7FFE8001FC3C 0 0 0
>  END0 6 7FFE8001FC3C 7FFE8001 0 0
>  END1 1011 7FFE8001FC3C 7FFE8001 0 1006
>  dca_parse_params
>  dca_parse_params END
>  MRK0 5 7FFE8001FC3C 7FFE8001 1006 0
> -END0 6 7FFE8001FC3C 7FFE8001 1006 0
>  ENF
> -END1 5 80007FFE8001FC3C 7FFE8001 1006 1006
> 
> 
> assuming this is really a compiler bug
> can you fix the compiler or where can i find a working "gcc" package ?
> 

yes, i found the bug in gcc. it is caused by -fexpensive-optimizations, which is enabled at levels -O2, 
-O3 and -Os.
-O0 and -O1 are ok for the fate-dca test, because they do not include this option.
i am fixing this bug, so please wait a few days.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-12 Thread
> so what is the plan to continue ?
> you suggested to upgrade to Fedora21 but fate does not pass after the
> upgrade
> 
> you should have tested this before suggesting an upgrade!
> 
> i cannot test your patches if fate does not pass
> 
> will you fix the failures in Fedora21 or is there some other solution
> so we can continue to test and apply patches for loongson
> ?
> 
sorry, i will resolve all the failures in FC21 before next patch.

> ive the strong feeling that you do not care at all that the patches
> and code you submit work
> 
i do care about!
firstly, i am passionate on this work.

second, i am continuing from someone else's optimizations.
there are a large number of optimization bugs in ffmpeg-0.10, which a student 
optimized before.
i spent a huge amount of time fixing bugs, as i was not familiar with the compiler before.
access to the outside network is difficult for us in the company, and i have no loongson pc 
at home to work with.
so sorry about my slow work.

thirdly, i will ask for help in the company to resolve this gcc bug; if that works, i 
will offer a new gcc version.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-12 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年7月12日 星期日
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with 
> mmi
> 
> On Sat, Jul 11, 2015 at 09:02:51PM +0200, Michael Niedermayer wrote:
> > On Sun, Jul 05, 2015 at 11:43:37PM +0200, Michael Niedermayer wrote:
> > > On Sat, Jul 04, 2015 at 12:14:51PM +0800, 周晓勇 wrote:
> > > > i will offer a download source of Fedora21 OS next Monday.
> > > > i will run yum-builddep ffmpeg-xxx.src.rpm for preparing devel 
> > > > environment.
> > > >  what else do you need to install, because it maybe slow for 
> > > > downloading other packages abroad.
> > > > the new FC21 build with mips64el N64 API and 
> > > > mips64r2(-march=loongson3a) isa, and enabled ARCH_MIPS64 after ffmpeg 
> > > > configure.
> > > > the FC19-O32 was slow, so aborted.
> > > 
> > > other things hmm
> > > ccache, dash, screen
> > 
> > with dash configure finished in 1minute instead of 4 with bash (on loongson)
> 
> fate no longer passes after installing Fedora21
> 
> with
> dash ./configure --enable-gpl --cc='ccache gcc' --enable-pthreads 
> --samples=/home/loongson/fate/ --enable-nonfree  --enable-version3 
> --assert-level=2
> i get failure due to illegal instructions
> 
> with
> dash ./configure --enable-gpl --cc='ccache gcc' --enable-pthreads 
> --samples=/home/loongson/fate/ --enable-nonfree  --enable-version3 
> --assert-level=2  --cpu=loongson3a --enable-loongson3
> it fails in dca tests:
> 
> TESTdca-core
> TESTdca-xll
> stddev:  853.28 PSNR: 37.71 MAXDIFF:11651 bytes:  1554432/   700416
> MAXDIFF: |11651 - 0| >= 1
> size: |1554432 - 700416| >= 0
> Test dca-core failed. Look at tests/data/fate/dca-core.err for details.
> tests/Makefile:202: recipe for target 'fate-dca-core' failed
> make: *** [fate-dca-core] Error 1
> make: *** Waiting for unfinished jobs
> stddev:  219.18 PSNR: 49.51 MAXDIFF: 2483 bytes:  8994816/  1073152
> MAXDIFF: |2483 - 0| >= 1
> size: |8994816 - 1073152| >= 0
> Test dca-xll failed. Look at tests/data/fate/dca-xll.err for details.
> tests/Makefile:202: recipe for target 'fate-dca-xll' failed
> make: *** [fate-dca-xll] Error 1
> 

yes, i got this dca failure too. i have encountered many other errors in the fate 
tests.
why do you think it's due to illegal instructions? 
i have no idea, because ffmpeg and ffplay can decode and play the media data 
normally, just with wrong decoding output.
if there were illegal instructions, running the ffmpeg or ffplay commands would 
show an illegal instruction warning.
in addition, fate fails with no optimizations as well.


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-11 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年7月11日 星期六
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with 
> mmi
> 
> On Sat, Jul 11, 2015 at 01:59:40PM +0200, Michael Niedermayer wrote:
> > On Sat, Jul 11, 2015 at 01:54:34PM +0200, Michael Niedermayer wrote:
> > > On Sat, Jul 11, 2015 at 01:35:36PM +0800, 周晓勇 wrote:
> > > > 
> > > > 
> > > > 
> > > > > -原始邮件-
> > > > > 发件人: "Michael Niedermayer" 
> > > > > 发送时间: 2015年7月11日 星期六
> > > > > 收件人: "FFmpeg development discussions and patches" 
> > > > > 
> > > > > 抄送: 
> > > > > 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized 
> > > > > h264pred with mmi
> > > > > 
> > > > > On Mon, Jul 06, 2015 at 10:17:54PM +0800, 周晓勇 wrote:
> > > > > > upgrade the new FC21 with these steps:
> > > > > > 1.mount Fedora21-xfce-Loongson-20150706.iso to /mnt
> > > > > > 2.mkfs.ext3(ext4 may not supported by loongson pmon) the usb disk
> > > > > > 3.copy all two folders in /mnt to usb
> > > > > > 4.reboot and plug in usb disk
> > > > > > 5.press key "u" until select menu
> > > > > > 6.enter in usb disk(first menu)
> > > > > > 7.enter in install system(please do not enter int other entries, 
> > > > > > because others not correctly work)
> > > > > > 8.choose on partition to install
> > > > > 
> > > > > 8a. wonder why it installs over another partition
> > > > > 
> > > > choose one partition you want to install into.
> > > > if you installed new os on partition sda9, so you need change the 
> > > > /etc/fstab of the new os. like this:
> > > > /dev/sda9 / ext4 ...
> > > 
> > > the root partition in fstab is fine
> > > 
> > > 
> > > > 
> > > > > 
> > > > > > 9.after install completed, reboot into any other os to enable first 
> > > > > > line in FC21 /etc/fstab:
> > > > > > #/dev/sda8 / ext4 ... --> /dev/sda8 / ext4 ... (sdax is the 
> > > > > > partition which you installed yet)
> > > > > > 10.copy /boot folder of FC21 partition into the first BOOT 
> > > > > > partition(sda1) to use the new kernel 3.18.xxx
> > > > > > 11.modify the boot.cfg file in BOOT partision(sda1)
> > > > > 
> > > > > and after that it boots to the GUI login and one cannot login as the
> > > > > new user is called loongson-fc21 but the home directory loongson
> > > > > changing the directory so they match gets one further, a dialog is
> > > > > displayed with 2 buttons in chinese, closing that dialog without
> > > > > clicking either leaves one in the partly working GUI which just shows
> > > > > the background image and a gray rectangle
> > > > > its possible to open some menu list and enter commands and a terminal
> > > > > with some key combinations
> > > > > after some searching i found a list of languages but chaging that
> > > > > to english doesnt change anything
> > > > > 
> > > > > what steps are needed to get this system working ?
> > > > > 
> > > > the users's default password is loongson, and same does root.
> > > 
> > > yes, i figured these out already, that was easy
> > > 
> > > 
> > > > sorry about that, i use another partition mounted as /home before i 
> > > > tarball the os, so you may need to add a new user to work.
> > > > i have changed the language to Deutsch and it dose work correctly, but 
> > > > because of my mistake, the tarball dose not have loongson-fc21 dir in 
> > > > it. the language configure file is located in user's dir, and as it 
> > > > dose not respond after you make the change in menulist, you need change 
> > > > locale language with steps:
> > > > 1.locale -a to list all languages
> > > > 2.change /etc/default/locale to LANG="en_US.UTF-8"
> > > > 
> > > > if there is no /etc/default/locale in os, you could add this line in 
> > > > /etc/profile:
> > > > export LC_CTYPE="en_US"
&

Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-10 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年7月11日 星期六
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with 
> mmi
> 
> On Mon, Jul 06, 2015 at 10:17:54PM +0800, 周晓勇 wrote:
> > upgrade the new FC21 with these steps:
> > 1.mount Fedora21-xfce-Loongson-20150706.iso to /mnt
> > 2.mkfs.ext3(ext4 may not supported by loongson pmon) the usb disk
> > 3.copy all two folders in /mnt to usb
> > 4.reboot and plug in usb disk
> > 5.press key "u" until select menu
> > 6.enter in usb disk(first menu)
> > 7.enter in install system(please do not enter int other entries, because 
> > others not correctly work)
> > 8.choose on partition to install
> 
> 8a. wonder why it installs over another partition
> 
choose the partition you want to install into.
if you installed the new os on partition sda9, you need to change the /etc/fstab of 
the new os, like this:
/dev/sda9 / ext4 ...

> 
> > 9.after install completed, reboot into any other os to enable first line in 
> > FC21 /etc/fstab:
> > #/dev/sda8 / ext4 ... --> /dev/sda8 / ext4 ... (sdax is the partition which 
> > you installed yet)
> > 10.copy /boot folder of FC21 partition into the first BOOT partition(sda1) 
> > to use the new kernel 3.18.xxx
> > 11.modify the boot.cfg file in BOOT partision(sda1)
> 
> and after that it boots to the GUI login and one cannot login as the
> new user is called loongson-fc21 but the home directory loongson
> changing the directory so they match gets one further, a dialog is
> displayed with 2 buttons in chinese, closing that dialog without
> clicking either leaves one in the partly working GUI which just shows
> the background image and a gray rectangle
> its possible to open some menu list and enter commands and a terminal
> with some key combinations
> after some searching i found a list of languages but chaging that
> to english doesnt change anything
> 
> what steps are needed to get this system working ?
> 
the user's default password is loongson, and so is root's.
sorry about that; i had another partition mounted as /home before i tarballed the 
os, so you may need to add a new user to work with.
i have changed the language to Deutsch and it does work correctly, but because 
of my mistake, the tarball does not have the loongson-fc21 dir in it. the language 
configuration file is located in the user's dir, and as it does not respond after you 
make the change in the menu list, you need to change the locale language with these steps:
1.locale -a to list all languages
2.change /etc/default/locale to LANG="en_US.UTF-8"

if there is no /etc/default/locale in the os, you can add this line to 
/etc/profile:
export LC_CTYPE="en_US"

3.if it works, use the start menu list to find where to change the language and wait 
for a while after clicking.

we have the live install iso, but for now it can only install via network. 
i thought it might waste your time as the download speed is slow, so i tarballed 
the os for you.
so sorry for that, and we will improve it.


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] avcodec: loongson optimized h264pred with mmi

2015-07-09 Thread
Hi, Michael.

did you install the new FC21, and was there any problem when installing?
the installation may be troublesome, but it is worth it.
i added ".set arch=loongson3a" to the last h264pred patch so that it compiles on 
FC19-O32, and fate succeeds on both FC19-O32 and FC21-N64.
i think it's ok to use ".set arch=loongson3a" to work around the use of dli on an 
O32 system, as
this code is only for loongson3 cpus, and 3A and 3B use the same option 
-march=loongson3a to enable mmi.




---
From f378ba9b5d15495f8fcb93e049ee3538b744ecba Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 9 Jul 2015 10:59:12 +0800
Subject: [PATCH 1/2] avcodec: loongson optimized h264pred with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |   1 +
 libavcodec/mips/h264pred_init_mips.c |  53 +++
 libavcodec/mips/h264pred_mips.h  |  53 +++
 libavcodec/mips/h264pred_mmi.c   | 804 +++
 4 files changed, 911 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 5569a03..c16c93a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -58,3 +58,4 @@ MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += 
mips/mpegvideoencdsp_msa.o
 MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
+MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index 27ff10f..a2124ec 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -20,6 +21,7 @@
 
 #include "config.h"
 #include "h264dsp_mips.h"
+#include "h264pred_mips.h"
 
 #if HAVE_MSA
 static av_cold void h264_pred_init_msa(H264PredContext *h, int codec_id,
@@ -94,6 +96,54 @@ static av_cold void h264_pred_init_msa(H264PredContext *h, 
int codec_id,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void h264_pred_init_mmi(H264PredContext *h, int codec_id,
+const int bit_depth, const int chroma_format_idc)
+{
+if (bit_depth == 8) {
+if (chroma_format_idc == 1) {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x8_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x8_horizontal_8_mmi;
+} else {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x16_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x16_horizontal_8_mmi;
+}
+
+h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmi;
+h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vertical_8_mmi;
+h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_mmi;
+h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
+h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
+h->pred8x8l [HOR_PRED   ] = ff_pred8x8l_horizontal_8_mmi;
+
+switch (codec_id) {
+case AV_CODEC_ID_SVQ3:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmi;
+;
+break;
+case AV_CODEC_ID_RV40:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmi;
+;
+break;
+case AV_CODEC_ID_VP7:
+case AV_CODEC_ID_VP8:
+;
+break;
+default:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmi;
+break;
+}
+
+if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+if (chroma_format_idc == 1) {
+h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmi;
+h->pred8x8[DC_PRED8x8   ] = ff_pred8x8_dc_8_mmi;
+}
+}
+}
+}
+#endif /* HAVE_MMI */
+
 av_cold void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
 int bit_depth,
 const int chroma_format_idc)
@@ -101,4 +151,7 @@ av_cold void ff_h264_pred_init_mips(H264PredContext *h, int 
codec_id,
 #if HAVE_MSA
 h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+h264_pred_init_mmi(h, codec_id, bit_depth, chroma_format_idc);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/h264pred_mips.h b/libavcodec/mips/h264pred_mips.h
new file mode 100644
index 0000000..16bf6fc
--- /dev/null
+++ b/libavcodec/mips/h264pred_mips.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the

[FFmpeg-devel] [PATCH 2/2] avcodec: loongson optimized mpegvideo dct unquantize with mmi

2015-07-09 Thread
From 86f901e61532d3c06dbd93e15d47a66b119c44f8 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 9 Jul 2015 14:34:21 +0800
Subject: [PATCH 2/2] avcodec: loongson optimized mpegvideo dct unquantize with
 mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile  |   1 +
 libavcodec/mips/mpegvideo_init_mips.c |  14 ++
 libavcodec/mips/mpegvideo_mips.h  |  35 
 libavcodec/mips/mpegvideo_mmi.c   | 303 ++
 4 files changed, 353 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index c16c93a..9eb815a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -59,3 +59,4 @@ MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
 MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
+MMI-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
index ee14b31..85a833c 100644
--- a/libavcodec/mips/mpegvideo_init_mips.c
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -19,6 +19,7 @@
  */
 
 #include "h263dsp_mips.h"
+#include "mpegvideo_mips.h"
 
 #if HAVE_MSA
 static av_cold void dct_unquantize_init_msa(MpegEncContext *s)
@@ -29,9 +30,22 @@ static av_cold void dct_unquantize_init_msa(MpegEncContext 
*s)
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void dct_unquantize_init_mmi(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+}
+#endif /* HAVE_MMI */
+
 av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
 {
 #if HAVE_MSA
 dct_unquantize_init_msa(s);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+dct_unquantize_init_mmi(s);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 0000000..dbcea6a
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MPEGVIDEO_MIPS_H
+#define MPEGVIDEO_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+
+#endif /* MPEGVIDEO_MIPS_H */
diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
new file mode 100644
index 0000000..b3d58bd
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -0,0 +1,303 @@
+/*
+ * Loongson SIMD optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *Zhang Shuangshuang 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale)
+{
+int64_t level, qmul, qadd, nCoeffs;
+
+ 

[FFmpeg-devel] [PATCH 1/2] avcodec: loongson optimized h264pred with mmi

2015-07-09 Thread
Hi, Michael.
did you install the new FC21, and was there any problem when installing?
the installation may be troublesome, but it is worth it.
i added ".set arch=loongson3a" to the last h264pred patch so that it compiles on 
FC19-O32, and fate succeeds on both FC19-O32 and FC21-N64.
i think it's ok to use ".set arch=loongson3a" to work around the use of dli on an 
O32 system, as
this code is only for loongson3 cpus, and 3A and 3B use the same option 
-march=loongson3a to enable mmi.




---
From f378ba9b5d15495f8fcb93e049ee3538b744ecba Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 9 Jul 2015 10:59:12 +0800
Subject: [PATCH 1/2] avcodec: loongson optimized h264pred with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |   1 +
 libavcodec/mips/h264pred_init_mips.c |  53 +++
 libavcodec/mips/h264pred_mips.h  |  53 +++
 libavcodec/mips/h264pred_mmi.c   | 804 +++
 4 files changed, 911 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 5569a03..c16c93a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -58,3 +58,4 @@ MSA-OBJS-$(CONFIG_MPEGVIDEOENC)   += 
mips/mpegvideoencdsp_msa.o
 MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
+MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index 27ff10f..a2124ec 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ *Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -20,6 +21,7 @@
 
 #include "config.h"
 #include "h264dsp_mips.h"
+#include "h264pred_mips.h"
 
 #if HAVE_MSA
 static av_cold void h264_pred_init_msa(H264PredContext *h, int codec_id,
@@ -94,6 +96,54 @@ static av_cold void h264_pred_init_msa(H264PredContext *h, 
int codec_id,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void h264_pred_init_mmi(H264PredContext *h, int codec_id,
+const int bit_depth, const int chroma_format_idc)
+{
+if (bit_depth == 8) {
+if (chroma_format_idc == 1) {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x8_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x8_horizontal_8_mmi;
+} else {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x16_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x16_horizontal_8_mmi;
+}
+
+h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmi;
+h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vertical_8_mmi;
+h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_mmi;
+h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
+h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
+h->pred8x8l [HOR_PRED   ] = ff_pred8x8l_horizontal_8_mmi;
+
+switch (codec_id) {
+case AV_CODEC_ID_SVQ3:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmi;
+;
+break;
+case AV_CODEC_ID_RV40:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmi;
+;
+break;
+case AV_CODEC_ID_VP7:
+case AV_CODEC_ID_VP8:
+;
+break;
+default:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmi;
+break;
+}
+
+if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+if (chroma_format_idc == 1) {
+h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmi;
+h->pred8x8[DC_PRED8x8   ] = ff_pred8x8_dc_8_mmi;
+}
+}
+}
+}
+#endif /* HAVE_MMI */
+
 av_cold void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
 int bit_depth,
 const int chroma_format_idc)
@@ -101,4 +151,7 @@ av_cold void ff_h264_pred_init_mips(H264PredContext *h, int 
codec_id,
 #if HAVE_MSA
 h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+h264_pred_init_mmi(h, codec_id, bit_depth, chroma_format_idc);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/h264pred_mips.h b/libavcodec/mips/h264pred_mips.h
new file mode 100644
index 000..16bf6fc
--- /dev/null
+++ b/libavcodec/mips/h264pred_mips.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the 

Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-06 Thread
upgrade the new FC21 with these steps:
1.mount Fedora21-xfce-Loongson-20150706.iso to /mnt
2.format the usb disk with mkfs.ext3 (ext4 may not be supported by loongson pmon)
3.copy all two folders in /mnt to usb
4.reboot and plug in usb disk
5.press key "u" until select menu
6.enter in usb disk(first menu)
7.enter "install system" (please do not enter the other entries, because they 
do not work correctly)
8.choose one partition to install to
9.after the install has completed, reboot into any other os and uncomment the 
first line in FC21's /etc/fstab:
#/dev/sda8 / ext4 ... --> /dev/sda8 / ext4 ... (sdaX is the partition you 
installed to)
10.copy /boot folder of FC21 partition into the first BOOT partition(sda1) to 
use the new kernel 3.18.xxx
11.modify the boot.cfg file in the BOOT partition(sda1)


> -原始邮件-
> 发件人: "周晓勇" 
> 发送时间: 2015年7月6日 星期一
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred 
> with mmi
> 
> the new FC21 download address:
> http://loongnix.org/ftp/os/Fedora21/
> 
> the fault caused by ARCH_MIPS64 in configure doesn't match with O32 API in 
> old FC19.
> it's different on loongson arch(or other mips arch) and x86 arch that 32bit 
> system can only use mips32 instruction set.
> to achieve high performance, loongson use mips64r2 instruction set, and to 
> use mmi SIMD loongson use -march=loongson3a in compiling.
> after running configure file, on old O32 FC19 you get ARCH_MIPS only, but on 
> new FC21 you could specify API as O32, N32 or N64,
> because the new FC21 supported multi-libs. as most of the rpms were build 
> with N64 and OS mainly aiming at 64bit application, ffmpeg should be 
> configured and compiled into N64.
> with the new mips64el FC21 OS, you could compile correctly.
> 
> the new FC21 you will download was a test release with gst-libav inside, 
> which could decode 1080p mainline now.
> the ccache, dash and screen have been installed.
> the yum repo should be work abroad. if it doesn't, send your requirement to 
> me. ^ ^
> 
> 
> 
> > -原始邮件-
> > 发件人: "Michael Niedermayer" 
> > 发送时间: 2015年7月6日 星期一
> > 收件人: "FFmpeg development discussions and patches" 
> > 抄送: 
> > 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred 
> > with mmi
> > 
> > On Fri, Jul 03, 2015 at 06:13:25PM +0800, 周晓勇 wrote:
> > > From dc50d05ba8a4d40e17f15a52237f33cff0205fea Mon Sep 17 00:00:00 2001
> > > From: ZhouXiaoyong 
> > > Date: Fri, 3 Jul 2015 16:56:01 +0800
> > > Subject: [PATCH 4/4] avcodec: loongson optimized h264pred with mmi
> > > 
> > > 
> > > Signed-off-by: ZhouXiaoyong 
> > > ---
> > >  libavcodec/mips/Makefile |   1 +
> > >  libavcodec/mips/h264pred_init_mips.c |  53 +++
> > >  libavcodec/mips/h264pred_mips.h  |  53 +++
> > >  libavcodec/mips/h264pred_mmi.c   | 799 
> > > +++
> > >  4 files changed, 906 insertions(+)
> > 
> > this fails with:
> > 
> > /tmp/ccbnexwo.s: Assembler messages:
> > /tmp/ccbnexwo.s:44: Error: Number (0xfffbfffafff9fff8) larger than 32 bits
> > /tmp/ccbnexwo.s:46: Error: Number (0xfffefffdfffc) larger than 32 bits
> > /tmp/ccbnexwo.s:48: Error: Number (0x0004000300020001) larger than 32 bits
> > /tmp/ccbnexwo.s:50: Error: Number (0x0008000700060005) larger than 32 bits
> > /tmp/ccbnexwo.s:195: Error: Number (0x000300020001) larger than 32 bits
> > /tmp/ccbnexwo.s:198: Error: Number (0x0007000600050004) larger than 32 bits
> > /tmp/ccbnexwo.s:201: Error: Number (0x000b000a00090008) larger than 32 bits
> > /tmp/ccbnexwo.s:204: Error: Number (0x000f000e000d000c) larger than 32 bits
> > /tmp/ccbnexwo.s:302: Error: Number (0x0101010101010101) larger than 32 bits
> > /tmp/ccbnexwo.s:360: Error: Number (0x0101010101010101) larger than 32 bits
> > /tmp/ccbnexwo.s:1451: Error: Number (0x0001010101010101) larger than 32 bits
> > /tmp/ccbnexwo.s:1531: Error: Number (0x0101010101010101) larger than 32 bits
> > /tmp/ccbnexwo.s:1870: Error: Number (0x0101010101010101) larger than 32 bits
> > make: *** No rule to make target `libavcodec/mips/h264pred_mmi.o', needed 
> > by `libavcodec/libavcodec.a'.  Stop.
> > 
> > [...]
> > -- 
> > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> > 
> > I do not agree with what you have to say, but I'll defend to the death your
> > right to say it. -- Voltaire
> 
> 



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-06 Thread
the new FC21 download address:
http://loongnix.org/ftp/os/Fedora21/

the failure is caused by ARCH_MIPS64 in configure not matching the O32 ABI of the 
old FC19.
unlike on x86, on loongson (and other mips archs) a 32bit system can only use 
the mips32 instruction set.
to achieve high performance, loongson uses the mips64r2 instruction set, and to 
use mmi SIMD loongson uses -march=loongson3a when compiling.
after running configure, on the old O32 FC19 you get ARCH_MIPS only, but on the 
new FC21 you can specify the ABI as O32, N32 or N64,
because the new FC21 supports multilib. as most of the rpms were built with 
N64 and the OS mainly targets 64bit applications, ffmpeg should be configured 
and compiled for N64.
with the new mips64el FC21 OS, you can compile correctly.

the new FC21 you will download is a test release with gst-libav included, which 
can now decode 1080p mainline.
ccache, dash and screen have been installed.
the yum repo should work abroad. if it doesn't, send your requirements to me. 
^ ^



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年7月6日 星期一
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with 
> mmi
> 
> On Fri, Jul 03, 2015 at 06:13:25PM +0800, 周晓勇 wrote:
> > From dc50d05ba8a4d40e17f15a52237f33cff0205fea Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Fri, 3 Jul 2015 16:56:01 +0800
> > Subject: [PATCH 4/4] avcodec: loongson optimized h264pred with mmi
> > 
> > 
> > Signed-off-by: ZhouXiaoyong 
> > ---
> >  libavcodec/mips/Makefile |   1 +
> >  libavcodec/mips/h264pred_init_mips.c |  53 +++
> >  libavcodec/mips/h264pred_mips.h  |  53 +++
> >  libavcodec/mips/h264pred_mmi.c   | 799 
> > +++
> >  4 files changed, 906 insertions(+)
> 
> this fails with:
> 
> /tmp/ccbnexwo.s: Assembler messages:
> /tmp/ccbnexwo.s:44: Error: Number (0xfffbfffafff9fff8) larger than 32 bits
> /tmp/ccbnexwo.s:46: Error: Number (0xfffefffdfffc) larger than 32 bits
> /tmp/ccbnexwo.s:48: Error: Number (0x0004000300020001) larger than 32 bits
> /tmp/ccbnexwo.s:50: Error: Number (0x0008000700060005) larger than 32 bits
> /tmp/ccbnexwo.s:195: Error: Number (0x000300020001) larger than 32 bits
> /tmp/ccbnexwo.s:198: Error: Number (0x0007000600050004) larger than 32 bits
> /tmp/ccbnexwo.s:201: Error: Number (0x000b000a00090008) larger than 32 bits
> /tmp/ccbnexwo.s:204: Error: Number (0x000f000e000d000c) larger than 32 bits
> /tmp/ccbnexwo.s:302: Error: Number (0x0101010101010101) larger than 32 bits
> /tmp/ccbnexwo.s:360: Error: Number (0x0101010101010101) larger than 32 bits
> /tmp/ccbnexwo.s:1451: Error: Number (0x0001010101010101) larger than 32 bits
> /tmp/ccbnexwo.s:1531: Error: Number (0x0101010101010101) larger than 32 bits
> /tmp/ccbnexwo.s:1870: Error: Number (0x0101010101010101) larger than 32 bits
> make: *** No rule to make target `libavcodec/mips/h264pred_mmi.o', needed by 
> `libavcodec/libavcodec.a'.  Stop.
> 
> [...]
> -- 
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> I do not agree with what you have to say, but I'll defend to the death your
> right to say it. -- Voltaire



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] avcodec: loongson optimized mpegvideo dct unquantize with mmi

2015-07-06 Thread
From cc51287aa33d095595f1373de2b0191a2180428c Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 6 Jul 2015 16:45:56 +0800
Subject: [PATCH 2/2] avcodec: loongson optimized mpegvideo dct unquantize with
 mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile  |   1 +
 libavcodec/mips/mpegvideo_init_mips.c |  14 ++
 libavcodec/mips/mpegvideo_mips.h  |  35 
 libavcodec/mips/mpegvideo_mmi.c   | 303 ++
 4 files changed, 353 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 894ca28..03a1990 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -52,3 +52,4 @@ MSA-OBJS-$(CONFIG_MPEGVIDEO)  += 
mips/mpegvideo_msa.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
 MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
+MMI-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
index ee14b31..85a833c 100644
--- a/libavcodec/mips/mpegvideo_init_mips.c
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -19,6 +19,7 @@
  */
 
 #include "h263dsp_mips.h"
+#include "mpegvideo_mips.h"
 
 #if HAVE_MSA
 static av_cold void dct_unquantize_init_msa(MpegEncContext *s)
@@ -29,9 +30,22 @@ static av_cold void dct_unquantize_init_msa(MpegEncContext 
*s)
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void dct_unquantize_init_mmi(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+}
+#endif /* HAVE_MMI */
+
 av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
 {
 #if HAVE_MSA
 dct_unquantize_init_msa(s);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+dct_unquantize_init_mmi(s);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 000..dbcea6a
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MPEGVIDEO_MIPS_H
+#define MPEGVIDEO_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+
+#endif /* MPEGVIDEO_MIPS_H */
diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
new file mode 100644
index 000..b3d58bd
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -0,0 +1,303 @@
+/*
+ * Loongson SIMD optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *Zhang Shuangshuang 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale)
+{
+int64_t level, qmul, qadd, nCoeffs;

[FFmpeg-devel] [PATCH 1/2] configure: loongson enabled local aligned 32

2015-07-06 Thread
From 7cef687fac2f056a0f1a6e7cf65df5358b16a642 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 6 Jul 2015 09:42:17 +0800
Subject: [PATCH 1/2] configure: loongson enabled local aligned 32


Signed-off-by: ZhouXiaoyong 
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/configure b/configure
index bb3041b..bf6ca37 100755
--- a/configure
+++ b/configure
@@ -3974,7 +3974,7 @@ elif enabled mips; then
 disable mipsdspr1
 disable mipsdspr2
 disable msa
-enable local_aligned_8 local_aligned_16
+enable local_aligned_8 local_aligned_16 local_aligned_32
 enable simd_align_16
 enable fast_64bit
 enable fast_clz
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-03 Thread
i will offer a download source for the Fedora21 OS next Monday.
i will run yum-builddep ffmpeg-xxx.src.rpm to prepare the devel environment.
 what else do you need installed? downloading other packages from abroad may 
be slow.
the new FC21 is built with the mips64el N64 ABI and the mips64r2 
(-march=loongson3a) isa, and ARCH_MIPS64 is enabled after ffmpeg configure.
the FC19-O32 was slow, so it was abandoned.

here i compare the performance of libav and ffmpeg.
i ran the tests on my x86_64 notebook (Intel(R) Core(TM) i3-3217U CPU @ 1.80GHz):

ffmpeg version 2.5.7-0ubuntu0.15.04.1 Copyright (c) 2000-2015 the FFmpeg 
developers
  built with gcc 4.9.2 (Ubuntu 4.9.2-10ubuntu13)
avconv version 11.2-6:11.2-1, Copyright (c) 2000-2014 the Libav developers
  built on Jan 18 2015 05:12:33 with gcc 4.9.2 (Ubuntu 4.9.2-10ubuntu2)

1> single thread
---
time ffmpeg -threads 1  -i 1920x1080.mp4 -cpuflags 0 -f rawvideo -an -vframes 
4096 -y /dev/null

frame= 1253 fps= 38 q=0.0 Lsize= 3805988kB time=00:00:52.20 
bitrate=597196.8kbits/s
video:3805988kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB 
muxing overhead: 0.00%

real0m33.163s
user0m33.092s
sys 0m0.056s

---
time avconv -threads 1  -i 1920x1080.mp4 -cpuflags 0 -f rawvideo -an -vframes 
4096 -y /dev/null

frame= 1253 fps= 40 q=0.0 Lsize= 3805988kB time=52.17 bitrate=597673.8kbits/s   
 
video:3805988kB audio:0kB other streams:0kB global headers:0kB muxing overhead: 
0.00%

real0m31.154s
user0m31.036s
sys 0m0.108s

2> multi-threads
---
time ffmpeg -threads 1  -i 1920x1080.mp4 -cpuflags 0 -f rawvideo -an -vframes 
4096 -y /dev/null

frame= 1253 fps= 78 q=0.0 Lsize= 3805988kB time=00:00:52.20 
bitrate=597196.8kbits/s
video:3805988kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB 
muxing overhead: 0.00%

real0m16.157s
user0m59.600s
sys 0m0.328s

---
time avconv -threads 1  -i 1920x1080.mp4 -cpuflags 0 -f rawvideo -an -vframes 
4096 -y /dev/null

frame= 1253 fps= 84 q=0.0 Lsize= 3805988kB time=52.17 bitrate=597673.8kbits/s   
 
video:3805988kB audio:0kB other streams:0kB global headers:0kB muxing overhead: 
0.00%

real0m15.005s
user0m55.296s
sys 0m0.240s

is libav faster than ffmpeg? 
tests with other videos lead to the same conclusion.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 4/4] avcodec: loongson optimized h264pred with mmi

2015-07-03 Thread
From dc50d05ba8a4d40e17f15a52237f33cff0205fea Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 3 Jul 2015 16:56:01 +0800
Subject: [PATCH 4/4] avcodec: loongson optimized h264pred with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |   1 +
 libavcodec/mips/h264pred_init_mips.c |  53 +++
 libavcodec/mips/h264pred_mips.h  |  53 +++
 libavcodec/mips/h264pred_mmi.c   | 799 +++
 4 files changed, 906 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 49c7556..894ca28 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -51,3 +51,4 @@ MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= 
mips/pixblockdsp_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
 MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
 MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
+MMI-OBJS-$(CONFIG_H264PRED)   += mips/h264pred_mmi.o
diff --git a/libavcodec/mips/h264pred_init_mips.c 
b/libavcodec/mips/h264pred_init_mips.c
index 27ff10f..dc9c95a 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Shivraj Patil (shivraj.pa...@imgtec.com)
+ * Copyright (c) 2015 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -20,6 +21,7 @@
 
 #include "config.h"
 #include "h264dsp_mips.h"
+#include "h264pred_mips.h"
 
 #if HAVE_MSA
 static av_cold void h264_pred_init_msa(H264PredContext *h, int codec_id,
@@ -94,6 +96,54 @@ static av_cold void h264_pred_init_msa(H264PredContext *h, 
int codec_id,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void h264_pred_init_mmi(H264PredContext *h, int codec_id,
+const int bit_depth, const int chroma_format_idc)
+{
+if (bit_depth == 8) {
+if (chroma_format_idc == 1) {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x8_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x8_horizontal_8_mmi;
+} else {
+h->pred8x8  [VERT_PRED8x8   ] = ff_pred8x16_vertical_8_mmi;
+h->pred8x8  [HOR_PRED8x8] = ff_pred8x16_horizontal_8_mmi;
+}
+
+h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmi;
+h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vertical_8_mmi;
+h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_mmi;
+h->pred8x8l [TOP_DC_PRED] = ff_pred8x8l_top_dc_8_mmi;
+h->pred8x8l [DC_PRED] = ff_pred8x8l_dc_8_mmi;
+h->pred8x8l [HOR_PRED   ] = ff_pred8x8l_horizontal_8_mmi;
+
+switch (codec_id) {
+case AV_CODEC_ID_SVQ3:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmi;
+;
+break;
+case AV_CODEC_ID_RV40:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmi;
+;
+break;
+case AV_CODEC_ID_VP7:
+case AV_CODEC_ID_VP8:
+;
+break;
+default:
+h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmi;
+break;
+}
+
+if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+if (chroma_format_idc == 1) {
+h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmi;
+h->pred8x8[DC_PRED8x8   ] = ff_pred8x8_dc_8_mmi;
+}
+}
+}
+}
+#endif /* HAVE_MMI */
+
 av_cold void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
 int bit_depth,
 const int chroma_format_idc)
@@ -101,4 +151,7 @@ av_cold void ff_h264_pred_init_mips(H264PredContext *h, int 
codec_id,
 #if HAVE_MSA
 h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
+#if HAVE_MMI
+h264_pred_init_mmi(h, codec_id, bit_depth, chroma_format_idc);
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/h264pred_mips.h b/libavcodec/mips/h264pred_mips.h
new file mode 100644
index 000..16bf6fc
--- /dev/null
+++ b/libavcodec/mips/h264pred_mips.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Fou

[FFmpeg-devel] [PATCH 3/4] configure: add loongson2 cpu support

2015-07-03 Thread
From c6c4faaab7c598ce0fb6f7a4afab825f7a86 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 3 Jul 2015 16:44:12 +0800
Subject: [PATCH 3/4] configure: add loongson2 cpu support


Signed-off-by: ZhouXiaoyong 
---
 configure | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)


diff --git a/configure b/configure
index a292102..bb3041b 100755
--- a/configure
+++ b/configure
@@ -1591,6 +1591,8 @@ ARCH_EXT_LIST_MIPS="
 "
 
 ARCH_EXT_LIST_LOONGSON="
+loongson2
+loongson3
 mmi
 "
 
@@ -3964,7 +3966,7 @@ elif enabled mips; then
 check_cflags "-mtune=i6400 -mabi=64"
 check_ldflags "-mabi=64"
 ;;
-loongson3*)
+loongson*)
 disable mipsfpu
 disable mips32r2
 disable mips32r5
@@ -3979,7 +3981,17 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
-cpuflags="-march=loongson3a -mhard-float"
+case $cpu in
+loongson3*)
+cpuflags="-march=loongson3a -mhard-float"
+;;
+loongson2e)
+cpuflags="-march=loongson2e -mhard-float"
+;;
+loongson2f)
+cpuflags="-march=loongson2f -mhard-float"
+;;
+esac
 ;;
 generic)
 disable mips32r5
@@ -4737,13 +4749,15 @@ EOF
 
 elif enabled mips; then
 
+enabled loongson2 && check_inline_asm loongson2 '"dmult.g $8, $9, $10"'
+enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 enabled mmi && check_inline_asm mmi '"punpcklhw $f0, $f0, $f0"'
 
 # Enable minimum ISA based on selected options
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
 add_asflags "-mips64r2"
-elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
+elif enabled mips64 && enabled mipsfpu && disabled loongson2 && disabled 
loongson3; then
 add_cflags "-mips64"
 add_asflags "-mips64"
 elif enabled mipsdspr1 || enabled mipsdspr2; then
@@ -4768,7 +4782,6 @@ elif enabled mips; then
  check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
 enabled msa   && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
  check_inline_asm msa   '"addvi.b $w0, $w1, 1"'
-enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 
 enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
 enabled mips64r6 && add_asflags "-mips64r6 -mfp64"
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/4] use mmi instead of loongson3 as simd-optimization flag

2015-07-03 Thread
From 0b953ff84cce87c2b988852aa59c899e2fa23309 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 3 Jul 2015 16:27:48 +0800
Subject: [PATCH 2/4] use mmi instead of loongson3 as simd-optimization flag


Signed-off-by: ZhouXiaoyong 
---
 Makefile   |  2 +-
 arch.mak   |  2 +-
 configure  | 10 ++
 libavcodec/mips/Makefile   |  4 ++--
 libavcodec/mips/h264chroma_init_mips.c |  8 
 libavcodec/mips/h264dsp_init_mips.c|  8 
 6 files changed, 18 insertions(+), 16 deletions(-)


diff --git a/Makefile b/Makefile
index fd59628..a6f7fad 100644
--- a/Makefile
+++ b/Makefile
@@ -81,7 +81,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS 
TOOLS  \
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
ALTIVEC-OBJS MMX-OBJS YASM-OBJS   \
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MSA-OBJS   \
-   LOONGSON3-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
+   MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
 
 define RESET
 $(1) :=
diff --git a/arch.mak b/arch.mak
index 4508c2a..80f4ba2 100644
--- a/arch.mak
+++ b/arch.mak
@@ -8,7 +8,7 @@ OBJS-$(HAVE_MIPSFPU)   += $(MIPSFPU-OBJS)$(MIPSFPU-OBJS-yes)
 OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS)  $(MIPSDSPR1-OBJS-yes)
 OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS)  $(MIPSDSPR2-OBJS-yes)
 OBJS-$(HAVE_MSA)   += $(MSA-OBJS)$(MSA-OBJS-yes)
-OBJS-$(HAVE_LOONGSON3) += $(LOONGSON3-OBJS)  $(LOONGSON3-OBJS-yes)
+OBJS-$(HAVE_MMI)   += $(MMI-OBJS)   $(MMI-OBJS-yes)
 
 OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
diff --git a/configure b/configure
index cc23991..a292102 100755
--- a/configure
+++ b/configure
@@ -374,7 +374,7 @@ Optimization options (experts only):
   --disable-mipsdspr2  disable MIPS DSP ASE R2 optimizations
   --disable-msadisable MSA optimizations
   --disable-mipsfpudisable floating point MIPS optimizations
-  --disable-loongson3  disable Loongson-3 SIMD optimizations
+  --disable-mmidisable Loongson SIMD optimizations
   --disable-fast-unaligned consider unaligned accesses slow
 
 Developer options (useful when working on FFmpeg itself):
@@ -1591,7 +1591,7 @@ ARCH_EXT_LIST_MIPS="
 "
 
 ARCH_EXT_LIST_LOONGSON="
-loongson3
+mmi
 "
 
 ARCH_EXT_LIST_X86_SIMD="
@@ -2046,7 +2046,7 @@ mips32r2_deps="mips"
 mips32r5_deps="mips"
 mips64r6_deps="mips"
 msa_deps="mips"
-loongson3_deps="mips"
+mmi_deps="mips"
 
 altivec_deps="ppc"
 dcbzl_deps="ppc"
@@ -4737,6 +4737,8 @@ EOF
 
 elif enabled mips; then
 
+enabled mmi && check_inline_asm mmi '"punpcklhw $f0, $f0, $f0"'
+
 # Enable minimum ISA based on selected options
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
@@ -5779,7 +5781,7 @@ if enabled mips; then
 echo "MIPS DSP R1 enabled   ${mipsdspr1-no}"
 echo "MIPS DSP R2 enabled   ${mipsdspr2-no}"
 echo "MIPS MSA enabled  ${msa-no}"
-echo "LOONGSON3 enabled ${loongson3-no}"
+echo "LOONGSON MMI enabled  ${mmi-no}"
 fi
 if enabled ppc; then
 echo "AltiVec enabled   ${altivec-no}"
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 277ac2a..49c7556 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -49,5 +49,5 @@ MSA-OBJS-$(CONFIG_HPELDSP)+= 
mips/hpeldsp_msa.o
 MSA-OBJS-$(CONFIG_BLOCKDSP)   += mips/blockdsp_msa.o
 MSA-OBJS-$(CONFIG_PIXBLOCKDSP)+= mips/pixblockdsp_msa.o
 MSA-OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_msa.o
-LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
-LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
+MMI-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_mmi.o
+MMI-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264chroma_init_mips.c 
b/libavcodec/mips/h264chroma_init_mips.c
index 1cc5767..122148d 100644
--- a/libavcodec/mips/h264chroma_init_mips.c
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -38,7 +38,7 @@ static av_cold void h264chroma_init_msa(H264ChromaContext *c, 
int bit_depth)
 }
 #endif  // #if HAVE_MSA
 
-#if HAVE_LOONGSON3
+#if HAVE_MMI
 static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
 {
 int high_bit_depth = bit_depth > 8;
@@ -50,14 +50,14 @@ static av_cold void h264chroma_init_mmi(H264ChromaContext 
*c, int bit_depth)
 c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmi;
 }
 }
-#endif /* HAVE_LOONGSON3 */
+#endif /* HAVE_MMI */
 
 av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
 {
 #if HAVE_MSA
 h264chroma_init_msa(c, bit_depth);
 #endif  // #if HAVE_MSA
-#if HAVE_LOONGSON3
+#if HAVE_MMI
 h264chroma_init_mmi(

[FFmpeg-devel] [PATCH 1/4] avcodec: loongson remove useless macros in mipsfpu optimization

2015-07-03 Thread
From be9c7fd9b3ddcba9c122c933717f54437d2e1c8a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 3 Jul 2015 15:59:00 +0800
Subject: [PATCH 1/4] avcodec: loongson remove useless macros in mipsfpu
 optimization


Loongson has disabled all mipsfpu optimizations because the fate tests failed.


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/aacsbr_mips.h | 4 ++--
 libavcodec/mips/iirfilter_mips.c  | 4 
 libavcodec/mips/mpegaudiodsp_mips_float.c | 6 --
 libavcodec/mips/sbrdsp_mips.c | 4 
 4 files changed, 2 insertions(+), 16 deletions(-)


diff --git a/libavcodec/mips/aacsbr_mips.h b/libavcodec/mips/aacsbr_mips.h
index e525197..da8389f 100644
--- a/libavcodec/mips/aacsbr_mips.h
+++ b/libavcodec/mips/aacsbr_mips.h
@@ -149,7 +149,7 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, 
FFTContext *mdct,
 }
 }
 
-#if (HAVE_MIPSFPU && !HAVE_LOONGSON3)
+#if HAVE_MIPSFPU
 static void sbr_qmf_synthesis_mips(FFTContext *mdct,
   SBRDSPContext *sbrdsp, AVFloatDSPContext *fdsp,
   float *out, float X[2][38][64],
@@ -488,7 +488,7 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
 #define sbr_qmf_analysis sbr_qmf_analysis_mips
 #define sbr_qmf_synthesis sbr_qmf_synthesis_mips
 
-#endif /* (HAVE_MIPSFPU && !HAVE_LOONGSON3) */
+#endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
 #endif /* AVCODEC_MIPS_AACSBR_FLOAT_H */
diff --git a/libavcodec/mips/iirfilter_mips.c b/libavcodec/mips/iirfilter_mips.c
index 5a145cf..a5646cd 100644
--- a/libavcodec/mips/iirfilter_mips.c
+++ b/libavcodec/mips/iirfilter_mips.c
@@ -55,7 +55,6 @@
 #include "libavcodec/iirfilter.h"
 
 #if HAVE_INLINE_ASM
-#if !HAVE_LOONGSON3
 typedef struct FFIIRFilterCoeffs {
 int   order;
 float gain;
@@ -196,13 +195,10 @@ static void ff_iir_filter_flt_mips(const struct 
FFIIRFilterCoeffs *c,
 }
 }
 }
-#endif /* !HAVE_LOONGSON3 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_iir_filter_init_mips(FFIIRFilterContext *f) {
 #if HAVE_INLINE_ASM
-#if !HAVE_LOONGSON3
 f->filter_flt = ff_iir_filter_flt_mips;
-#endif /* !HAVE_LOONGSON3 */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/mpegaudiodsp_mips_float.c 
b/libavcodec/mips/mpegaudiodsp_mips_float.c
index beebace..bd36894 100644
--- a/libavcodec/mips/mpegaudiodsp_mips_float.c
+++ b/libavcodec/mips/mpegaudiodsp_mips_float.c
@@ -278,7 +278,6 @@ static void ff_mpadsp_apply_window_mips_float(float 
*synth_buf, float *window,
 );
 }
 
-#if !HAVE_LOONGSON3
 static void ff_dct32_mips_float(float *out, const float *tab)
 {
 float val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7,
@@ -787,7 +786,6 @@ static void ff_dct32_mips_float(float *out, const float 
*tab)
 out[15] = val30 + val17;
 out[31] = val31;
 }
-#endif /* !HAVE_LOONGSON3 */
 
 static void imdct36_mips_float(float *out, float *buf, float *in, float *win)
 {
@@ -1226,7 +1224,6 @@ static void imdct36_mips_float(float *out, float *buf, 
float *in, float *win)
 );
 }
 
-#if !HAVE_LOONGSON3
 static void ff_imdct36_blocks_mips_float(float *out, float *buf, float *in,
int count, int switch_point, int block_type)
 {
@@ -1245,13 +1242,10 @@ static void ff_imdct36_blocks_mips_float(float *out, 
float *buf, float *in,
 out++;
 }
 }
-#endif /* !HAVE_LOONGSON3 */
 
 void ff_mpadsp_init_mipsfpu(MPADSPContext *s)
 {
 s->apply_window_float   = ff_mpadsp_apply_window_mips_float;
-#if !HAVE_LOONGSON3
 s->imdct36_blocks_float = ff_imdct36_blocks_mips_float;
 s->dct32_float  = ff_dct32_mips_float;
-#endif /* !HAVE_LOONGSON3 */
 }
diff --git a/libavcodec/mips/sbrdsp_mips.c b/libavcodec/mips/sbrdsp_mips.c
index 63361e4..c203095 100644
--- a/libavcodec/mips/sbrdsp_mips.c
+++ b/libavcodec/mips/sbrdsp_mips.c
@@ -438,7 +438,6 @@ static void sbr_qmf_deint_bfly_mips(float *v, const float 
*src0, const float *sr
 }
 }
 
-#if !HAVE_LOONGSON3
 static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
 {
 int i;
@@ -607,7 +606,6 @@ static void sbr_autocorrelate_mips(const float x[40][2], 
float phi[3][2][2])
 : "memory"
 );
 }
-#endif /* !HAVE_LOONGSON3 */
 
 static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
  const float alpha0[2], const float alpha1[2],
@@ -896,9 +894,7 @@ void ff_sbrdsp_init_mips(SBRDSPContext *s)
 s->sum64x5 = sbr_sum64x5_mips;
 s->sum_square = sbr_sum_square_mips;
 s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips;
-#if !HAVE_LOONGSON3
 s->autocorrelate = sbr_autocorrelate_mips;
-#endif /* !HAVE_LOONGSON3 */
 s->hf_gen = sbr_hf_gen_mips;
 s->hf_g_filt = sbr_hf_g_filt_mips;
 
-- 
2.1.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi

2015-06-01 Thread
Please disregard the previous patch; this one is redone on top of the latest master.

---
>From c35e1e18f446fc15233ab42f52077d57681b27fa Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 17 Apr 2015 17:09:06 +0800
Subject: [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/h264chroma.c|   2 +
 libavcodec/h264chroma.h|   1 +
 libavcodec/mips/Makefile   |   2 +
 libavcodec/mips/h264chroma_init_mips.c |  42 +++
 libavcodec/mips/h264chroma_mips.h  |  35 ++
 libavcodec/mips/h264chroma_mmi.c   | 582 +
 6 files changed, 664 insertions(+)

diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c
index 5b3e13b..c2f1f30 100644
--- a/libavcodec/h264chroma.c
+++ b/libavcodec/h264chroma.c
@@ -54,4 +54,6 @@ av_cold void ff_h264chroma_init(H264ChromaContext *c, int 
bit_depth)
 ff_h264chroma_init_ppc(c, bit_depth);
 if (ARCH_X86)
 ff_h264chroma_init_x86(c, bit_depth);
+if (ARCH_MIPS)
+ff_h264chroma_init_mips(c, bit_depth);
 }
diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h
index d4b8a0e..e0f45ad 100644
--- a/libavcodec/h264chroma.h
+++ b/libavcodec/h264chroma.h
@@ -34,5 +34,6 @@ void ff_h264chroma_init_aarch64(H264ChromaContext *c, int 
bit_depth);
 void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth);
 
 #endif /* AVCODEC_H264CHROMA_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 610ff1a..5999fa7 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,8 +20,10 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o\
  mips/hevc_mc_bi_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264chroma_init_mips.c 
b/libavcodec/mips/h264chroma_init_mips.c
new file mode 100644
index 000..4c10da7
--- /dev/null
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264chroma_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
+{
+int high_bit_depth = bit_depth > 8;
+
+if (!high_bit_depth) {
+c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmi;
+c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_mmi;
+c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmi;
+c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmi;
+}
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
+{
+#if HAVE_LOONGSON3
+h264chroma_init_mmi(c, bit_depth);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/h264chroma_mips.h 
b/libavcodec/mips/h264chroma_mips.h
new file mode 100644
index 000..314e8a3
--- /dev/null
+++ b/libavcodec/mips/h264chroma_mips.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi

2015-06-01 Thread
sorry about the mistake when pasting to email.

---
>From 1b0f0c0d3b3eb017c7a79daaba89f9fc677381c4 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 17 Apr 2015 17:09:06 +0800
Subject: [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/h264chroma.c|   2 +
 libavcodec/h264chroma.h|   1 +
 libavcodec/mips/Makefile   |   2 +
 libavcodec/mips/h264chroma_init_mips.c |  42 +++
 libavcodec/mips/h264chroma_mips.h  |  35 ++
 libavcodec/mips/h264chroma_mmi.c   | 582 +
 6 files changed, 664 insertions(+)

diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c
index 5b3e13b..c2f1f30 100644
--- a/libavcodec/h264chroma.c
+++ b/libavcodec/h264chroma.c
@@ -54,4 +54,6 @@ av_cold void ff_h264chroma_init(H264ChromaContext *c, int 
bit_depth)
 ff_h264chroma_init_ppc(c, bit_depth);
 if (ARCH_X86)
 ff_h264chroma_init_x86(c, bit_depth);
+if (ARCH_MIPS)
+ff_h264chroma_init_mips(c, bit_depth);
 }
diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h
index d4b8a0e..e0f45ad 100644
--- a/libavcodec/h264chroma.h
+++ b/libavcodec/h264chroma.h
@@ -34,5 +34,6 @@ void ff_h264chroma_init_aarch64(H264ChromaContext *c, int 
bit_depth);
 void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth);
 
 #endif /* AVCODEC_H264CHROMA_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 9786175..a460f10 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,7 +20,9 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264chroma_init_mips.c 
b/libavcodec/mips/h264chroma_init_mips.c
new file mode 100644
index 000..4c10da7
--- /dev/null
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264chroma_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
+{
+int high_bit_depth = bit_depth > 8;
+
+if (!high_bit_depth) {
+c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmi;
+c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_mmi;
+c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmi;
+c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmi;
+}
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
+{
+#if HAVE_LOONGSON3
+h264chroma_init_mmi(c, bit_depth);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/h264chroma_mips.h 
b/libavcodec/mips/h264chroma_mips.h
new file mode 100644
index 000..314e8a3
--- /dev/null
+++ b/libavcodec/mips/h264chroma_mips.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser Gener

[FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi

2015-05-31 Thread
>From 671813a26715d9d2c1b0ef9da9e4da458b709e50 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 17 Apr 2015 17:09:06 +0800
Subject: [PATCH] avcodec: loongson3 optimized h264chroma put and avg with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/h264chroma.c|   2 +
 libavcodec/h264chroma.h|   1 +
 libavcodec/mips/Makefile   |   2 +
 libavcodec/mips/h264chroma_init_mips.c |  42 +++
 libavcodec/mips/h264chroma_mips.h  |  35 ++
 libavcodec/mips/h264chroma_mmi.c   | 582 +
 6 files changed, 664 insertions(+)


diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c
index 5b3e13b..c2f1f30 100644
--- a/libavcodec/h264chroma.c
+++ b/libavcodec/h264chroma.c
@@ -54,4 +54,6 @@ av_cold void ff_h264chroma_init(H264ChromaContext *c, int 
bit_depth)
 ff_h264chroma_init_ppc(c, bit_depth);
 if (ARCH_X86)
 ff_h264chroma_init_x86(c, bit_depth);
+if (ARCH_MIPS)
+ff_h264chroma_init_mips(c, bit_depth);
 }
diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h
index d4b8a0e..e0f45ad 100644
--- a/libavcodec/h264chroma.h
+++ b/libavcodec/h264chroma.h
@@ -34,5 +34,6 @@ void ff_h264chroma_init_aarch64(H264ChromaContext *c, int 
bit_depth);
 void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
 void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth);
 
 #endif /* AVCODEC_H264CHROMA_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 9786175..a460f10 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,7 +20,9 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o\
  mips/hevc_mc_uni_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_H264CHROMA)   += mips/h264chroma_mmi.o
diff --git a/libavcodec/mips/h264chroma_init_mips.c 
b/libavcodec/mips/h264chroma_init_mips.c
new file mode 100644
index 000..588cc8a
--- /dev/null
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264chroma_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
+{
+int high_bit_depth = bit_depth > 8;
+
+if (!high_bit_depth) {
+c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmi;
+c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_mmi;
+c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmi;
+c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmi;
+}
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth
+{
+#if HAVE_LOONGSON3
+h264chroma_init_mmi(c, bit_depth);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/h264chroma_mips.h 
b/libavcodec/mips/h264chroma_mips.h
new file mode 100644
index 000..314e8a3
--- /dev/null
+++ b/libavcodec/mips/h264chroma_mips.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You sho

Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize and denoise with mmi

2015-05-28 Thread
> > > On Wed, May 27, 2015 at 06:10:31PM +0800, 周晓勇 wrote:
> > > > From eb476eba51fffd30989917606a9b239f7c67ad26 Mon Sep 17 00:00:00 2001
> > > > From: ZhouXiaoyong 
> > > > Date: Thu, 21 May 2015 16:32:35 +0800
> > > > Subject: [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize 
> > > > and
> > > >  denoise with mmi
> > > > 
> > > > 
> > > > Signed-off-by: ZhouXiaoyong 
> > > > ---
> > > >  libavcodec/mips/Makefile  |   2 +
> > > >  libavcodec/mips/mpegvideo_init_mips.c |  40 
> > > >  libavcodec/mips/mpegvideo_mips.h  |  36 
> > > >  libavcodec/mips/mpegvideo_mmi.c   | 365 
> > > > ++
> > > >  libavcodec/mpegvideo.c|   2 +
> > > >  libavcodec/mpegvideo.h|   1 +
> > > >  6 files changed, 446 insertions(+)
> > > 
> > > this seems to break fate-mpeg4-resolution-change-up-down on loongson3
> > > 
> > > [...]
> > i have done the test, fate-mpeg4 passed all with no problem.
> > and also, all fate passed, not only the loongson3a but also loongson3b.
> > 
> > i wonder the break maybe caused by the os, the FC19 version on your machine 
> > is FC19-Build090-20150317.
> > here is the Build-010 download address: 
> > http://www.loongnix.org/dev/ftp/os/Fedora19/iso/Fedora19-Desktop-Loongson-Release-Build010-20150429.iso
> > 
> > i have the same problem, for example, make fate -j4 break down at 
> > vsynth_lena-h263p.
> > but do make fate-vsynth_lena-h263p again, i will pass.
> > i do the fate-test on loongson3b machine with same setting and evironment, 
> > it pass all correctly.
> > i doubt it's about the cpu's SIMD unit problem or the glib-math's, also 
> > maybe the multi-threads compiling.
> > so, you could remake the breakdown fate-mpeg4-resolution-change-up-down for 
> > a try.
> 
> trying
> make -j4 fate-vsynth_lena-mpeg4 fate-vsynth_lena-mpeg4-error 
> fate-vsynth_lena-mpeg4-nsse fate-vsynth_lena-mpeg4-qpel 
> fate-vsynth1-h263-obmc fate-vsynth1-mpeg4-adv fate-vsynth1-wmv2 
> fate-vsynth2-h263-obmc fate-vsynth2-rv10 fate-vsynth2-rv20 fate-lavf-ismv
> it succeeded 100% of the time before the patch while 50% of the
> time after the patch something random fails
> also -j1 fails after the patch occasionally
> 
> why do you think upgrading the OS would help ?
> i assume you have the latest and you said it happens for you too
> 
> [...]

Why do the tests succeed only randomly? Have you encountered this situation before?
I suggested updating the OS because I saw a higher success rate on 
Build-010.
By the way, all tests succeeded 100% of the time on the loongson3b machine.
So, if the random failures are not acceptable and you are sure they are unrelated 
to the fate tests themselves, I will withdraw this patch and rewrite it.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize and denoise with mmi

2015-05-27 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月27日 星期三
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct 
> unquantize and denoise with mmi
> 
> On Wed, May 27, 2015 at 06:10:31PM +0800, 周晓勇 wrote:
> > From eb476eba51fffd30989917606a9b239f7c67ad26 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Thu, 21 May 2015 16:32:35 +0800
> > Subject: [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize and
> >  denoise with mmi
> > 
> > 
> > Signed-off-by: ZhouXiaoyong 
> > ---
> >  libavcodec/mips/Makefile  |   2 +
> >  libavcodec/mips/mpegvideo_init_mips.c |  40 
> >  libavcodec/mips/mpegvideo_mips.h  |  36 
> >  libavcodec/mips/mpegvideo_mmi.c   | 365 
> > ++
> >  libavcodec/mpegvideo.c|   2 +
> >  libavcodec/mpegvideo.h|   1 +
> >  6 files changed, 446 insertions(+)
> 
> this seems to break fate-mpeg4-resolution-change-up-down on loongson3
> 
> [...]
I have run the tests; all of the fate-mpeg4 tests passed with no problem.
In addition, the whole fate suite passed, not only on loongson3a but also on loongson3b.

I wonder whether the breakage may be caused by the OS; the FC19 version on your machine is 
FC19-Build090-20150317.
Here is the Build-010 download address: 
http://www.loongnix.org/dev/ftp/os/Fedora19/iso/Fedora19-Desktop-Loongson-Release-Build010-20150429.iso

I see the same problem: for example, make fate -j4 breaks at 
vsynth_lena-h263p,
but when I run make fate-vsynth_lena-h263p again, it passes.
I ran the fate tests on a loongson3b machine with the same settings and environment, and 
they all pass correctly.
I suspect the cause is a problem with the CPU's SIMD unit or with glib-math, or maybe 
the multi-threaded build.
So you could re-run the failing fate-mpeg4-resolution-change-up-down test as a 
try.

to update the os with these steps:
1.mkfs.ext4 the partition, and rename it FC19
2.mount iso to /mnt/ and mount the FC19 partition to /run/media/loongson/FC19/
3.sudo tar xf  /mnt/live/Fedora19-xxx.tar.gz -C /run/media/loongson/FC19/ 
--numeric-owner
4.copy the /boot/vmlinuz-3.10.0-2015.03.17.fc19.mipsel on new partition to 
/dev/sda1 's /boot/
5.modify the boot.cfg on /dev/sda1 to use the new kernel


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize and denoise with mmi

2015-05-27 Thread
>From eb476eba51fffd30989917606a9b239f7c67ad26 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 21 May 2015 16:32:35 +0800
Subject: [PATCH] avcodec: loongson3 optimized mpegvideo dct unquantize and
 denoise with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile  |   2 +
 libavcodec/mips/mpegvideo_init_mips.c |  40 
 libavcodec/mips/mpegvideo_mips.h  |  36 
 libavcodec/mips/mpegvideo_mmi.c   | 365 ++
 libavcodec/mpegvideo.c|   2 +
 libavcodec/mpegvideo.h|   1 +
 6 files changed, 446 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 25813e7..4799757 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,6 +20,8 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_MPEGVIDEO)+= mips/mpegvideo_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
new file mode 100644
index 000..f1fef7c
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void mpv_common_init_mmi(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+
+s->denoise_dct = ff_denoise_dct_mmi;
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
+{
+#if HAVE_LOONGSON3
+mpv_common_init_mmi(s);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 000..625c8c5
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MPEGVIDEO_MIPS_H
+#define MPEGVIDEO_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block);
+
+#endif /* MPEGVIDEO_MIPS_H */
diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
new file mode 100644
index 000..9ae8ecd
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -0,0 +1,365 @@
+/*
+ * Loongson SIMD optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 

[FFmpeg-devel] avcodec: loongson3 optimized h264dsp weighted mc with mmi

2015-05-26 Thread
>From 4c82a8b130b31beb3811dbc22da4b0e8188fa737 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 13 May 2015 22:51:59 +0800
Subject: [PATCH] avcodec: loongson3 optimized h264dsp weighted mc with mmi


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|   1 +
 libavcodec/mips/h264dsp_init_mips.c |  21 +++
 libavcodec/mips/h264dsp_mips.h  |  16 +++
 libavcodec/mips/h264dsp_mmi.c   | 278 
 4 files changed, 316 insertions(+)


diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index eaedd7f..25813e7 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -22,3 +22,4 @@ OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
+LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
diff --git a/libavcodec/mips/h264dsp_init_mips.c 
b/libavcodec/mips/h264dsp_init_mips.c
index 8d3d760..d9182f2 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2015 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -65,10 +66,30 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_LOONGSON3
+static av_cold void h264dsp_init_mmi(H264DSPContext * c,
+ const int bit_depth,
+ const int chroma_format_idc)
+{
+if (bit_depth == 8) {
+c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
+c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
+c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
+
+c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
+c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
+c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
+}
+}
+#endif /* HAVE_LOONGSON3 */
+
 av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
   const int chroma_format_idc)
 {
 #if HAVE_MSA
 h264dsp_init_msa(c, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
+#if HAVE_LOONGSON3
+h264dsp_init_mmi(c, bit_depth, chroma_format_idc);
+#endif /* HAVE_LOONGSON3 */
 }
diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h
index df9b0b2..319f6d3 100644
--- a/libavcodec/mips/h264dsp_mips.h
+++ b/libavcodec/mips/h264dsp_mips.h
@@ -68,4 +68,20 @@ void ff_weight_h264_pixels8_8_msa(uint8_t *src, int stride, 
int height,
 void ff_weight_h264_pixels4_8_msa(uint8_t *src, int stride, int height,
   int log2_denom, int weight, int offset);
 
+void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+
 #endif  // #ifndef H264_DSP_MIPS_H
diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
new file mode 100644
index 000..641cd2f
--- /dev/null
+++ b/libavcodec/mips/h264dsp_mmi.c
@@ -0,0 +1,278 @@
+/*
+ * Loongson SIMD optimized h264dsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *Zhang Shuangshuang 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264dsp weighted mc with mmi

2015-05-22 Thread
I am working on debugging the two patches I sent you before.
The mpegvideo patch has two incorrect functions which cause failures in
fate-vsynth1(2)-mpeg2(-422).
I will redo the mpegvideo optimization patch first, after resolving the bugs.
Thanks.


> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月23日 星期六
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264dsp weighted 
> mc with mmi
> 
> On Wed, May 13, 2015 at 03:18:04PM +0800, 周晓勇 wrote:
> > From 1d06af967f8578387fc84d4eb268d31ecba1353d Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Wed, 13 May 2015 22:51:59 +0800
> > Subject: [PATCH] avcodec: loongson3 optimized h264dsp weighted mc with mmi
> > 
> > Signed-off-by: ZhouXiaoyong 
> > ---
> >  libavcodec/mips/Makefile|   1 +
> >  libavcodec/mips/h264dsp_init_mips.c |  21 +++
> >  libavcodec/mips/h264dsp_mips.h  |  16 +++
> >  libavcodec/mips/h264dsp_mmi.c   | 278 
> > 
> >  4 files changed, 316 insertions(+)
> 
> this causes failures in fate:
> 
> Test h264-conformance-cvwp1_toshiba_e failed. Look at 
> tests/data/fate/h264-conformance-cvwp1_toshiba_e.err for details.
> make: *** [fate-h264-conformance-cvwp1_toshiba_e] Error 1
> make: *** Waiting for unfinished jobs
> --- ./tests/ref/fate/h264-conformance-cvwp2_toshiba_e   2015-05-06 
> 01:16:58.387200786 +0800
> +++ tests/data/fate/h264-conformance-cvwp2_toshiba_e2015-05-23 
> 00:37:23.548839281 +0800
> @@ -30,7 +30,7 @@
>  0, 28, 28,1,   152064, 0xe55213fd
>  0, 29, 29,1,   152064, 0xa701e59d
>  0, 30, 30,1,   152064, 0xad65e200
> -0, 31, 31,1,   152064, 0x4606dc82
> +0, 31, 31,1,   152064, 0x56b7df76
>  0, 32, 32,1,   152064, 0x582f9f64
>  0, 33, 33,1,   152064, 0xc630662a
>  0, 34, 34,1,   152064, 0xe54bacf5
> Test h264-conformance-cvwp2_toshiba_e failed. Look at 
> tests/data/fate/h264-conformance-cvwp2_toshiba_e.err for details.
> make: *** [fate-h264-conformance-cvwp2_toshiba_e] Error 1
> ...
> 
> [...]
> -- 
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> I do not agree with what you have to say, but I'll defend to the death your
> right to say it. -- Voltaire




___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avcodec/mips: loongson fix bugs in mathops optimization

2015-05-21 Thread
The incorrect UMULH, ff_sqrt, MAC64 and MLS64 implementations will be optimized
later; this patch just deletes them for now.
---

>From 5a809d2ff4cbe809143456dac1305df391341cc1 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 21 May 2015 09:53:50 +0800
Subject: [PATCH] avcodec/mips: loongson remove incorrect mathops optimization

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/mathops.h | 51 ---
 1 file changed, 51 deletions(-)

diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index cdc7705..bb9dc83 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -43,19 +43,6 @@ static inline av_const int MULH(int a, int b)
 return c;
 }
 
-#define UMULH UMULH
-static inline av_const unsigned UMULH(unsigned a, unsigned b)
-{
-unsigned c;
-__asm__ ("dmultu %1, %2 \n\t"
- "mflo %0   \n\t"
- "dsrl %0, %0, 32   \n\t"
- : "=r"(c)
- : "r"(a),"r"(b)
- : "hi", "lo");
-return c;
-}
-
 #define mid_pred mid_pred
 static inline av_const int mid_pred(int a, int b, int c)
 {
@@ -73,44 +60,6 @@ static inline av_const int mid_pred(int a, int b, int c)
 return t;
 }
 
-#define ff_sqrt ff_sqrt
-static inline av_const unsigned int ff_sqrt(unsigned int a)
-{
-unsigned int b;
-
-__asm__ ("ctc1 %1, $f0  \n\t"
- "sqrt.s $f2, $f0   \n\t"
- "cvt.w.s $f0, $f2  \n\t"
- "cfc1 %0, $f0  \n\t"
- : "=r"(b)
- : "r"(a));
-return b;
-}
-
-static inline av_const int64_t MAC64(int64_t d, int a, int b)
-{
-int64_t m;
-__asm__ ("dmult %2, %3 \n\t"
- "mflo  %1 \n\t"
- "daddu %0, %0, %1 \n\t"
- : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
- : "hi", "lo");
-return d;
-}
-#define MAC64(d, a, b) ((d) = MAC64(d, a, b))
-
-static inline av_const int64_t MLS64(int64_t d, int a, int b)
-{
-int64_t m;
-__asm__ ("dmult %2, %3 \n\t"
- "mflo  %1 \n\t"
- "dsubu %0, %0, %1 \n\t"
- : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
- : "hi", "lo");
-return d;
-}
-#define MLS64(d, a, b) ((d) = MLS64(d, a, b))
-
 #endif /* HAVE_LOONGSON3 */
 
 #endif /* HAVE_INLINE_ASM */
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/3] configure: add cpuflags for loongson3 series cpu

2015-05-20 Thread
>From 24ac12a834ef25633e39705dd423a111b089be68 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 20 May 2015 13:16:55 +0800
Subject: [PATCH 3/3] configure: add cpuflags for loongson3 series cpu


Signed-off-by: ZhouXiaoyong 
---
 configure | 1 +
 1 file changed, 1 insertion(+)


diff --git a/configure b/configure
index 7152afc..a02fe4a 100755
--- a/configure
+++ b/configure
@@ -3937,6 +3937,7 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
+cpuflags="-march=loongson3a -mhard-float"
 ;;
 generic)
 disable mips32r5
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/3] avcodec/mips: loongson fix bugs in mathops optimization

2015-05-20 Thread
>From d0ad6c2fbb57fb16e5507149a7378022329820f8 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Tue, 19 May 2015 15:43:09 +0800
Subject: [PATCH 2/3] avcodec/mips: loongson fix bugs in mathops optimization


The mathops bugs prevent the FATE tests from passing.


Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/mathops.h | 41 +++--
 1 file changed, 7 insertions(+), 34 deletions(-)


diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index cdc7705..d6b0706 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -43,19 +43,6 @@ static inline av_const int MULH(int a, int b)
 return c;
 }
 
-#define UMULH UMULH
-static inline av_const unsigned UMULH(unsigned a, unsigned b)
-{
-unsigned c;
-__asm__ ("dmultu %1, %2 \n\t"
- "mflo %0   \n\t"
- "dsrl %0, %0, 32   \n\t"
- : "=r"(c)
- : "r"(a),"r"(b)
- : "hi", "lo");
-return c;
-}
-
 #define mid_pred mid_pred
 static inline av_const int mid_pred(int a, int b, int c)
 {
@@ -77,40 +64,26 @@ static inline av_const int mid_pred(int a, int b, int c)
 static inline av_const unsigned int ff_sqrt(unsigned int a)
 {
 unsigned int b;
-
-__asm__ ("ctc1 %1, $f0  \n\t"
- "sqrt.s $f2, $f0   \n\t"
- "cvt.w.s $f0, $f2  \n\t"
- "cfc1 %0, $f0  \n\t"
+__asm__ ("sqrt.s $f0, %1 \n\t"
+ "trunc.w.s $f0, $f0 \n\t"
+ "mfc1 %0, $f0   \n\t"
  : "=r"(b)
- : "r"(a));
+ : "f"((float)a));
 return b;
 }
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {
 int64_t m;
-__asm__ ("dmult %2, %3 \n\t"
- "mflo  %1 \n\t"
- "daddu %0, %0, %1 \n\t"
+__asm__ ("dmult %2, %3  \n\t"
+ "mflo  %1  \n\t"
+ "dadd %0, %0, %1   \n\t"
  : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
  : "hi", "lo");
 return d;
 }
 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
 
-static inline av_const int64_t MLS64(int64_t d, int a, int b)
-{
-int64_t m;
-__asm__ ("dmult %2, %3 \n\t"
- "mflo  %1 \n\t"
- "dsubu %0, %0, %1 \n\t"
- : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
- : "hi", "lo");
-return d;
-}
-#define MLS64(d, a, b) ((d) = MLS64(d, a, b))
-
 #endif /* HAVE_LOONGSON3 */
 
 #endif /* HAVE_INLINE_ASM */
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/3] configure: disable mipsfpu for loongson3

2015-05-20 Thread
>From 2f81f4c3cfd726a939d9be9d30d67118649ddc19 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 18 May 2015 17:14:18 +0800
Subject: [PATCH 1/3] configure: disable mipsfpu for loongson3


The MIPSFPU optimizations don't pass FATE correctly on Loongson-3.


Signed-off-by: ZhouXiaoyong 
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/configure b/configure
index 2eada0e..7152afc 100755
--- a/configure
+++ b/configure
@@ -3923,7 +3923,7 @@ elif enabled mips; then
 check_ldflags "-mabi=64"
 ;;
 loongson3*)
-enable mipsfpu
+disable mipsfpu
 disable mips32r2
 disable mips32r5
 disable mips64r6
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: loongson fix optimized ff_sqrt bug

2015-05-14 Thread
>From a825fc8ad61d6296d12cc4074eda494e4b978fa3 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 15 May 2015 01:30:42 +0800
Subject: [PATCH] avcodec: loongson fix optimized ff_sqrt bug

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/mathops.h | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index cdc7705..76a0f2d 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -76,15 +76,12 @@ static inline av_const int mid_pred(int a, int b, int c)
 #define ff_sqrt ff_sqrt
 static inline av_const unsigned int ff_sqrt(unsigned int a)
 {
-unsigned int b;
+float b;
+__asm__ ("sqrt.s %0, %1 \n\t"
+ : "=f"(b)
+ : "f"((float)a));
 
-__asm__ ("ctc1 %1, $f0  \n\t"
- "sqrt.s $f2, $f0   \n\t"
- "cvt.w.s $f0, $f2  \n\t"
- "cfc1 %0, $f0  \n\t"
- : "=r"(b)
- : "r"(a));
-return b;
+return (unsigned int)b;
 }
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
--
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct with mmi

2015-05-13 Thread
Sorry, the previous version of this patch has an error in libavcodec/mips/mpegvideo_mips.h.
The one below is OK.
---

>From 3e1a538f6c2999e31e90161cf1d90b984c405676 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 13 May 2015 23:22:49 +0800
Subject: [PATCH] avcodec: loongson3 optimized mpegvideo dct with mmi

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile  |   2 +
 libavcodec/mips/mpegvideo_init_mips.c |  45 +++
 libavcodec/mips/mpegvideo_mips.h  |  40 +++
 libavcodec/mips/mpegvideo_mmi.c   | 556 ++
 libavcodec/mpegvideo.c|   2 +
 libavcodec/mpegvideo.h|   1 +
 6 files changed, 646 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 25813e7..4799757 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,6 +20,8 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_MPEGVIDEO)+= mips/mpegvideo_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
new file mode 100644
index 000..7083209
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void ff_mpv_common_init_mmi(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+
+if (s->flags & CODEC_FLAG_BITEXACT) {
+s->dct_unquantize_mpeg2_intra = 
ff_dct_unquantize_mpeg2_intra_bitexact_mmi;
+}
+
+s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_mmi;
+s->denoise_dct = ff_denoise_dct_mmi;
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
+{
+#if HAVE_LOONGSON3
+ff_mpv_common_init_mmi(s);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 000..8e56eab
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MPEGVIDEO_MIPS_H
+#define MPEGVIDEO_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg2_intra_bitexact_mmi(MpegEncContext *s,
+int16_t *block, int n, int qscale);
+void ff_dct_unqua

[FFmpeg-devel] [PATCH] avcodec: loongson3 optimized mpegvideo dct with mmi

2015-05-13 Thread
>From 3e0b5c6904593e3cf3599cef579927e92cf1a2fa Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 13 May 2015 23:22:49 +0800
Subject: [PATCH] avcodec: loongson3 optimized mpegvideo dct with mmi

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile  |   2 +
 libavcodec/mips/mpegvideo_init_mips.c |  45 +++
 libavcodec/mips/mpegvideo_mips.h  |  40 +++
 libavcodec/mips/mpegvideo_mmi.c   | 556 ++
 libavcodec/mpegvideo.c|   2 +
 libavcodec/mpegvideo.h|   1 +
 6 files changed, 646 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 25813e7..4799757 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -20,6 +20,8 @@ MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)  += 
mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)+= mips/iirfilter_mips.o
 OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEO)  += mips/mpegvideo_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
+LOONGSON3-OBJS-$(CONFIG_MPEGVIDEO)+= mips/mpegvideo_mmi.o
diff --git a/libavcodec/mips/mpegvideo_init_mips.c 
b/libavcodec/mips/mpegvideo_init_mips.c
new file mode 100644
index 000..7083209
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+#if HAVE_LOONGSON3
+static av_cold void ff_mpv_common_init_mmi(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+
+if (s->flags & CODEC_FLAG_BITEXACT) {
+s->dct_unquantize_mpeg2_intra = 
ff_dct_unquantize_mpeg2_intra_bitexact_mmi;
+}
+
+s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_mmi;
+s->denoise_dct = ff_denoise_dct_mmi;
+}
+#endif /* HAVE_LOONGSON3 */
+
+av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
+{
+#if HAVE_LOONGSON3
+ff_mpv_common_init_mmi(s);
+#endif /* HAVE_LOONGSON3 */
+}
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 000..d077e11
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef H264_DSP_MIPS_H
+#define H264_DSP_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_dct_unquantize_mpeg2_intra_bitexact_mmi(MpegEncContext *s,
+int16_t *block, int n, int qscale);
+void ff_dct_unquantize_mpeg2_inter_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale);
+void ff_denoise

[FFmpeg-devel] [PATCH] avcodec: loongson3 optimized h264dsp weighted mc with mmi

2015-05-13 Thread
>From 1d06af967f8578387fc84d4eb268d31ecba1353d Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 13 May 2015 22:51:59 +0800
Subject: [PATCH] avcodec: loongson3 optimized h264dsp weighted mc with mmi

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile|   1 +
 libavcodec/mips/h264dsp_init_mips.c |  21 +++
 libavcodec/mips/h264dsp_mips.h  |  16 +++
 libavcodec/mips/h264dsp_mmi.c   | 278 
 4 files changed, 316 insertions(+)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index eaedd7f..25813e7 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -22,3 +22,4 @@ OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
+LOONGSON3-OBJS-$(CONFIG_H264DSP)  += mips/h264dsp_mmi.o
diff --git a/libavcodec/mips/h264dsp_init_mips.c 
b/libavcodec/mips/h264dsp_init_mips.c
index 8d3d760..d9182f2 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (parag.salasa...@imgtec.com)
+ * Copyright (c) 2015 Zhou Xiaoyong 
  *
  * This file is part of FFmpeg.
  *
@@ -65,10 +66,30 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_LOONGSON3
+static av_cold void h264dsp_init_mmi(H264DSPContext * c,
+ const int bit_depth,
+ const int chroma_format_idc)
+{
+if (bit_depth == 8) {
+c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
+c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
+c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
+
+c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
+c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
+c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
+}
+}
+#endif /* HAVE_LOONGSON3 */
+
 av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
   const int chroma_format_idc)
 {
 #if HAVE_MSA
 h264dsp_init_msa(c, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
+#if HAVE_LOONGSON3
+h264dsp_init_mmi(c, bit_depth, chroma_format_idc);
+#endif /* HAVE_LOONGSON3 */
 }
diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h
index df9b0b2..319f6d3 100644
--- a/libavcodec/mips/h264dsp_mips.h
+++ b/libavcodec/mips/h264dsp_mips.h
@@ -68,4 +68,20 @@ void ff_weight_h264_pixels8_8_msa(uint8_t *src, int stride, 
int height,
 void ff_weight_h264_pixels4_8_msa(uint8_t *src, int stride, int height,
   int log2_denom, int weight, int offset);
 
+void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height,
+int log2_denom, int weight, int offset);
+void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
+int stride, int height, int log2_denom, int weightd, int weights,
+int offset);
+
 #endif  // #ifndef H264_DSP_MIPS_H
diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
new file mode 100644
index 000..5776b88
--- /dev/null
+++ b/libavcodec/mips/h264dsp_mmi.c
@@ -0,0 +1,278 @@
+/*
+ * Loongson SIMD optimized h264dsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *Zhang Shuangshuang 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 0

[FFmpeg-devel] [PATCH] configure: disabled -mips64 option for loongson and remove redundant cpuflags

2015-05-12 Thread
>From 159cc99c2dc1cb4b68b48787cc53002cc7993c14 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 13 May 2015 12:33:30 +0800
Subject: [PATCH] configure: disabled -mips64 option for loongson and remove
 redundant cpuflags

1. Option -march=loongson3a conflicts with -mips64 or -mips64r2.
2. Option -mhard-float has been added.
---
 configure | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 3e304b0..2eada0e 100755
--- a/configure
+++ b/configure
@@ -3937,7 +3937,6 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
-cpuflags="-march=$cpu"
 ;;
 generic)
 disable mips32r5
@@ -4691,7 +4690,7 @@ elif enabled mips; then
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
 add_asflags "-mips64r2"
-elif enabled mips64 && enabled mipsfpu; then
+elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
 add_cflags "-mips64"
 add_asflags "-mips64"
 elif enabled mipsdspr1 || enabled mipsdspr2; then
@@ -4716,8 +4715,7 @@ elif enabled mips; then
  check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
 enabled msa   && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
  check_inline_asm msa   '"addvi.b $w0, $w1, 1"'
-enabled loongson3 && add_cflags "-mhard-float" && add_asflags 
"-mhard-float" &&
-check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
+enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 
 enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
 enabled mips64r6 && add_asflags "-mips64r6 -mfp64"
--
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for loongson and remove redundant flags

2015-05-12 Thread
@@ -3937,7 +3937,6 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
-cpuflags="-march=$cpu"
 ;;
 generic)
 disable mips32r5
@@ -4716,8 +4715,6 @@ elif enabled mips; then
  check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
 enabled msa   && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
  check_inline_asm msa   '"addvi.b $w0, $w1, 1"'
-enabled loongson3 && add_cflags "-mhard-float" && add_asflags 
"-mhard-float" &&
-check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 
 enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
 enabled mips64r6 && add_asflags "-mips64r6 -mfp64"

These are redundant, so they are removed.

@@ -4691,7 +4690,7 @@ elif enabled mips; then
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
 add_asflags "-mips64r2"
-elif enabled mips64 && enabled mipsfpu; then
+elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
 add_cflags "-mips64"
 add_asflags "-mips64"
 elif enabled mipsdspr1 || enabled mipsdspr2; then

on 64bit OS(run the configure on Loongson CentOS6.4) the mips64 will be 
enabled, so if not added disabled loongson3 the flags -mips64 will be added.
-mips64 conflicts with -march=loongson3a.

do you need our cross compiler gcc-4.8.3-d197-o32-loongson to verify?
my test has passed.

./configure --enable-gpl --enable-pthreads --samples=/home/coffeez/fate/ 
--enable-nonfree --enable-version3 --assert-level=2 --target-os=linux 
--cross-prefix='/home/xuchenghua/toolchain/cross-tools/gcc-4.8.3-d197-o32-loongson/usr/bin/'
 --cc='ccache mips64el-redhat-linux-gcc-4.8.3' --as='mips64el-redhat-linux-as' 
--ranlib='mips64el-redhat-linux-ranlib' --cxx='mips64el-redhat-linux-g++' 
--ar='mips64el-redhat-linux-ar' --nm='mips64el-redhat-linux-nm' --arch=mips 
--cpu=loongson3a --enable-doc --disable-stripping

install prefix/usr/local
source path   .
C compilerccache mips64el-redhat-linux-gcc-4.8.3
C library glibc
host C compiler   gcc
host C libraryglibc
ARCH  mips (loongson3a)
big-endianno
runtime cpu detection yes
MIPS FPU enabled  yes
MIPS32R5 enabled  no
MIPS64R6 enabled  no
MIPS DSP R1 enabled   no
MIPS DSP R2 enabled   no
MIPS MSA enabled  no
LOONGSON3 enabled yes
debug symbols yes
strip symbols no
optimize for size no
optimizations yes
staticyes
sharedno
postprocessing supportyes
new filter supportyes
network support   yes
threading support pthreads
safe bitstream reader yes
SDL support   no
opencl enabledno
texi2html enabled no
perl enabled  yes
pod2man enabled   yes
makeinfo enabled  yes
makeinfo supports HTMLyes

External libraries:
iconv

Enabled decoders:
aac binkh263i
aac_latmbinkaudio_dct   h263p
aascbinkaudio_rdft  h264
ac3 bintext hevc
ac3_fixed   bmp hnm4_video
adpcm_4xm   bmv_audio   hq_hqa
adpcm_adx   bmv_video   hqx
adpcm_afc   brender_pix huffyuv
adpcm_ctc93 iac
adpcm_dtk   cavsidcin
adpcm_eaccaptionidf
adpcm_ea_maxis_xa   cdgraphics  iff_byterun1
adpcm_ea_r1 cdxliff_ilbm
adpcm_ea_r2 cinepak imc
adpcm_ea_r3 cljrindeo2
adpcm_ea_xascllcindeo3
adpcm_g722  comfortnoiseindeo4
adpcm_g726  cookindeo5
adpcm_g726lecpiainterplay_dpcm
adpcm_ima_amv   cscdinterplay_video
adpcm_ima_apc   cyuvjacosub
adpcm_ima_dk3   dca jpeg2000
adpcm_ima_dk4   dfa jpegls
adpcm_ima_ea_eacs   dirac   jv
adpcm_ima_ea_sead   dnxhd   kgv1
adpcm_ima_iss   dpx kmvc
adpcm_ima_oki   dsd_lsbflagarith
adpcm_ima_qtdsd_lsbf_planar loco
adpcm_ima_rad   dsd_msbfmace3
adpcm_ima_smjpegdsd_msbf_planar mace6
adpcm_ima_wav   dsicinaudio mdec
adpcm_ima_wsdsicinvideo metasound
adpcm_msdss_sp  microdvd
adpcm_sbpro_2   dvbsub  mimi

Re: [FFmpeg-devel] [PATCH 2/2] avcodec: optimize mpegvideo decoder for Loongson-3 v1

2015-05-12 Thread



> -原始邮件-
> 发件人: "Carl Eugen Hoyos" 
> 发送时间: 2015年5月12日 星期二
> 收件人: ffmpeg-devel@ffmpeg.org
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 2/2] avcodec: optimize mpegvideo decoder for 
> Loongson-3 v1
> 
> 周晓勇  loongson.cn> writes:
> 
> > > why are the optimized functions and the code that 
> > > sets them in the context in 2 seperate files ?
> > > this seems unneccesarily complicated
> > > 
> > 
> > i case mipstec optimize the same functions of 
> > mpegvideodec using msa they could use filename
> > mpegviedo_mips.c and write init functions into 
> > mpegvideo_init.c
> 
> Separating init code and asm code is necessary if 
> one is compiled with a C compiler, the other with 
> yasm / nasm. In your case, putting asm and init 
> code in one file makes everything easier.
> 
ok, i will repatch it.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for loongson and remove redundant flags

2015-05-12 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月12日 星期二
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for 
> loongson and remove redundant flags
> 
> On Tue, May 12, 2015 at 10:32:19AM +0800, 周晓勇 wrote:
> > 
> > 
> > 
> > > -原始邮件-
> > > 发件人: "Michael Niedermayer" 
> > > 发送时间: 2015年5月11日 星期一
> > > 收件人: "FFmpeg development discussions and patches" 
> > > 
> > > 抄送: 
> > > 主题: Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for 
> > > loongson and remove redundant flags
> > > 
> > > On Mon, May 11, 2015 at 12:38:46PM +0800, 周晓勇 wrote:
> > > > From 9837274dcee5e1900160901cf28d883246a7ecfb Mon Sep 17 00:00:00 2001
> > > > From: Zhou Xiaoyong 
> > > > Date: Mon, 11 May 2015 03:28:52 -0400
> > > > Subject: [PATCH 1/2] configure: disabled -mips64 option for loongson 
> > > > and remove redundant flags
> > > > 
> > > > 1.Option -march=loongson3a conflicts with -mips64 or -mips64r2.
> > > > 2.Option -mhard-float has been added.
> > > > 
> > > > Signed-off-by: Zhou Xiaoyong 
> > > > ---
> > > >  configure |5 +
> > > >  1 files changed, 1 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/configure b/configure
> > > > index 1b41367..7716e66 100755
> > > > --- a/configure
> > > > +++ b/configure
> > > > @@ -3937,7 +3937,6 @@ elif enabled mips; then
> > > >  enable fast_cmov
> > > >  enable fast_unaligned
> > > >  disable aligned_stack
> > > > -cpuflags="-march=$cpu"
> > > >  ;;
> > > >  generic)
> > > >  disable mips32r5
> > > > @@ -4691,7 +4690,7 @@ elif enabled mips; then
> > > >  if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
> > > >  add_cflags "-mips64r2"
> > > >  add_asflags "-mips64r2"
> > > > -elif enabled mips64 && enabled mipsfpu; then
> > > > +elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
> > > 
> > > this breaks build on mips:
> > > CC  libavcodec/ac3dec_fixed.o
> > > In file included from ffmpeg/libavcodec/ac3dec_fixed.c:169:
> > > ffmpeg/libavcodec/ac3dec.c: In function ‘ac3_decode_init’:
> > > ffmpeg/libavcodec/ac3dec.c:213: warning: ‘request_channels’ is deprecated 
> > > (declared at ffmpeg/libavcodec/avcodec.h:2038)
> > > ffmpeg/libavcodec/ac3dec.c:215: warning: ‘request_channels’ is deprecated 
> > > (declared at ffmpeg/libavcodec/avcodec.h:2038)
> > > /tmp/ccgKabta.s: Assembler messages:
> > > /tmp/ccgKabta.s:5229: Error: opcode not supported on this processor: 
> > > mips32r2 (mips32r2) `dmult $25,$24'
> > > /tmp/ccgKabta.s:5231: Error: opcode not supported on this processor: 
> > > mips32r2 (mips32r2) `dsrl $25,$25,32'
> > > 
> > > [...]
> > > 
> > 
> > weird, i have no problem with this patch. all pass except some fate.
> > ./configure --enable-gpl --cc='ccache gcc' --enable-pthreads 
> > --samples=/home/loongson/fate/ --enable-nonfree --enable-version3 
> > --assert-level=2 --cpu=loongson3a
> 
> above was cross build for non loongson MIPS
> with mips-linux-gnu-gcc-4.4 (Debian 4.4.5-8) 4.4.5
> it works before the patch but not afterwards
> 
> 
> after the patch
> HAVE_LOONGSON3=yes
> 
> [...]

mips-linux-gnu-gcc-4.4 is not compatible with the Loongson OS FC-19, because 
FC-19's ABI is O32. You cannot use the odd registers of the Loongson CPU in O32 mode.
I tested the cross-build on my x86 machine using our cross-compiler 
gcc-4.8.3-d197-o32-loongson with the configure line below:

./configure --enable-gpl --enable-pthreads --samples=/home/coffeez/fate/ 
--enable-nonfree --enable-version3 --assert-level=2 --enable-cross-compile 
--target-os=linux 
--cc=/home/xuchenghua/toolchain/cross-tools/gcc-4.8.3-d197-o32-loongson/usr/bin/mips64el-redhat-linux-gcc
 --host-cc=/usr/bin/gcc 
--host-ld=/home/xuchenghua/toolchain/cross-tools/gcc-4.8.3-d197-o32-loongson/usr/bin/mips64el-redhat-linux-ld
 --arch=mips --cpu=loongson3a --disable-doc

Everything passes; only at the end is an ffserver error reported.

Why disable doc?
Because it reported an ld error for doc/print_option.o.

What configure line do you use when cross-building?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for loongson and remove redundant flags

2015-05-11 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月11日 星期一
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for 
> loongson and remove redundant flags
> 
> On Mon, May 11, 2015 at 12:38:46PM +0800, 周晓勇 wrote:
> > From 9837274dcee5e1900160901cf28d883246a7ecfb Mon Sep 17 00:00:00 2001
> > From: Zhou Xiaoyong 
> > Date: Mon, 11 May 2015 03:28:52 -0400
> > Subject: [PATCH 1/2] configure: disabled -mips64 option for loongson and 
> > remove redundant flags
> > 
> > 1.Option -march=loongson3a conflicts with -mips64 or -mips64r2.
> > 2.Option -mhard-float has been added.
> > 
> > Signed-off-by: Zhou Xiaoyong 
> > ---
> >  configure |5 +
> >  1 files changed, 1 insertions(+), 4 deletions(-)
> > 
> > diff --git a/configure b/configure
> > index 1b41367..7716e66 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3937,7 +3937,6 @@ elif enabled mips; then
> >  enable fast_cmov
> >  enable fast_unaligned
> >  disable aligned_stack
> > -cpuflags="-march=$cpu"
> >  ;;
> >  generic)
> >  disable mips32r5
> > @@ -4691,7 +4690,7 @@ elif enabled mips; then
> >  if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
> >  add_cflags "-mips64r2"
> >  add_asflags "-mips64r2"
> > -elif enabled mips64 && enabled mipsfpu; then
> > +elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
> 
> this breaks build on mips:
> CC  libavcodec/ac3dec_fixed.o
> In file included from ffmpeg/libavcodec/ac3dec_fixed.c:169:
> ffmpeg/libavcodec/ac3dec.c: In function ‘ac3_decode_init’:
> ffmpeg/libavcodec/ac3dec.c:213: warning: ‘request_channels’ is deprecated 
> (declared at ffmpeg/libavcodec/avcodec.h:2038)
> ffmpeg/libavcodec/ac3dec.c:215: warning: ‘request_channels’ is deprecated 
> (declared at ffmpeg/libavcodec/avcodec.h:2038)
> /tmp/ccgKabta.s: Assembler messages:
> /tmp/ccgKabta.s:5229: Error: opcode not supported on this processor: mips32r2 
> (mips32r2) `dmult $25,$24'
> /tmp/ccgKabta.s:5231: Error: opcode not supported on this processor: mips32r2 
> (mips32r2) `dsrl $25,$25,32'
> 
> [...]
> 

weird, i have no problem with this patch. all pass except some fate.
./configure --enable-gpl --cc='ccache gcc' --enable-pthreads 
--samples=/home/loongson/fate/ --enable-nonfree --enable-version3 
--assert-level=2 --cpu=loongson3a

1.when testing on FC19-O32 system, config.mak list as:

CC_IDENT=gcc 4.8.3 (GCC) 20140624 (Red Hat 4.8.3-1)
ARCH=mips
CPPFLAGS= -D_ISOC99_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
-D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 -DPIC -DZLIB_CONST
CFLAGS=   -march=loongson3a -std=c99 -fomit-frame-pointer -fPIC -mhard-float 
-pthread -D_GNU_SOURCE=1 -D_REENTRANT -I/usr/include/SDL  -g 
-Wdeclaration-after-statement -Wall -Wdisabled-optimization -Wpointer-arith 
-Wredundant-decls -Wwrite-strings -Wtype-limits -Wundef -Wmissing-prototypes 
-Wno-pointer-to-int-cast -Wstrict-prototypes -Wempty-body -Wno-parentheses 
-Wno-switch -Wno-format-zero-length -Wno-pointer-sign -O3 -fno-math-errno 
-fno-signed-zeros -fno-tree-vectorize -Werror=format-security 
-Werror=implicit-function-declaration -Werror=missing-prototypes 
-Werror=return-type -Werror=vla -Wformat -fdiagnostics-color=auto 
-Wno-maybe-uninitialized
CXXFLAGS=  -D__STDC_CONSTANT_MACROS
ASFLAGS=   -march=loongson3a -fPIC -mhard-float -g
ARCH_MIPS=yes
!ARCH_MIPS64=yes
HAVE_MIPSFPU=yes
!HAVE_MIPS32R2=yes
!HAVE_MIPS32R5=yes
!HAVE_MIPS64R6=yes
!HAVE_MIPSDSPR1=yes
!HAVE_MIPSDSPR2=yes
!HAVE_MSA=yes
HAVE_LOONGSON3=yes
!HAVE_ALIGNED_STACK=yes
HAVE_FAST_64BIT=yes
HAVE_FAST_CLZ=yes
HAVE_FAST_CMOV=yes
HAVE_LOCAL_ALIGNED_8=yes
HAVE_LOCAL_ALIGNED_16=yes
!HAVE_LOCAL_ALIGNED_32=yes
HAVE_SIMD_ALIGN_16=yes

2.when testing on CentOS-N64 system, config.mak list as:

CC_IDENT=gcc 4.4.7 (GCC) 20120313 (Red Hat 4.4.7-3.1)
ARCH=mips
CPPFLAGS= -D_ISOC99_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
-D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 -DPIC -DZLIB_CONST
CFLAGS=   -march=loongson3a -std=c99 -fomit-frame-pointer -fPIC -mhard-float 
-pthread  -g -Wdeclaration-after-statement -Wall -Wdisabled-optimization 
-Wpointer-arith -Wredundant-decls -Wwrite-strings -Wtype-limits -Wundef 
-Wmissing-prototypes -Wno-pointer-to-int-cast -Wstrict-prototypes -Wempty-body 
-Wno-parentheses -Wno-switch -Wno-format-zero-length -Wno-pointer-sign -O3 
-fno-math-errno -fno-signed-zeros -fno-tree-vectorize -Werror=format-security 
-Werror=implicit-function-declaration -Werror=missing-protot

Re: [FFmpeg-devel] [PATCH 2/2] avcodec: optimize mpegvideo decoder for Loongson-3 v1

2015-05-11 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月11日 星期一
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: 
> 主题: Re: [FFmpeg-devel] [PATCH 2/2] avcodec: optimize mpegvideo decoder for 
> Loongson-3 v1
> 
> On Mon, May 11, 2015 at 12:41:51PM +0800, 周晓勇 wrote:
> > From 3d5a9d0d38c96d6b6ec51b082102f3a231b5b881 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Mon, 11 May 2015 09:27:00 +0800
> > Subject: [PATCH 2/2] avcodec: optimize mpegvideo decoder for Loongson-3 v1
> > 
> > Signed-off-by: ZhouXiaoyong 
> > ---
> >  libavcodec/mips/Makefile |4 +
> 
> >  libavcodec/mips/mpegvideo_init.c |   44 +++
> >  libavcodec/mips/mpegvideo_loongson.c |  563 
> > ++
> 
> why are the optimized functions and the code that sets them in the
> context in 2 seperate files ?
> this seems unneccesarily complicated
> 

In case mipstec optimizes the same functions of mpegvideodec using MSA, they could 
use the filename mpegvideo_mips.c and write the init functions into mpegvideo_init.c.


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] avcodec: optimize mpegvideo decoder for Loongson-3 v1

2015-05-10 Thread
>From 3d5a9d0d38c96d6b6ec51b082102f3a231b5b881 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Mon, 11 May 2015 09:27:00 +0800
Subject: [PATCH 2/2] avcodec: optimize mpegvideo decoder for Loongson-3 v1

Signed-off-by: ZhouXiaoyong 
---
 libavcodec/mips/Makefile |4 +
 libavcodec/mips/mpegvideo_init.c |   44 +++
 libavcodec/mips/mpegvideo_loongson.c |  563 ++
 libavcodec/mips/mpegvideo_loongson.h |   40 +++
 libavcodec/mpegvideo.c   |2 +
 libavcodec/mpegvideo.h   |1 +
 6 files changed, 654 insertions(+), 0 deletions(-)

diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index eaedd7f..c9b3294 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -22,3 +22,7 @@ OBJS-$(CONFIG_HEVC_DECODER)   += 
mips/hevcdsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)   += mips/hevcdsp_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)+= mips/h264dsp_msa.o
+
+#Loongson-3 SIMD Optimization
+LOONGSON3-OBJS-$(CONFIG_MPEGVIDEO)+= mips/mpegvideo_init.o
+LOONGSON3-OBJS-$(CONFIG_MPEGVIDEO)+= mips/mpegvideo_loongson.o
diff --git a/libavcodec/mips/mpegvideo_init.c b/libavcodec/mips/mpegvideo_init.c
new file mode 100644
index 000..ba8c801
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init.c
@@ -0,0 +1,44 @@
+/*
+ * Loongson optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/mpegvideo.h"
+#include "mpegvideo_loongson.h"
+
+av_cold void ff_MPV_common_init_loongson(MpegEncContext *s)
+{
+s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_mmi;
+s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_mmi;
+s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+
+if (s->flags & CODEC_FLAG_BITEXACT) {
+s->dct_unquantize_mpeg2_intra = 
ff_dct_unquantize_mpeg2_intra_bitexact_mmi;
+}
+
+s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_mmi;
+s->denoise_dct = ff_denoise_dct_mmi;
+}
diff --git a/libavcodec/mips/mpegvideo_loongson.c 
b/libavcodec/mips/mpegvideo_loongson.c
new file mode 100644
index 000..50a97f3
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_loongson.c
@@ -0,0 +1,563 @@
+/*
+ * Loongson optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong 
+ *Zhang Shuangshuang 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_loongson.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+int n, int qscale)
+{
+int64_t level, qmul, qadd, nCoeffs;
+
+qmul = qscale << 1;
+assert(s->block_last_index[n]>=0 || s->h263_aic);
+
+if (!s->h263_aic) {
+if (n<4)
+level = block[0] * s->y_dc_scale;
+else
+level = block[0] * s->c_dc_scale;
+qadd = (qscale-1) | 1;
+} else {
+qadd = 0;
+level = block[0];
+}
+
+if(s->ac_pred)
+nCoeffs = 63;
+else
+nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
+
+__asm__ volatile (
+".set arch=loongson3a   \r\n"
+"xo

[FFmpeg-devel] [PATCH 1/2] configure: disabled -mips64 option for loongson and remove redundant flags

2015-05-10 Thread
>From 9837274dcee5e1900160901cf28d883246a7ecfb Mon Sep 17 00:00:00 2001
From: Zhou Xiaoyong 
Date: Mon, 11 May 2015 03:28:52 -0400
Subject: [PATCH 1/2] configure: disabled -mips64 option for loongson and remove 
redundant flags

1.Option -march=loongson3a conflicts with -mips64 or -mips64r2.
2.Option -mhard-float has been added.

Signed-off-by: Zhou Xiaoyong 
---
 configure |5 +
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 1b41367..7716e66 100755
--- a/configure
+++ b/configure
@@ -3937,7 +3937,6 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
-cpuflags="-march=$cpu"
 ;;
 generic)
 disable mips32r5
@@ -4691,7 +4690,7 @@ elif enabled mips; then
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
 add_asflags "-mips64r2"
-elif enabled mips64 && enabled mipsfpu; then
+elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
 add_cflags "-mips64"
 add_asflags "-mips64"
 elif enabled mipsdspr1 || enabled mipsdspr2; then
@@ -4716,8 +4715,6 @@ elif enabled mips; then
  check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
 enabled msa   && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
  check_inline_asm msa   '"addvi.b $w0, $w1, 1"'
-enabled loongson3 && add_cflags "-mhard-float" && add_asflags 
"-mhard-float" &&
-check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 
 enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
 enabled mips64r6 && add_asflags "-mips64r6 -mfp64"
--
1.7.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] configure: remove loongson check inline asm and mips dependent

2015-05-09 Thread
> > From 6f76746ecd5b623e7610e293be2cbe02022dd22a Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Thu, 7 May 2015 11:50:42 +0800
> > Subject: [PATCH 2/3] configure: remove loongson check inline asm and mips
> >  dependent
> >
> > 1.Loongson now have two series CPU supported MMI(Multi-Media Instruct).
> >   Loongson-3 designed multi-core have good performance in decoding.
> >   Loongson-2's support is comming soon.
> > 2.Replaced loongson with loongson2 and loongson3.
> >
> > Signed-off-by: ZhouXiaoyong 
> >
> 
> How does the commit message justify this commit?
> 

I am just explaining here why the flag loongson is replaced by loongson2 and loongson3, 
because the earlier patches for the configure file do not clarify why loongson3 was 
added. For more info about Loongson open source, visit www.loongson.cn and 
www.loongnix.org.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/3] configure: loongson enable subarch mips64

2015-05-09 Thread
> On Fri, May 08, 2015 at 06:17:59PM +0800, 周晓勇 wrote:
> > From 1094ca8983b34492dece7da5cd09125dbda04726 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Fri, 8 May 2015 09:08:00 +0800
> > Subject: [PATCH 3/3] configure: loongson enable subarch mips64
> > 
> > 1.Fedora19(Loongson) has released 32bit version and 64bit will comming soon.
> 
> if its actually 32bit currently than maybe mips is more correct than
> mips64, i dont know
> 

You could check the FC19 system (built with the mips3 instruction set) on the 
Loongson-3A-780E machine. It only has a /lib/ folder, but the CentOS6.4-MultiLib 
has /lib/, /lib32/ and /lib64. /lib/ is the O32 ABI, /lib32/ is the N32 ABI and /lib64 
is the N64 ABI. I think it's OK for ffmpeg to use the MIPS64 instruction set on a 32bit 
OS, because our kernel has a 64bit API and the ffmpeg-2.1.7-1.loongson installed 
in FC19 works correctly. By the way, the system BACKUP on the machine is 
built with the mips64r2 instruction set and it works faster than FC19.
What do you think?

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/3] configure: loongson enable subarch mips64

2015-05-08 Thread
>From 1094ca8983b34492dece7da5cd09125dbda04726 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 8 May 2015 09:08:00 +0800
Subject: [PATCH 3/3] configure: loongson enable subarch mips64

1.Fedora19(Loongson) has released 32bit version and 64bit will comming soon.
2.On 32bit OS check_64bit returns subarch as mips, but mips64 expected.
3.Loongson-3 compatible with mips64r2 instruction set, and added on with MMI.
4.-march=loongson3a conflict with -mips64 and -mips64r2.
5.Loongson-3A1000 and Loongson-3B1500 not support mipsdspr1 or mipsdapr2.
6.The new CPU 3A2000 support mipsdspr2.

Signed-off-by: ZhouXiaoyong 
---
 configure | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/configure b/configure
index 0f79874..4e6fddd 100755
--- a/configure
+++ b/configure
@@ -2028,7 +2028,7 @@ mips32r2_deps="mips"
 mips32r5_deps="mips"
 mips64r6_deps="mips"
 msa_deps="mips"
-loongson3_deps="mips"
+loongson3_deps="mips64"
 
 altivec_deps="ppc"
 dcbzl_deps="ppc"
@@ -3923,6 +3923,7 @@ elif enabled mips; then
 check_ldflags "-mabi=64"
 ;;
 loongson3*)
+subarch=mips64
 enable mipsfpu
 disable mips32r2
 disable mips32r5
@@ -3937,7 +3938,6 @@ elif enabled mips; then
 enable fast_cmov
 enable fast_unaligned
 disable aligned_stack
-cpuflags="-march=$cpu"
 ;;
 generic)
 disable mips32r5
@@ -4087,7 +4087,9 @@ case "$arch" in
 spic=$shared
 ;;
 mips)
-check_64bit mips mips64 '_MIPS_SIM > 1'
+if [[ ! "$cpu" =~ "loongson" ]]; then
+check_64bit mips mips64 '_MIPS_SIM > 1'
+fi
 spic=$shared
 ;;
 parisc)
@@ -4692,7 +4694,7 @@ elif enabled mips; then
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
 add_asflags "-mips64r2"
-elif enabled mips64 && enabled mipsfpu; then
+elif enabled mips64 && enabled mipsfpu && !(enabled loongson3); then
 add_cflags "-mips64"
 add_asflags "-mips64"
 elif enabled mipsdspr1 || enabled mipsdspr2; then
@@ -4717,8 +4719,6 @@ elif enabled mips; then
  check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
 enabled msa   && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
  check_inline_asm msa   '"addvi.b $w0, $w1, 1"'
-enabled loongson3 && add_cflags "-mhard-float" && add_asflags 
"-mhard-float" &&
-check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
 
 enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
 enabled mips64r6 && add_asflags "-mips64r6 -mfp64"
--
1.8.3.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/3] configure: remove loongson check inline asm and mips dependent

2015-05-08 Thread
>From 6f76746ecd5b623e7610e293be2cbe02022dd22a Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 7 May 2015 11:50:42 +0800
Subject: [PATCH 2/3] configure: remove loongson check inline asm and mips
 dependent

1.Loongson now have two series CPU supported MMI(Multi-Media Instruct).
  Loongson-3 designed multi-core have good performance in decoding.
  Loongson-2's support is comming soon.
2.Replaced loongson with loongson2 and loongson3.

Signed-off-by: ZhouXiaoyong 
---
 configure | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/configure b/configure
index 8bb6d9f..0f79874 100755
--- a/configure
+++ b/configure
@@ -2021,7 +2021,6 @@ setend_deps="arm"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
-loongson_deps="mips"
 mipsfpu_deps="mips"
 mipsdspr1_deps="mips"
 mipsdspr2_deps="mips"
@@ -4689,8 +4688,6 @@ EOF
 
 elif enabled mips; then
 
-check_inline_asm loongson '"dmult.g $1, $2, $3"'
-
 # Enable minimum ISA based on selected options
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
--
1.8.3.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/3] configure: replace arch loongson with arch extra list loongson

2015-05-08 Thread
>From 2836a9b83ab781920b9d048ef9ce2acf9061a2a7 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Thu, 7 May 2015 11:12:17 +0800
Subject: [PATCH 1/3] configure: replace arch loongson with arch extra list
 loongson

Signed-off-by: ZhouXiaoyong 
---
 configure | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index d3f23c8..8bb6d9f 100755
--- a/configure
+++ b/configure
@@ -1577,6 +1577,9 @@ ARCH_EXT_LIST_MIPS="
 mipsdspr1
 mipsdspr2
 msa
+"
+
+ARCH_EXT_LIST_LOONGSON="
 loongson3
 "
 
@@ -1617,7 +1620,7 @@ ARCH_EXT_LIST="
 $ARCH_EXT_LIST_PPC
 $ARCH_EXT_LIST_X86
 $ARCH_EXT_LIST_MIPS
-loongson
+$ARCH_EXT_LIST_LOONGSON
 "
 
 ARCH_FEATURES="
--
1.8.3.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] configure: replace arch loongson with arch extra list loongson

2015-05-06 Thread



> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年5月6日 星期三
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: gaoxiang , "孟小甫" 
> 主题: Re: [FFmpeg-devel] [PATCH] configure: replace arch loongson with arch 
> extra list loongson
> 
> On Wed, May 06, 2015 at 02:38:21PM +0800, 周晓勇 wrote:
> > From a5031b4c4b97f790a40603cff9a1f45cbb043005 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Wed, 6 May 2015 14:05:21 +0800
> > Subject: [PATCH] configure: replace arch loongson with arch extra list 
> > loongson
> > 
> > fate pass when do configure without --cc='ccache gcc' option:
> > ./configure --enable-gpl --enable-pthreads --samples=/home/loongson/fate/
> >  --enable-nonfree --enable-version3 --assert-level=2 --cpu=loongson3a
> >  --enable-loongson3
> 
> with this ARCH_MIPS64 is disabled, is this intended ?
> 
ARCH_MIPS64 is only used in libavutil/mips/intreadwrite.h for AV_RN32. I meant 
not to disturb other MIPS64 machines, since Loongson's optimizations may not be 
compatible with other MIPS64 CPUs until tested, and I have no MIPS64 machine except 
Loongson-3 for testing.
In my personal git-devel branch, I have optimized the other functions for 
Loongson-3, such as AV_WN32, AV_RN64, AV_WN64, AV_COPY32, AV_COPY64, AV_SWAP64, 
AV_ZERO32 and AV_ZERO64.
However, the speedup is smaller than anticipated. I will do more testing to make sure 
the optimized intreadwrite is truly faster, and then send you the patch.

> why is "--enable-loongson3" needed when "--cpu=loongson3a" is already
> specified ?
> 
No need; I just added it to make sure the SIMD optimization is enabled.

> and fate still fails
> time ./configure --enable-gpl --enable-pthreads 
> --samples=/home/loongson/fate/  --enable-nonfree --enable-version3 
> --assert-level=2 --cpu=loongson3a --enable-loongson3
> real4m48.779s
> user4m13.918s
> sys 0m40.020s
> 
> time make -j4
> real19m31.114s
> user57m52.785s
> sys 2m52.359s
> 
> make -j5 fate-vsynth1-rv10 fate-vsynth1-svq1 fate-amrwb-23k85 fate-dss-lp 
> fate-lavf-avi
> 
> --- ./tests/ref/fate/dss-lp 2015-05-06 01:16:58.238387245 +0800
> +++ tests/data/fate/dss-lp  2015-05-06 20:15:23.060689405 +0800
> @@ -1,31 +1,31 @@
>  #tb 0: 1/8000
> -0,  0,  0,  240,  480, 0xf1107658
> -0,240,240,  240,  480, 0x50dee179
> -0,480,480,  240,  480, 0x40090802
> -0,720,720,  240,  480, 0x3ef9f6ff
> -0,960,960,  240,  480, 0x5b7df231
> -0,   1200,   1200,  240,  480, 0xe266efd1
> -0,   1440,   1440,  240,  480, 0xfbe6e658
> -0,   1680,   1680,  240,  480, 0xde84f311
> -0,   1920,   1920,  240,  480, 0x5854ec2f
> -0,   2160,   2160,  240,  480, 0x4901cdea
> -0,   2400,   2400,  240,  480, 0x03f3e619
> -0,   2640,   2640,  240,  480, 0x47abfe87
> -0,   2880,   2880,  240,  480, 0x69dddf34
> -0,   3120,   3120,  240,  480, 0x1cfeee2c
> -0,   3360,   3360,  240,  480, 0x1860ef1c
> -0,   3600,   3600,  240,  480, 0x8f86e8ed
> -0,   3840,   3840,  240,  480, 0x307deaf8
> -0,   4080,   4080,  240,  480, 0xeca7eca0
> -0,   4320,   4320,  240,  480, 0x1835ee1c
> -0,   4560,   4560,  240,  480, 0x6676ed66
> -0,   4800,   4800,  240,  480, 0x49c2fd04
> -0,   5040,   5040,  240,  480, 0xc463db75
> -0,   5280,   5280,  240,  480, 0x1931ed7d
> -0,   5520,   5520,  240,  480, 0xc99ff886
> -0,   5760,   5760,  240,  480, 0xcd3ae8de
> -0,   6000,   6000,  240,  480, 0x2294ecfa
> -0,   6240,   6240,  240,  480, 0xcf5ef14b
> -0,   6480,   6480,  240,  480, 0x6325d4fe
> -0,   6720,   6720,  240,  480, 0x3790dcf2
> -0,   6960,   6960,  240,  480, 0x0fbee6c0
> +0,  0,  0,  240,  480, 0x4f3de452
> +0,240,240,  240,  480, 0x55d1f9da
> +0,480,480,  240,  480, 0xe887e1f6
> +0,720,720,  240,  480, 0xc353f768
> +0,960,960,  240,  480, 0x34adebcc
> +0,   1200,   1200,  240,  480, 0x7d67dfa2
> +0,   1440,   1440,  240,  480, 0xc7a4f1f4
> +0,   1680,   1680,  240,  480, 0x549cf083
> +0,   1920,   1920,  240,  480, 0x468dead7
> +0,   2160,   2160,  240,  480, 0x7e6af748
>

[FFmpeg-devel] [PATCH] configure: replace arch loongson with arch extra list loongson

2015-05-05 Thread
>From a5031b4c4b97f790a40603cff9a1f45cbb043005 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Wed, 6 May 2015 14:05:21 +0800
Subject: [PATCH] configure: replace arch loongson with arch extra list loongson

fate pass when do configure without --cc='ccache gcc' option:
./configure --enable-gpl --enable-pthreads --samples=/home/loongson/fate/
 --enable-nonfree --enable-version3 --assert-level=2 --cpu=loongson3a
 --enable-loongson3

Signed-off-by: ZhouXiaoyong 
---
 configure | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index d3f23c8..0f79874 100755
--- a/configure
+++ b/configure
@@ -1577,6 +1577,9 @@ ARCH_EXT_LIST_MIPS="
 mipsdspr1
 mipsdspr2
 msa
+"
+
+ARCH_EXT_LIST_LOONGSON="
 loongson3
 "
 
@@ -1617,7 +1620,7 @@ ARCH_EXT_LIST="
 $ARCH_EXT_LIST_PPC
 $ARCH_EXT_LIST_X86
 $ARCH_EXT_LIST_MIPS
-loongson
+$ARCH_EXT_LIST_LOONGSON
 "
 
 ARCH_FEATURES="
@@ -2018,7 +2021,6 @@ setend_deps="arm"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
-loongson_deps="mips"
 mipsfpu_deps="mips"
 mipsdspr1_deps="mips"
 mipsdspr2_deps="mips"
@@ -4686,8 +4688,6 @@ EOF
 
 elif enabled mips; then
 
-check_inline_asm loongson '"dmult.g $1, $2, $3"'
-
 # Enable minimum ISA based on selected options
 if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
 add_cflags "-mips64r2"
--
1.8.3.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Loongson Optimization Patchs and Machine Express Issuse

2015-04-26 Thread
Imgtec's MSA optimization changed the Makefile before my patch did; it's OK 
now.


> -原始邮件-
> 发件人: "Michael Niedermayer" 
> 发送时间: 2015年4月25日 星期六
> 收件人: "FFmpeg development discussions and patches" 
> 抄送: gaoxiang , "孟小甫" 
> 主题: Re: [FFmpeg-devel] Loongson Optimization Patchs and Machine Express Issuse
> 
> On Wed, Apr 22, 2015 at 02:27:11PM +0800, 周晓勇 wrote:
> >  Makefile |2 +-
> >  arch.mak |1 +
> >  libavcodec/mips/Makefile |2 ++
> >  3 files changed, 4 insertions(+), 1 deletion(-)
> > 26e64031017485302c85e184adcc941a26889962  
> > 0004-makefile-add-Loongson-3-support.patch
> > From aae2e408d57d79aac8760b03eac98b03e04451b1 Mon Sep 17 00:00:00 2001
> > From: ZhouXiaoyong 
> > Date: Wed, 15 Apr 2015 20:26:31 +0800
> > Subject: [PATCH 04/11] makefile: add Loongson-3 support
> 
> doesnt apply cleanly:
> 
> Applying: makefile: add Loongson-3 support
> Using index info to reconstruct a base tree...
> Falling back to patching base and 3-way merge...
> Auto-merging libavcodec/mips/Makefile
> CONFLICT (content): Merge conflict in libavcodec/mips/Makefile
> Auto-merging arch.mak
> CONFLICT (content): Merge conflict in arch.mak
> Auto-merging Makefile
> CONFLICT (content): Merge conflict in Makefile
> Failed to merge in the changes.
> Patch failed at 0001 makefile: add Loongson-3 support
> 
> [...]
> 
> -- 
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> If you think the mosad wants you dead since a long time then you are either
> wrong or dead since a long time.



--
周晓勇 @系统研发部
Christian Zhou @System R & D

龙芯中科技术有限公司
Loongson Technology Corporation Limited

北京市海淀区温泉镇中关村环保科技示范园龙芯产业园2号楼
Loongson Industrial Park, building 2 , Zhongguancun Environmental protection 
park,
Haidian District, Beijing, China



0001-makefile-add-Loongson-3-support.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Loongson Optimization Patchs and Machine Express Issuse

2015-04-25 Thread
OK! I will add ARCH_EXT_LIST_LOONGSON and clarify the CPU series in configure 
to support the older Loongson-2.
Loongson-2: the 2E and 2F, with the GS464 core, support MMI SIMD.
Loongson-3: the 3A1000 has the GS464 core; the 3B1500 has the GS464V core, which 
is compatible with GS464 SIMD; the new 3A2000 has the GS464E core, which is also 
compatible with GS464 SIMD.
GS464V: the "V" means Vector; it has more vector instructions.
GS464E: the "E" means Enhanced; more SIMD instructions may be added later.

在 2015-04-25 20:10:31,"Michael Niedermayer"  写道:

>On Wed, Apr 22, 2015 at 02:27:11PM +0800, 周晓勇 wrote:
>> Hi Michael,
>> We have prepared the patchs and Loongson-3A machine for you, but DHL 
>> needs consignee's phone number in case they could not contact u. Could you 
>> send me your phone number right now? I have make an appointment with courier 
>> tomorrow morning.
>> The enclosures are our Loongson-3 patchs for ffmpeg-master based on 
>> fbdaebb29861d32acc93fa55fd13554a2ae32eb4.
>> Cause the machine belong to the company, our boss need one Electronic 
>> Receipt to confirm that i do not keep the machine privately. Could you email 
>> me one receipt with your sign after you receive it?
>> 
>> Thanks.
>> 
>> 
>> 
>> --
>> 周晓勇 @系统研发部
>> Christian Zhou @System R & D
>> 
>> 龙芯中科技术有限公司
>> Loongson Technology Corporation Limited
>> 
>> 北京市海淀区温泉镇中关村环保科技示范园龙芯产业园2号楼
>> Loongson Industrial Park, building 2 , Zhongguancun Environmental protection 
>> park,
>> Haidian District, Beijing, China
>
>>  configure |   36 +---
>>  1 file changed, 33 insertions(+), 3 deletions(-)
>> c12c6e26378a6e67153bffe7003592b5a3973f4b  
>> 0001-configure-add-Loongson-3-support.patch
>> From 65973ecb8b667c0f75dcc808d664f72d27c52f71 Mon Sep 17 00:00:00 2001
>> From: ZhouXiaoyong 
>> Date: Wed, 15 Apr 2015 19:50:01 +0800
>> Subject: [PATCH 01/11] configure: add Loongson-3 support
>> 
>> ---
>>  configure | 36 +---
>>  1 file changed, 33 insertions(+), 3 deletions(-)
>> 
>> diff --git a/configure b/configure
>> index 389de92..9a3f341 100755
>> --- a/configure
>> +++ b/configure
>> @@ -366,6 +366,7 @@ Optimization options (experts only):
>>--disable-mipsdspr2  disable MIPS DSP ASE R2 optimizations
>>--disable-msa  disable MSA optimizations
>>--disable-mipsfpu  disable floating point MIPS optimizations
>> +  --disable-loongson3  disable Loongson-3 SIMD optimizations
>>--disable-fast-unaligned consider unaligned accesses slow
>>  
>>  Developer options (useful when working on FFmpeg itself):
>> @@ -1573,6 +1574,7 @@ ARCH_EXT_LIST_MIPS="
>>  mipsdspr1
>>  mipsdspr2
>>  msa
>> +loongson3
>>  "
>>  
>>  ARCH_EXT_LIST_X86_SIMD="
>> @@ -1612,7 +1614,6 @@ ARCH_EXT_LIST="
>>  $ARCH_EXT_LIST_PPC
>>  $ARCH_EXT_LIST_X86
>>  $ARCH_EXT_LIST_MIPS
>> -loongson
>>  "
>>  
>>  ARCH_FEATURES="
>> @@ -2018,6 +2019,7 @@ mipsdspr2_deps="mips"
>>  mips32r5_deps="mips"
>>  mips64r6_deps="mips"
>>  msa_deps="mips"
>> +loongson3_deps="mips"
>>  
>>  altivec_deps="ppc"
>>  ppc4xx_deps="ppc"
>
>> @@ -3717,6 +3719,16 @@ case "$arch" in
>>  ;;
>>  mips*|IP*)
>>  arch="mips"
>> +_cpuinfo="cat /proc/cpuinfo"
>> +pname=$($_cpuinfo | grep 'model' | cut -d ':' -f 2 | head -n 1)
>> +case $pname in
>> +*Loongson-3A*)
>> +cpu="loongson3a"
>> +;;
>> +*Loongson-3B*)
>> +cpu="loongson3a"
>> +;;
>> +esac
>>  ;;
>>  parisc*|hppa*)
>>  arch="parisc"
>
>applied without this hunk and without the loongson removial
>
>you can remove loongson but it should be a seperate patch and also
>remove all uses of HAVE_LOONGSON
>
>[...]
>-- 
>Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>What does censorship reveal? It reveals fear. -- Julian Assange
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Loongson Optimization Patchs and Machine Express Issuse

2015-04-25 Thread
HAVE_LOONGSON is replaced by HAVE_LOONGSON3. Loongson-2E and 2F also support the 
Loongson SIMD instructions, but have low decoding performance. We plan to focus 
on optimizing for the Loongson-3A1000, 3B1500 and 3A1500, and to modify the 
configure file later to support the Loongson-2 series by adding HAVE_LOONGSON2.


在 2015-04-25 19:41:46,"Michael Niedermayer"  写道:

>On Wed, Apr 22, 2015 at 02:27:11PM +0800, 周晓勇 wrote:
>> Hi Michael,
>> We have prepared the patchs and Loongson-3A machine for you, but DHL 
>> needs consignee's phone number in case they could not contact u. Could you 
>> send me your phone number right now? I have make an appointment with courier 
>> tomorrow morning.
>> The enclosures are our Loongson-3 patchs for ffmpeg-master based on 
>> fbdaebb29861d32acc93fa55fd13554a2ae32eb4.
>> Cause the machine belong to the company, our boss need one Electronic 
>> Receipt to confirm that i do not keep the machine privately. Could you email 
>> me one receipt with your sign after you receive it?
>> 
>> Thanks.
>> 
>> 
>> 
>> --
>[...]
>
>>  aacsbr_mips.h |4 ++--
>>  iirfilter_mips.c  |4 
>>  mpegaudiodsp_mips_float.c |6 ++
>>  sbrdsp_mips.c |4 
>>  4 files changed, 16 insertions(+), 2 deletions(-)
>> 2ee4cc1afabace5d2b6fa52ea91b211187cc18f0  
>> 0002-avcodec-mips-disable-assembly-not-supported-for-Loon.patch
>> From 3488470e6099444ed5d9cd75c181ce69d824267f Mon Sep 17 00:00:00 2001
>> From: ZhouXiaoyong 
>> Date: Wed, 15 Apr 2015 20:18:15 +0800
>> Subject: [PATCH 02/11] avcodec/mips: disable assembly not supported for
>>  Loongson-3
>
>applied
>
>
>[...]
>
>>  mathops.h  |3 ++
>>  mips/mathops.h |   78 
>> +++--
>>  2 files changed, 73 insertions(+), 8 deletions(-)
>> 001ca979230cbffc83a1004d01c1b9d7a66f1577  
>> 0003-avcodec-optimize-mathops-for-Loongson-3-v1.patch
>> From 6cb445778ef2730eb1f33ddb3ef30a5ed017e787 Mon Sep 17 00:00:00 2001
>> From: ZhouXiaoyong 
>> Date: Thu, 16 Apr 2015 14:42:44 +0800
>> Subject: [PATCH 03/11] avcodec: optimize mathops for Loongson-3 v1
>> 
>> ---
>>  libavcodec/mathops.h  |  3 ++
>>  libavcodec/mips/mathops.h | 78 
>> ++-
>>  2 files changed, 73 insertions(+), 8 deletions(-)
>> 
>> diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
>> index 87d110b..46283ca 100644
>> --- a/libavcodec/mathops.h
>> +++ b/libavcodec/mathops.h
>> @@ -211,6 +211,8 @@ if ((y) < (x)) {\
>>  #   define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
>>  #endif /* FASTDIV */
>>  
>> +#ifndef ff_sqrt
>> +#define ff_sqrt ff_sqrt
>>  static inline av_const unsigned int ff_sqrt(unsigned int a)
>>  {
>>  unsigned int b;
>> @@ -230,6 +232,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int 
>> a)
>>  
>>  return b - (a < b * b);
>>  }
>> +#endif
>>  
>>  static inline int8_t ff_u8_to_s8(uint8_t a)
>>  {
>> diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
>> index 5673fc0..cdc7705 100644
>> --- a/libavcodec/mips/mathops.h
>> +++ b/libavcodec/mips/mathops.h
>> @@ -1,5 +1,6 @@
>>  /*
>>   * Copyright (c) 2009 Mans Rullgard 
>> + * Copyright (c) 2015 Zhou Xiaoyong 
>>   *
>>   * This file is part of FFmpeg.
>>   *
>> @@ -27,14 +28,73 @@
>>  
>>  #if HAVE_INLINE_ASM
>>  
>> -#if HAVE_LOONGSON
>> +#if HAVE_LOONGSON3
>
>This removes optimizations for loongson (1 or whatever) ?
>is that intended ?
>
>
>-- 
>Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
>The real ebay dictionary, page 2
>"100% positive feedback" - "All either got their money back or didnt complain"
>"Best seller ever, very honest" - "Seller refunded buyer after failed scam"

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


  1   2   >