Re: [FFmpeg-devel] [FFMpeg-Devel][GSoC][PATCH 2/6] postproc: Made QP, nonBQP, and pQPb arrays

2015-04-22 Thread Michael Niedermayer
On Wed, Apr 22, 2015 at 04:27:27PM -0400, Tucker DiNapoli wrote:
> From: Tucker DiNapoli 
> 
> Also pulled QP initialization out of the inner loop, which removed some
> redundant code.
> 
> Added some dummy fields to PPContext to allow current code to work while
> changing the rest of the postprocessing code to support the arrays.
> 
> I also increased alignment requirements for some fields in the PPContext
> struct to support future AVX2 code.
> ---
>  libpostproc/postprocess_internal.h | 10 -
>  libpostproc/postprocess_template.c | 81 ++
>  2 files changed, 46 insertions(+), 45 deletions(-)

applied

thanks

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I do not agree with what you have to say, but I'll defend to the death your
right to say it. -- Voltaire


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [FFMpeg-Devel][GSoC][PATCH 2/6] postproc: Made QP, nonBQP, and pQPb arrays

2015-04-22 Thread Tucker DiNapoli
From: Tucker DiNapoli 

Also pulled QP initialization out of the inner loop, which removed some
redundant code.

Added some dummy fields to PPContext to allow current code to work while
changing the rest of the postprocessing code to support the arrays.

I also increased alignment requirements for some fields in the PPContext
struct to support future AVX2 code.
---
 libpostproc/postprocess_internal.h | 10 -
 libpostproc/postprocess_template.c | 81 ++
 2 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/libpostproc/postprocess_internal.h 
b/libpostproc/postprocess_internal.h
index 1ebd974..c1a306d 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,8 +143,11 @@ typedef struct PPContext{
 DECLARE_ALIGNED(8, uint64_t, pQPb);
 DECLARE_ALIGNED(8, uint64_t, pQPb2);
 
-DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
-DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
+DECLARE_ALIGNED(32, uint64_t, pQPb_block)[4];
+DECLARE_ALIGNED(32, uint64_t, pQPb2_block)[4];
+
+DECLARE_ALIGNED(32, uint64_t, mmxDcOffset)[64];
+DECLARE_ALIGNED(32, uint64_t, mmxDcThreshold)[64];
 
 QP_STORE_T *stdQPTable;   ///< used to fix MPEG2 style qscale
 QP_STORE_T *nonBQPTable;
@@ -153,6 +156,9 @@ typedef struct PPContext{
 int QP;
 int nonBQP;
 
+DECLARE_ALIGNED(32, int, QP_block)[4];
+DECLARE_ALIGNED(32, int, nonBQP_block)[4];
+
 int frameNum;
 
 int cpuCaps;
diff --git a/libpostproc/postprocess_template.c 
b/libpostproc/postprocess_template.c
index e153b13..b7296c4 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3479,7 +3479,7 @@ static void RENAME(postProcess)(const uint8_t src[], int 
srcStride, uint8_t dst[
 #endif
 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
-int QP=0;
+int QP=0, nonBQP=0;
 /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 
1 line upwards
if not than use a temporary buffer */
 if(y+15 >= height){
@@ -3512,6 +3512,29 @@ static void RENAME(postProcess)(const uint8_t src[], int 
srcStride, uint8_t dst[
 int endx = FFMIN(width, x+32);
 uint8_t *dstBlockStart = dstBlock;
 const uint8_t *srcBlockStart = srcBlock;
+int qp_index = 0;
+for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){
+QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+if(!isColor){
+QP= (QP* QPCorrecture + 256*128)>>16;
+nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
+yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
+}
+c.QP_block[qp_index] = QP;
+c.nonBQP_block[qp_index] = nonBQP;
+#if TEMPLATE_PP_MMX
+__asm__ volatile(
+"movd %1, %%mm7 \n\t"
+"packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+"packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+"packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
+"movq %%mm7, %0 \n\t"
+: "=m" (c.pQPb_block[qp_index])
+: "r" (QP)
+);
+#endif
+}
   for(; x < endx; x+=BLOCK_SIZE){
 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride 
+ 32);
 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + 
copyAhead+1)*srcStride + 32);
@@ -3543,27 +3566,15 @@ static void RENAME(postProcess)(const uint8_t src[], 
int srcStride, uint8_t dst[
   dstBlock = dstBlockStart;
   srcBlock = srcBlockStart;
 
-  for(x = startx; x < endx; x+=BLOCK_SIZE){
+  for(x = startx, qp_index = 0; x < endx; x+=BLOCK_SIZE, qp_index++){
 const int stride= dstStride;
-QP = QPptr[x>>qpHShift];
-c.nonBQP = nonBQPptr[x>>qpHShift];
-if(!isColor){
-QP= (QP* QPCorrecture + 256*128)>>16;
-c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-yHistogram[srcBlock[srcStride*12 + 4]]++;
-}
-c.QP= QP;
-#if TEMPLATE_PP_MMX
-__asm__ volatile(
-"movd %1, %%mm7 \n\t"
-"packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-"packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-"packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-"movq %%mm7, %0 \n\t"
-: "=m" (c.pQPb)
-: "r" (QP)
-);
-#endif
+//temporary while changing QP stuff to make things continue to work
+//eventually QP,nonBQP,etc will be arrays and this will be 
unnecessary
+c.Q