From: Tucker DiNapoli <t.dinapol...@gmail.com>

This series of patches makes some changes to libpostproc in preperation for 
adding
sse2 and avx2 simd versions of some functions. None of the changes should effect
the library in any major way, but they are necessary for future changes.
I've tested all the patches on my machine and they all apply cleanly,
and the fate-filter-pp tests succeed after each patch. I've also tried my
best to split the patches into small changes. I have some more patches
which add the ability to process multiple blocks in each function, which
will allow the new simd versions to be substitute in without any additional
work.

This just replaces some conditionally compiled inline asm with the
conditionally defined macros from the last commit
---
 libpostproc/postprocess_template.c | 63 +++++---------------------------------
 1 file changed, 8 insertions(+), 55 deletions(-)

diff --git a/libpostproc/postprocess_template.c 
b/libpostproc/postprocess_template.c
index 4016b08..48501fe 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], 
int srcStride, uint8_t dst[
         // finish 1 block before the next otherwise we might have a problem
         // with the L1 Cache of the P4 ... or only a few blocks at a time or 
something
         for(x=0; x<width; x+=BLOCK_SIZE){
-
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), 
"r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
                               srcBlock + srcStride*8, srcStride, mode & 
LEVEL_FIX, &c.packedYOffset);
@@ -3474,33 +3450,10 @@ static void RENAME(postProcess)(const uint8_t src[], 
int srcStride, uint8_t dst[
             uint8_t *dstBlockStart = dstBlock;
             const uint8_t *srcBlockStart = srcBlock;
           for(; x < endx; x+=BLOCK_SIZE){
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), 
"r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
                               srcBlock + srcStride*copyAhead, srcStride, mode 
& LEVEL_FIX, &c.packedYOffset);
-- 
2.3.3

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to