From: Tucker DiNapoli <t.dinapol...@gmail.com> This series of patches makes some changes to libpostproc in preparation for adding sse2 and avx2 simd versions of some functions. None of the changes should affect the library in any major way, but they are necessary for future changes. I've tested all the patches on my machine and they all apply cleanly, and the fate-filter-pp tests succeed after each patch. I've also tried my best to split the patches into small changes. I have some more patches which add the ability to process multiple blocks in each function, which will allow the new simd versions to be substituted in without any additional work.
This just replaces some conditionally compiled inline asm with the conditionally defined macros from the last commit --- libpostproc/postprocess_template.c | 63 +++++--------------------------------- 1 file changed, 8 insertions(+), 55 deletions(-) diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 4016b08..48501fe 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ // finish 1 block before the next otherwise we might have a problem // with the L1 Cache of the P4 ... or only a few blocks at a time or something for(x=0; x<width; x+=BLOCK_SIZE){ - -#if TEMPLATE_PP_MMXEXT && HAVE_6REGS -/* - prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); - prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); -*/ - - __asm__( - "mov %4, %%"REG_a" \n\t" - "shr $2, %%"REG_a" \n\t" - "and $6, %%"REG_a" \n\t" - "add %5, %%"REG_a" \n\t" - "mov %%"REG_a", %%"REG_d" \n\t" - "imul %1, %%"REG_a" \n\t" - "imul %3, %%"REG_d" \n\t" - "prefetchnta 32(%%"REG_a", %0) \n\t" - "prefetcht0 32(%%"REG_d", %2) \n\t" - "add %1, %%"REG_a" \n\t" - "add %3, %%"REG_d" \n\t" - "prefetchnta 32(%%"REG_a", %0) \n\t" - "prefetcht0 32(%%"REG_d", %2) \n\t" - :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), - "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) - : "%"REG_a, "%"REG_d - ); -#endif + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); RENAME(blockCopy)(dstBlock + dstStride*8, dstStride, srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset); 
@@ -3474,33 +3450,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ uint8_t *dstBlockStart = dstBlock; const uint8_t *srcBlockStart = srcBlock; for(; x < endx; x+=BLOCK_SIZE){ -#if TEMPLATE_PP_MMXEXT && HAVE_6REGS -/* - prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); - prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); -*/ - - __asm__( - "mov %4, %%"REG_a" \n\t" - "shr $2, %%"REG_a" \n\t" - "and $6, %%"REG_a" \n\t" - "add %5, %%"REG_a" \n\t" - "mov %%"REG_a", %%"REG_d" \n\t" - "imul %1, %%"REG_a" \n\t" - "imul %3, %%"REG_d" \n\t" - "prefetchnta 32(%%"REG_a", %0) \n\t" - "prefetcht0 32(%%"REG_d", %2) \n\t" - "add %1, %%"REG_a" \n\t" - "add %3, %%"REG_d" \n\t" - "prefetchnta 32(%%"REG_a", %0) \n\t" - "prefetcht0 32(%%"REG_d", %2) \n\t" - :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), - "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) - : "%"REG_a, "%"REG_d - ); -#endif + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); -- 2.3.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel