On Wed, Apr 01, 2015 at 02:36:01PM -0400, Tucker DiNapoli wrote: > These patches are updates to patches previously posted to the mailing lists, > with some bugs fixed and the reasoning behind some changes expanded on. > > This adds macros in postprocess.c that use inline asm for x86, > __builtin_prefetch if using a recent enough gcc-compatible compiler, and > do nothing otherwise. Inline asm in postprocess_template.c was > replaced by these macros. > --- > libpostproc/postprocess.c | 10 ++++++ > libpostproc/postprocess_template.c | 63 > +++++--------------------------------- > 2 files changed, 18 insertions(+), 55 deletions(-) > > diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c > index 9d89782..f8d28ba 100644 > --- a/libpostproc/postprocess.c > +++ b/libpostproc/postprocess.c > @@ -197,6 +197,16 @@ static inline void prefetcht2(const void *p) > : : "r" (p) > ); > } > +#elif AV_GCC_VERSION_AT_LEAST(3,2) > +#define prefetchnta(p) __builtin_prefetch(p,0,0) > +#define prefetcht0(p) __builtin_prefetch(p,0,1) > +#define prefetcht1(p) __builtin_prefetch(p,0,2) > +#define prefetcht2(p) __builtin_prefetch(p,0,3) > +#else > +#define prefetchnta(p) > +#define prefetcht0(p) > +#define prefetcht1(p) > +#define prefetcht2(p) > #endif > > /* The horizontal functions exist only in C because the MMX > diff --git a/libpostproc/postprocess_template.c > b/libpostproc/postprocess_template.c > index 16e441a..6377ea7 100644 > --- a/libpostproc/postprocess_template.c > +++ b/libpostproc/postprocess_template.c > @@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], > int srcStride, uint8_t dst[ > // finish 1 block before the next otherwise we might have a problem > // with the L1 Cache of the P4 ... 
or only a few blocks at a time or > something > for(x=0; x<width; x+=BLOCK_SIZE){ > - > -#if TEMPLATE_PP_MMXEXT && HAVE_6REGS > -/* > - prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); > - prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); > - prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); > - prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); > -*/ > - > - __asm__( > - "mov %4, %%"REG_a" \n\t" > - "shr $2, %%"REG_a" \n\t" > - "and $6, %%"REG_a" \n\t" > - "add %5, %%"REG_a" \n\t" > - "mov %%"REG_a", %%"REG_d" \n\t" > - "imul %1, %%"REG_a" \n\t" > - "imul %3, %%"REG_d" \n\t" > - "prefetchnta 32(%%"REG_a", %0) \n\t" > - "prefetcht0 32(%%"REG_d", %2) \n\t" > - "add %1, %%"REG_a" \n\t" > - "add %3, %%"REG_d" \n\t" > - "prefetchnta 32(%%"REG_a", %0) \n\t" > - "prefetcht0 32(%%"REG_d", %2) \n\t" > - :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), > "r" ((x86_reg)dstStride), > - "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) > - : "%"REG_a, "%"REG_d > - ); > -#endif > + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); > + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + > 32); > + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); > + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
This will fail on older CPUs that do not support the prefetch* instructions: the removed asm was guarded by `#if TEMPLATE_PP_MMXEXT && HAVE_6REGS`, but the new prefetch calls are unconditional. [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Into a blind darkness they enter who follow after the Ignorance, they as if into a greater darkness enter who devote themselves to the Knowledge alone. -- Isha Upanishad
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel