On Tue, Nov 04, 2014 at 03:05:24PM +0800, rongyan wrote: > Hi, > There are 3 patches to fix bugs for POWER8 little endian. I will send 3 > patches in 3 different email. This is the first, functions > hadamard8_diff8x8_altivec() hadamard8_diff16x8_altivec() > sad16_x2_altivec() > sad16_y2_altivec() > sad16_xy2_altivec() > sad16_altivec() > sad8_altivec() > sse16_altivec() > sse8_altivec() > are fixed. > The fate test result after merge these 3 patches can be found on website by > searching "ibmcrl", also attached in the below to facilitate the review. The > passed test cases change from 1679/2182 to 2202/2235. > > Thanks for review and any comments please feel free to let me know. > > Rong Yan > ------------------ > The world has enough for everyone's need, but not enough for everyone's > greed.
> me_cmp.c | 497 > ++++++++++++++++++++++++--------------------------------------- > 1 file changed, 196 insertions(+), 301 deletions(-) > d18753b0bc6ac53409a3070a61795e079282b6ee > 0001-libavcodec-ppc-me_cmp.c-fix-hadamard8_diff8x8_altive.patch > From 40e4b6d2871772e17c86986cfeca16dcdb24ded8 Mon Sep 17 00:00:00 2001 > From: Rong Yan <rongyan...@gmail.com> > Date: Tue, 4 Nov 2014 06:23:11 +0000 > Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix hadamard8_diff8x8_altivec() > hadamard8_diff16x8_altivec() sad16_x2_altivec() sad16_y2_altivec() > sad16_xy2_altivec() sad16_altivec() sad8_altivec() sse16_altivec() > sse8_altivec() for POWER LE > > --- > libavcodec/ppc/me_cmp.c | 497 > +++++++++++++++++++----------------------------- > 1 file changed, 196 insertions(+), 301 deletions(-) > > diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c > index 8ff8193..133c3bb 100644 > --- a/libavcodec/ppc/me_cmp.c > +++ b/libavcodec/ppc/me_cmp.c > @@ -35,64 +35,39 @@ > #include "libavcodec/me_cmp.h" > > #if HAVE_ALTIVEC > -#if HAVE_VSX > -static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, > - int line_size, int h) > -{ > - int i, s = 0; > - const vector unsigned char zero = > - (const vector unsigned char) vec_splat_u8(0); > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); > - vector signed int sumdiffs; > - > - for (i = 0; i < h; i++) { > - /* Read unaligned pixels into our vectors. The vectors are as > follows: > - * pix1v: pix1[0] - pix1[15] > - * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */ > - vector unsigned char pix1v = vec_vsx_ld(0, pix1); > - vector unsigned char pix2v = vec_vsx_ld(0, pix2); > - vector unsigned char pix2iv = vec_vsx_ld(1, pix2); > - > - /* Calculate the average vector. */ > - vector unsigned char avgv = vec_avg(pix2v, pix2iv); > - > - /* Calculate a sum of abs differences vector. */ > - vector unsigned char t5 = vec_sub(vec_max(pix1v, avgv), > - vec_min(pix1v, avgv)); > - > - /* Add each 4 pixel group together and put 4 results into sad. */ > - sad = vec_sum4s(t5, sad); > - > - pix1 += line_size; > - pix2 += line_size; > - } > - /* Sum up the four partial sums, and put the result into s. */ > - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); > - sumdiffs = vec_splat(sumdiffs, 3); > - vec_vsx_st(sumdiffs, 0, &s); > - return s; > -} > -#else > static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, > int line_size, int h) > { > int i, s = 0; > +#if HAVE_BIGENDIAN > const vector unsigned char zero = > (const vector unsigned char) vec_splat_u8(0); > vector unsigned char perm1 = vec_lvsl(0, pix2); > vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); > vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); > vector signed int sumdiffs; > +#else /* HAVE_BIGENDIAN */ > + const vector unsigned char __attribute__((aligned(16))) zero = > + (const vector unsigned char) vec_splat_u8(0); > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned > int) vec_splat_u32(0); > + vector signed int __attribute__((aligned(16))) sumdiffs; > +#endif /* HAVE_BIGENDIAN */ this is much better! but please use #if HAVE_BIGENDIAN #define DECLARE_ALIGNED_LE(n,t,v) t v #else #define DECLARE_ALIGNED_LE(n,t,v) DECLARE_ALIGNED(n,t,v) #endif ... DECLARE_ALIGNED_LE(16, const vector unsigned char, zero) = (const vector unsigned char) vec_splat_u8(0); #if HAVE_BIGENDIAN vector unsigned char perm1 = vec_lvsl(0, pix2); vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); #endif /* HAVE_BIGENDIAN */ DECLARE_ALIGNED_LE(16, vector unsigned int, sad) = (vector unsigned int) vec_splat_u32(0); DECLARE_ALIGNED_LE(16, vector signed int, sumdiffs); > > for (i = 0; i < h; i++) { > /* Read unaligned pixels into our vectors. The vectors are as > follows: > * pix1v: pix1[0] - pix1[15] > * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */ > +#if HAVE_BIGENDIAN > vector unsigned char pix1v = vec_ld(0, pix1); > vector unsigned char pix2l = vec_ld(0, pix2); > vector unsigned char pix2r = vec_ld(16, pix2); > vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1); > vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2); > +#else /* HAVE_BIGENDIAN */ > + vector unsigned char pix1v = vec_vsx_ld(0, pix1); > + vector unsigned char pix2v = vec_vsx_ld(0, pix2); > + vector unsigned char pix2iv = vec_vsx_ld(1, pix2); > +#endif /* HAVE_BIGENDIAN */ > > /* Calculate the average vector. */ > vector unsigned char avgv = vec_avg(pix2v, pix2iv); please use: #if HAVE_BIGENDIAN #define VEC_LD(a, b) vec_ld(a, b) #else #define VEC_LD(a, b) vec_vsx_ld(a, b) #endif ... vector unsigned char pix1v = VEC_LD(0, pix1); vector unsigned char pix2l = VEC_LD(0, pix2); vector unsigned char pix2r = VEC_LD(16, pix2); #if HAVE_BIGENDIAN vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1); vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2); #endif /* HAVE_BIGENDIAN */ [...] > +#if HAVE_BIGENDIAN > pix1v = vec_ld(0, pix1); > > pix2l = vec_ld(0, pix3); > @@ -373,6 +262,16 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t > *pix1, uint8_t *pix2, > pix3lv = (vector unsigned short) vec_mergel(zero, pix3v); > pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv); > pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv); > +#else /* HAVE_BIGENDIAN */ > + pix1v = vec_vsx_ld(0, pix1); > + pix3v = vec_vsx_ld(0, pix3); > + pix3iv = vec_vsx_ld(1, pix3); > + > + pix3hv = (vector unsigned short) vec_mergeh(pix3v, zero); > + pix3lv = (vector unsigned short) vec_mergel(pix3v, zero); > + pix3ihv = (vector unsigned short) vec_mergeh(pix3iv, zero); > + pix3ilv = (vector unsigned short) vec_mergel(pix3iv, zero); > +#endif /* HAVE_BIGENDIAN */ please use: #if HAVE_BIGENDIAN #define VEC_MERGEH(a, b) vec_mergeh(a,b) #define VEC_MERGEL(a, b) vec_mergel(a,b) #else #define VEC_MERGEH(a, b) vec_mergeh(b,a) #define VEC_MERGEL(a, b) vec_mergel(b,a) #endif ... pix3hv = (vector unsigned short) VEC_MERGEH(pix3v, zero); pix3lv = (vector unsigned short) VEC_MERGEL(pix3v, zero); pix3ihv = (vector unsigned short) VEC_MERGEH(pix3iv, zero); pix3ilv = (vector unsigned short) VEC_MERGEL(pix3iv, zero); [...] > +#if HAVE_BIGENDIAN > vec_ste(sumsqr, 0, &s); > +#else /* HAVE_BIGENDIAN */ > + vec_vsx_st(sumsqr, 0, &s); > +#endif /* HAVE_BIGENDIAN */ please use: #if HAVE_BIGENDIAN #define VEC_STE(a,b,c) vec_ste(a,b,c) #else #define VEC_STE(a,b,c) vec_vsx_st(a,b,c) #endif ... VEC_STE(sumsqr, 0, &s); [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Opposition brings concord. Out of discord comes the fairest harmony. -- Heraclitus
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel