Hi,

On Thu, May 26, 2011 at 11:30 AM, Kostya <kostya.shish...@gmail.com> wrote:
> On Thu, May 26, 2011 at 11:21:46AM -0400, Ronald S. Bultje wrote:
>> ---
>>  libswscale/ppc/swscale_altivec_template.c |   30 
>> +++++++++++++++-------------
>>  libswscale/ppc/swscale_template.c         |   30 
>> +++++++++++++++-------------
>>  libswscale/ppc/yuv2rgb_altivec.c          |   17 ++++++++-------
>>  libswscale/swscale_internal.h             |    7 +++--
>>  4 files changed, 45 insertions(+), 39 deletions(-)
>>
>> diff --git a/libswscale/ppc/swscale_altivec_template.c 
>> b/libswscale/ppc/swscale_altivec_template.c
>> index b123f70..eee7bdd 100644
>> --- a/libswscale/ppc/swscale_altivec_template.c
>> +++ b/libswscale/ppc/swscale_altivec_template.c
>> @@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, 
>> int dstW)
>>  }
>>
>>  static inline void
>> -yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int 
>> lumFilterSize,
>> -                      const int16_t *chrFilter, const int16_t **chrSrc, int 
>> chrFilterSize,
>> -                      uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int 
>> dstW, int chrDstW)
>> +yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
>> +                      int lumFilterSize, const int16_t *chrFilter,
>> +                      const int16_t **chrUSrc, const int16_t **chrVSrc,
>> +                      int chrFilterSize, uint8_t *dest, uint8_t *uDest,
>> +                      uint8_t *vDest, int dstW, int chrDstW)
>>  {
>>      const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 
>> 18)};
>>      register int i, j;
>> @@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const 
>> int16_t **lumSrc, int lumF
>>              vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
>>              vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is 
>> loaded 8 times in vChrFilter
>>
>> -            perm = vec_lvsl(0, chrSrc[j]);
>> -            l1 = vec_ld(0, chrSrc[j]);
>> -            l1_V = vec_ld(2048 << 1, chrSrc[j]);
>> +            perm = vec_lvsl(0, chrUSrc[j]);
>> +            l1 = vec_ld(0, chrUSrc[j]);
>> +            l1_V = vec_ld(0, chrVSrc[j]);
>>
>>              for (i = 0; i < (chrDstW - 7); i+=8) {
>>                  int offset = i << 2;
>> -                vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
>> -                vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, 
>> chrSrc[j]);
>> +                vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
>> +                vector signed short l2_V = vec_ld((i << 1) + 16, 
>> chrVSrc[j]);
>>
>>                  vector signed int v1 = vec_ld(offset, u);
>>                  vector signed int v2 = vec_ld(offset + 16, u);
>>                  vector signed int v1_V = vec_ld(offset, v);
>>                  vector signed int v2_V = vec_ld(offset + 16, v);
>>
>> -                vector signed short ls = vec_perm(l1, l2, perm); // 
>> chrSrc[j][i] ... chrSrc[j][i+7]
>> -                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // 
>> chrSrc[j][i+2048] ... chrSrc[j][i+2055]
>> +                vector signed short ls = vec_perm(l1, l2, perm); // 
>> chrUSrc[j][i] ... chrUSrc[j][i+7]
>> +                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // 
>> chrVSrc[j][i] ... chrVSrc[j][i+7]
>>
>>                  vector signed int i1 = vec_mule(vChrFilter, ls);
>>                  vector signed int i2 = vec_mulo(vChrFilter, ls);
>> @@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const 
>> int16_t **lumSrc, int lumF
>>                  vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
>>
>>                  vector signed int vf1 = vec_mergeh(i1, i2);
>> -                vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] 
>> * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
>> +                vector signed int vf2 = vec_mergel(i1, i2); // 
>> chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
>>                  vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
>> -                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // 
>> chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
>> +                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // 
>> chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
>>
>>                  vector signed int vo1 = vec_add(v1, vf1);
>>                  vector signed int vo2 = vec_add(v2, vf2);
>> @@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const 
>> int16_t **lumSrc, int lumF
>>                  l1_V = l2_V;
>>              }
>>              for ( ; i < chrDstW; i++) {
>> -                u[i] += chrSrc[j][i] * chrFilter[j];
>> -                v[i] += chrSrc[j][i + 2048] * chrFilter[j];
>> +                u[i] += chrUSrc[j][i] * chrFilter[j];
>> +                v[i] += chrVSrc[j][i] * chrFilter[j];
>>              }
>>          }
>>          altivec_packIntArrayToCharArray(u, uDest, chrDstW);
>> diff --git a/libswscale/ppc/swscale_template.c 
>> b/libswscale/ppc/swscale_template.c
>> index 7968177..022bff7 100644
>> --- a/libswscale/ppc/swscale_template.c
>> +++ b/libswscale/ppc/swscale_template.c
>> @@ -18,27 +18,30 @@
>>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
>> USA
>>   */
>>
>> -
>> -#if COMPILE_TEMPLATE_ALTIVEC
>>  #include "swscale_altivec_template.c"
>> -#endif
>>
>> -#if COMPILE_TEMPLATE_ALTIVEC
>> -static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t 
>> *lumFilter, const int16_t **lumSrc, int lumFilterSize,
>> -                                    const int16_t *chrFilter, const int16_t 
>> **chrSrc, int chrFilterSize, const int16_t **alpSrc,
>> -                                    uint8_t *dest, uint8_t *uDest, uint8_t 
>> *vDest, uint8_t *aDest, long dstW, long chrDstW)
>
> In general OK, but removing these guards belongs in a separate patch.

OK, pushed without that change then.

Ronald
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to