First commit addressing Trac ticket #5570. Functions defined in
libswscale/input.c have corresponding SIMD definitions in
libswscale/ppc/input_vsx.c
---
 libswscale/ppc/Makefile       |   1 +
 libswscale/ppc/input_vsx.c    | 498 ++++++++++++++++++++++++++++++++++++++++++
 libswscale/swscale.c          |   3 +
 libswscale/swscale_internal.h |   1 +
 4 files changed, 503 insertions(+)
 create mode 100644 libswscale/ppc/input_vsx.c

diff --git a/libswscale/ppc/Makefile b/libswscale/ppc/Makefile
index d1b596e..2482893 100644
--- a/libswscale/ppc/Makefile
+++ b/libswscale/ppc/Makefile
@@ -1,3 +1,4 @@
 OBJS += ppc/swscale_altivec.o                                           \
+        ppc/input_vsx.o                                                 \
         ppc/yuv2rgb_altivec.o                                           \
         ppc/yuv2yuv_altivec.o                                           \
diff --git a/libswscale/ppc/input_vsx.c b/libswscale/ppc/input_vsx.c
new file mode 100644
index 0000000..adb0e38
--- /dev/null
+++ b/libswscale/ppc/input_vsx.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2016 Dan Parrot <dan.par...@mail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/avutil.h"
+#include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
+#include "config.h"
+#include "libswscale/rgb2rgb.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#if HAVE_VSX
+
+/*
+ * Evaluate (v_cr * v_r + v_cg * v_g + v_cb * v_b + v_rnd) >> v_sh on four
+ * 32-bit lanes and store the low 16 bits of lane k in element (base + k) of
+ * the vector short v_out. vec_sr is a logical shift. The lane vectors v_r,
+ * v_g and v_b are taken from the scope in which the macro is used.
+ */
+#define VSX_MAC3_PACK4(v_out, base, v_cr, v_cg, v_cb, v_rnd, v_sh)            \
+    do {                                                                      \
+        vector int v_acc = (v_cr) * v_r;                                      \
+        v_acc = v_acc + (v_cg) * v_g;                                         \
+        v_acc = v_acc + (v_cb) * v_b;                                         \
+        v_acc = v_acc + (v_rnd);                                              \
+        v_acc = vec_sr(v_acc, (vector unsigned int)(v_sh));                   \
+        (v_out)[(base) + 0] = (short)v_acc[0];                                \
+        (v_out)[(base) + 1] = (short)v_acc[1];                                \
+        (v_out)[(base) + 2] = (short)v_acc[2];                                \
+        (v_out)[(base) + 3] = (short)v_acc[3];                                \
+    } while (0)
+
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+
+/* r_b and b_r hold the first and third component; BGR layouts swap them. */
+#define r ((origin == AV_PIX_FMT_BGR48BE  || origin == AV_PIX_FMT_BGR48LE ||  \
+            origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE)   \
+           ? b_r : r_b)
+#define b ((origin == AV_PIX_FMT_BGR48BE  || origin == AV_PIX_FMT_BGR48LE ||  \
+            origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE)   \
+           ? r_b : b_r)
+
+/*
+ * Luma for packed RGB with 16-bit components. VSX counterpart of
+ * rgb48ToY_c_template() and rgb64ToY_c_template() in libswscale/input.c.
+ * step is the number of 16-bit components per pixel (3 or 4). Pixels are
+ * gathered four at a time into vector lanes; the width % 8 pixels left
+ * over at the end are computed with scalar code.
+ */
+static av_always_inline void
+rgb48_64ToY_template_vsx(uint16_t *dst, const uint16_t *src, int width,
+                         enum AVPixelFormat origin, int32_t *rgb2yuv,
+                         int step)
+{
+    const int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+    const int rnd     = 0x2001 << (RGB2YUV_SHIFT - 1);
+    const int vec_end = width - width % 8;
+    const vector int v_ry  = vec_splats((int)ry);
+    const vector int v_gy  = vec_splats((int)gy);
+    const vector int v_by  = vec_splats((int)by);
+    const vector int v_rnd = vec_splats((int)rnd);
+    const vector int v_sh  = vec_splats((int)RGB2YUV_SHIFT);
+    vector int v_r = vec_splats(0), v_g = vec_splats(0), v_b = vec_splats(0);
+    vector short v_dst = vec_splats((short)0);
+    int i;
+
+    for (i = 0; i < width; i++) {
+        const uint16_t *px = src + i * step;
+        unsigned r_b = input_pixel(&px[0]);
+        unsigned g   = input_pixel(&px[1]);
+        unsigned b_r = input_pixel(&px[2]);
+
+        if (i >= vec_end) { /* scalar tail */
+            dst[i] = (ry * r + gy * g + by * b + rnd) >> RGB2YUV_SHIFT;
+            continue;
+        }
+
+        v_r[i & 3] = r;
+        v_g[i & 3] = g;
+        v_b[i & 3] = b;
+
+        if ((i & 3) == 3)
+            VSX_MAC3_PACK4(v_dst, i & 4, v_ry, v_gy, v_by, v_rnd, v_sh);
+        if ((i & 7) == 7)
+            vec_vsx_st(v_dst, 0, (short *)&dst[i - 7]);
+    }
+}
+
+/*
+ * Chroma for packed RGB with 16-bit components. VSX counterpart of the
+ * rgb{48,64}ToUV_c_template() and rgb{48,64}ToUV_half_c_template()
+ * functions in libswscale/input.c. step is the number of 16-bit components
+ * per pixel; with half set, each output sample is computed from the rounded
+ * average of two horizontally adjacent input pixels.
+ */
+static av_always_inline void
+rgb48_64ToUV_template_vsx(uint16_t *dstU, uint16_t *dstV,
+                          const uint16_t *src1, const uint16_t *src2,
+                          int width, enum AVPixelFormat origin,
+                          int32_t *rgb2yuv, int step, int half)
+{
+    const int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+    const int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    const int rnd     = 0x10001 << (RGB2YUV_SHIFT - 1);
+    const int vec_end = width - width % 8;
+    const int stride  = half ? 2 * step : step; /* components per output */
+    const vector int v_ru  = vec_splats((int)ru);
+    const vector int v_gu  = vec_splats((int)gu);
+    const vector int v_bu  = vec_splats((int)bu);
+    const vector int v_rv  = vec_splats((int)rv);
+    const vector int v_gv  = vec_splats((int)gv);
+    const vector int v_bv  = vec_splats((int)bv);
+    const vector int v_rnd = vec_splats((int)rnd);
+    const vector int v_sh  = vec_splats((int)RGB2YUV_SHIFT);
+    vector int v_r = vec_splats(0), v_g = vec_splats(0), v_b = vec_splats(0);
+    vector short v_dstu = vec_splats((short)0), v_dstv = vec_splats((short)0);
+    int i;
+
+    av_assert1(src1 == src2);
+
+    for (i = 0; i < width; i++) {
+        const uint16_t *px = src1 + i * stride;
+        unsigned r_b = input_pixel(&px[0]);
+        unsigned g   = input_pixel(&px[1]);
+        unsigned b_r = input_pixel(&px[2]);
+
+        if (half) {
+            r_b = (r_b + input_pixel(&px[step + 0]) + 1) >> 1;
+            g   = (g   + input_pixel(&px[step + 1]) + 1) >> 1;
+            b_r = (b_r + input_pixel(&px[step + 2]) + 1) >> 1;
+        }
+
+        if (i >= vec_end) { /* scalar tail */
+            dstU[i] = (ru * r + gu * g + bu * b + rnd) >> RGB2YUV_SHIFT;
+            dstV[i] = (rv * r + gv * g + bv * b + rnd) >> RGB2YUV_SHIFT;
+            continue;
+        }
+
+        v_r[i & 3] = r;
+        v_g[i & 3] = g;
+        v_b[i & 3] = b;
+
+        if ((i & 3) == 3) {
+            VSX_MAC3_PACK4(v_dstu, i & 4, v_ru, v_gu, v_bu, v_rnd, v_sh);
+            VSX_MAC3_PACK4(v_dstv, i & 4, v_rv, v_gv, v_bv, v_rnd, v_sh);
+        }
+        if ((i & 7) == 7) {
+            vec_vsx_st(v_dstu, 0, (short *)&dstU[i - 7]);
+            vec_vsx_st(v_dstv, 0, (short *)&dstV[i - 7]);
+        }
+    }
+}
+
+#undef r
+#undef b
+#undef input_pixel
+
+#define rgb48_64_funcs(pattern, bits, BE_LE, origin, step)                    \
+static void pattern ## bits ## BE_LE ## ToY_c_vsx(uint8_t *_dst,              \
+                                    const uint8_t *_src,                      \
+                                    const uint8_t *unused0,                   \
+                                    const uint8_t *unused1,                   \
+                                    int width, uint32_t *rgb2yuv)             \
+{                                                                             \
+    rgb48_64ToY_template_vsx((uint16_t *)_dst, (const uint16_t *)_src,        \
+                             width, origin, (int32_t *)rgb2yuv, step);        \
+}                                                                             \
+                                                                              \
+static void pattern ## bits ## BE_LE ## ToUV_c_vsx(uint8_t *_dstU,            \
+                                    uint8_t *_dstV,                           \
+                                    const uint8_t *unused0,                   \
+                                    const uint8_t *_src1,                     \
+                                    const uint8_t *_src2,                     \
+                                    int width, uint32_t *rgb2yuv)             \
+{                                                                             \
+    rgb48_64ToUV_template_vsx((uint16_t *)_dstU, (uint16_t *)_dstV,           \
+                              (const uint16_t *)_src1,                        \
+                              (const uint16_t *)_src2,                        \
+                              width, origin, (int32_t *)rgb2yuv, step, 0);    \
+}                                                                             \
+                                                                              \
+static void pattern ## bits ## BE_LE ## ToUV_half_c_vsx(uint8_t *_dstU,       \
+                                    uint8_t *_dstV,                           \
+                                    const uint8_t *unused0,                   \
+                                    const uint8_t *_src1,                     \
+                                    const uint8_t *_src2,                     \
+                                    int width, uint32_t *rgb2yuv)             \
+{                                                                             \
+    rgb48_64ToUV_template_vsx((uint16_t *)_dstU, (uint16_t *)_dstV,           \
+                              (const uint16_t *)_src1,                        \
+                              (const uint16_t *)_src2,                        \
+                              width, origin, (int32_t *)rgb2yuv, step, 1);    \
+}
+
+rgb48_64_funcs(rgb, 64, LE, AV_PIX_FMT_RGBA64LE, 4)
+rgb48_64_funcs(bgr, 64, LE, AV_PIX_FMT_BGRA64LE, 4)
+rgb48_64_funcs(rgb, 48, LE, AV_PIX_FMT_RGB48LE,  3)
+rgb48_64_funcs(bgr, 48, LE, AV_PIX_FMT_BGR48LE,  3)
+
+#define input_pixel(i) ((origin == AV_PIX_FMT_RGBA ||                      \
+                         origin == AV_PIX_FMT_BGRA ||                      \
+                         origin == AV_PIX_FMT_ARGB ||                      \
+                         origin == AV_PIX_FMT_ABGR)                        \
+                        ? AV_RN32A(&src[(i) * 4])                          \
+                        : (isBE(origin) ? AV_RB16(&src[(i) * 2])           \
+                                        : AV_RL16(&src[(i) * 2])))
+
+/*
+ * Luma for packed 16/32-bit RGB. VSX counterpart of
+ * rgb16_32ToY_c_template() in libswscale/input.c. Each pixel is shifted
+ * right by shp, split into components with maskr/maskg/maskb and
+ * shr/shg/shb, and weighted with the rgb2yuv coefficients pre-shifted by
+ * rsh/gsh/bsh; the sum plus rounding is shifted right by S - 6.
+ */
+static av_always_inline void
+rgb16_32ToY_c_template_vsx(int16_t *dst, const uint8_t *src, int width,
+                           enum AVPixelFormat origin,
+                           int shr, int shg, int shb, int shp,
+                           int maskr, int maskg, int maskb,
+                           int rsh, int gsh, int bsh, int S,
+                           int32_t *rgb2yuv)
+{
+    const int ry = rgb2yuv[RY_IDX] << rsh,
+              gy = rgb2yuv[GY_IDX] << gsh,
+              by = rgb2yuv[BY_IDX] << bsh;
+    const unsigned rnd = (32 << (S - 1)) + (1 << (S - 7));
+    const int vec_end  = width - width % 8;
+    const vector int v_ry  = vec_splats((int)ry);
+    const vector int v_gy  = vec_splats((int)gy);
+    const vector int v_by  = vec_splats((int)by);
+    const vector int v_rnd = vec_splats((int)rnd);
+    const vector int v_sh  = vec_splats((int)(S - 6));
+    vector int v_r = vec_splats(0), v_g = vec_splats(0), v_b = vec_splats(0);
+    vector short v_dst = vec_splats((short)0);
+    int i;
+
+    for (i = 0; i < width; i++) {
+        int px = input_pixel(i) >> shp;
+        int b  = (px & maskb) >> shb;
+        int g  = (px & maskg) >> shg;
+        int r  = (px & maskr) >> shr;
+
+        if (i >= vec_end) { /* scalar tail */
+            dst[i] = (ry * r + gy * g + by * b + rnd) >> (S - 6);
+            continue;
+        }
+
+        v_r[i & 3] = r;
+        v_g[i & 3] = g;
+        v_b[i & 3] = b;
+
+        if ((i & 3) == 3)
+            VSX_MAC3_PACK4(v_dst, i & 4, v_ry, v_gy, v_by, v_rnd, v_sh);
+        if ((i & 7) == 7)
+            vec_vsx_st(v_dst, 0, (short *)&dst[i - 7]);
+    }
+}
+
+/*
+ * Chroma for packed 16/32-bit RGB. VSX counterpart of
+ * rgb16_32ToUV_c_template() in libswscale/input.c; the parameters have the
+ * same meaning as in rgb16_32ToY_c_template_vsx().
+ */
+static av_always_inline void
+rgb16_32ToUV_c_template_vsx(int16_t *dstU, int16_t *dstV,
+                            const uint8_t *src, int width,
+                            enum AVPixelFormat origin,
+                            int shr, int shg, int shb, int shp,
+                            int maskr, int maskg, int maskb,
+                            int rsh, int gsh, int bsh, int S,
+                            int32_t *rgb2yuv)
+{
+    const int ru = rgb2yuv[RU_IDX] << rsh, rv = rgb2yuv[RV_IDX] << rsh,
+              gu = rgb2yuv[GU_IDX] << gsh, gv = rgb2yuv[GV_IDX] << gsh,
+              bu = rgb2yuv[BU_IDX] << bsh, bv = rgb2yuv[BV_IDX] << bsh;
+    const unsigned rnd = (256u << (S - 1)) + (1 << (S - 7));
+    const int vec_end  = width - width % 8;
+    const vector int v_ru  = vec_splats((int)ru);
+    const vector int v_gu  = vec_splats((int)gu);
+    const vector int v_bu  = vec_splats((int)bu);
+    const vector int v_rv  = vec_splats((int)rv);
+    const vector int v_gv  = vec_splats((int)gv);
+    const vector int v_bv  = vec_splats((int)bv);
+    const vector int v_rnd = vec_splats((int)rnd);
+    const vector int v_sh  = vec_splats((int)(S - 6));
+    vector int v_r = vec_splats(0), v_g = vec_splats(0), v_b = vec_splats(0);
+    vector short v_dstu = vec_splats((short)0), v_dstv = vec_splats((short)0);
+    int i;
+
+    for (i = 0; i < width; i++) {
+        int px = input_pixel(i) >> shp;
+        int b  = (px & maskb) >> shb;
+        int g  = (px & maskg) >> shg;
+        int r  = (px & maskr) >> shr;
+
+        if (i >= vec_end) { /* scalar tail */
+            dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S - 6);
+            dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S - 6);
+            continue;
+        }
+
+        v_r[i & 3] = r;
+        v_g[i & 3] = g;
+        v_b[i & 3] = b;
+
+        if ((i & 3) == 3) {
+            VSX_MAC3_PACK4(v_dstu, i & 4, v_ru, v_gu, v_bu, v_rnd, v_sh);
+            VSX_MAC3_PACK4(v_dstv, i & 4, v_rv, v_gv, v_bv, v_rnd, v_sh);
+        }
+        if ((i & 7) == 7) {
+            vec_vsx_st(v_dstu, 0, (short *)&dstU[i - 7]);
+            vec_vsx_st(v_dstv, 0, (short *)&dstV[i - 7]);
+        }
+    }
+}
+
+/*
+ * Horizontally subsampled chroma for packed 16/32-bit RGB. VSX counterpart
+ * of rgb16_32ToUV_half_c_template() in libswscale/input.c. Two adjacent
+ * pixels are summed per output sample, so the masks are widened by one bit
+ * and the final shift is S - 6 + 1.
+ */
+static av_always_inline void
+rgb16_32ToUV_half_c_template_vsx(int16_t *dstU, int16_t *dstV,
+                                 const uint8_t *src, int width,
+                                 enum AVPixelFormat origin,
+                                 int shr, int shg, int shb, int shp,
+                                 int maskr, int maskg, int maskb,
+                                 int rsh, int gsh, int bsh, int S,
+                                 int32_t *rgb2yuv)
+{
+    const int ru = rgb2yuv[RU_IDX] << rsh, rv = rgb2yuv[RV_IDX] << rsh,
+              gu = rgb2yuv[GU_IDX] << gsh, gv = rgb2yuv[GV_IDX] << gsh,
+              bu = rgb2yuv[BU_IDX] << bsh, bv = rgb2yuv[BV_IDX] << bsh;
+    const int maskgx   = ~(maskr | maskb);
+    const unsigned rnd = (256U << S) + (1 << (S - 6));
+    const int vec_end  = width - width % 8;
+    const vector int v_ru  = vec_splats((int)ru);
+    const vector int v_gu  = vec_splats((int)gu);
+    const vector int v_bu  = vec_splats((int)bu);
+    const vector int v_rv  = vec_splats((int)rv);
+    const vector int v_gv  = vec_splats((int)gv);
+    const vector int v_bv  = vec_splats((int)bv);
+    const vector int v_rnd = vec_splats((int)rnd);
+    const vector int v_sh  = vec_splats((int)(S - 6 + 1));
+    vector int v_r = vec_splats(0), v_g = vec_splats(0), v_b = vec_splats(0);
+    vector short v_dstu = vec_splats((short)0), v_dstv = vec_splats((short)0);
+    int i;
+
+    maskr |= maskr << 1;
+    maskb |= maskb << 1;
+    maskg |= maskg << 1;
+
+    for (i = 0; i < width; i++) {
+        unsigned px0 = input_pixel(2 * i + 0) >> shp;
+        unsigned px1 = input_pixel(2 * i + 1) >> shp;
+        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
+        int rb = px0 + px1 - g;
+
+        b = (rb & maskb) >> shb;
+        if (shp ||
+            origin == AV_PIX_FMT_BGR565LE || origin == AV_PIX_FMT_BGR565BE ||
+            origin == AV_PIX_FMT_RGB565LE || origin == AV_PIX_FMT_RGB565BE) {
+            g >>= shg;
+        } else {
+            g = (g & maskg) >> shg;
+        }
+        r = (rb & maskr) >> shr;
+
+        if (i >= vec_end) { /* scalar tail */
+            dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> (S - 6 + 1);
+            dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> (S - 6 + 1);
+            continue;
+        }
+
+        v_r[i & 3] = r;
+        v_g[i & 3] = g;
+        v_b[i & 3] = b;
+
+        if ((i & 3) == 3) {
+            VSX_MAC3_PACK4(v_dstu, i & 4, v_ru, v_gu, v_bu, v_rnd, v_sh);
+            VSX_MAC3_PACK4(v_dstv, i & 4, v_rv, v_gv, v_bv, v_rnd, v_sh);
+        }
+        if ((i & 7) == 7) {
+            vec_vsx_st(v_dstu, 0, (short *)&dstU[i - 7]);
+            vec_vsx_st(v_dstv, 0, (short *)&dstV[i - 7]);
+        }
+    }
+}
+
+#undef input_pixel
+
+#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr,                \
+                         maskg, maskb, rsh, gsh, bsh, S)                      \
+static void name ## ToY_c_vsx(uint8_t *dst, const uint8_t *src,               \
+                              const uint8_t *unused1, const uint8_t *unused2, \
+                              int width, uint32_t *tab)                       \
+{                                                                             \
+    rgb16_32ToY_c_template_vsx((int16_t *)dst, src, width, fmt,               \
+                               shr, shg, shb, shp,                            \
+                               maskr, maskg, maskb,                           \
+                               rsh, gsh, bsh, S, (int32_t *)tab);             \
+}                                                                             \
+                                                                              \
+static void name ## ToUV_c_vsx(uint8_t *dstU, uint8_t *dstV,                  \
+                               const uint8_t *unused0, const uint8_t *src,    \
+                               const uint8_t *dummy,                          \
+                               int width, uint32_t *tab)                      \
+{                                                                             \
+    rgb16_32ToUV_c_template_vsx((int16_t *)dstU, (int16_t *)dstV, src,        \
+                                width, fmt, shr, shg, shb, shp,               \
+                                maskr, maskg, maskb,                          \
+                                rsh, gsh, bsh, S, (int32_t *)tab);            \
+}                                                                             \
+                                                                              \
+static void name ## ToUV_half_c_vsx(uint8_t *dstU, uint8_t *dstV,             \
+                                    const uint8_t *unused0,                   \
+                                    const uint8_t *src,                       \
+                                    const uint8_t *dummy,                     \
+                                    int width, uint32_t *tab)                 \
+{                                                                             \
+    rgb16_32ToUV_half_c_template_vsx((int16_t *)dstU, (int16_t *)dstV, src,   \
+                                     width, fmt, shr, shg, shb, shp,          \
+                                     maskr, maskg, maskb,                     \
+                                     rsh, gsh, bsh, S, (int32_t *)tab);       \
+}
+
+rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+
+#endif /* HAVE_VSX */
+
+/*
+ * Install the VSX input readers in c for the source formats covered by this
+ * file. Nothing is changed for other formats, or when the CPU does not
+ * report VSX support at run time.
+ */
+av_cold void ff_sws_init_input_funcs_vsx(SwsContext *c)
+{
+#if HAVE_VSX
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_VSX))
+        return;
+
+#define VSX_INPUT_CASE(fmt, name)                                             \
+    case fmt:                                                                 \
+        c->lumToYV12 = name ## ToY_c_vsx;                                     \
+        c->chrToYV12 = c->chrSrcHSubSample ? name ## ToUV_half_c_vsx          \
+                                           : name ## ToUV_c_vsx;              \
+        break
+
+    switch (c->srcFormat) {
+    VSX_INPUT_CASE(AV_PIX_FMT_RGBA64LE, rgb64LE);
+    VSX_INPUT_CASE(AV_PIX_FMT_BGRA64LE, bgr64LE);
+    VSX_INPUT_CASE(AV_PIX_FMT_RGB48LE,  rgb48LE);
+    VSX_INPUT_CASE(AV_PIX_FMT_BGR48LE,  bgr48LE);
+    VSX_INPUT_CASE(AV_PIX_FMT_BGR565LE, bgr16le);
+    VSX_INPUT_CASE(AV_PIX_FMT_BGR555LE, bgr15le);
+    VSX_INPUT_CASE(AV_PIX_FMT_BGR444LE, bgr12le);
+    VSX_INPUT_CASE(AV_PIX_FMT_RGB565LE, rgb16le);
+    VSX_INPUT_CASE(AV_PIX_FMT_RGB555LE, rgb15le);
+    VSX_INPUT_CASE(AV_PIX_FMT_RGB444LE, rgb12le);
+    default:
+        break;
+    }
+
+#undef VSX_INPUT_CASE
+#endif /* HAVE_VSX */
+}
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index bbea0fe..3e4d177 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -566,6 +566,9 @@ static av_cold void sws_init_swscale(SwsContext *c)
 
     ff_sws_init_input_funcs(c);
 
+    if (HAVE_VSX && (!HAVE_BIGENDIAN)) {
+        ff_sws_init_input_funcs_vsx(c);
+    }
 
     if (c->srcBpc == 8) {
         if (c->dstBpc <= 14) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 3f72f98..9283ac8 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -881,6 +881,7 @@ void ff_get_unscaled_swscale_aarch64(SwsContext *c);
 SwsFunc ff_getSwsFunc(SwsContext *c);
 
 void ff_sws_init_input_funcs(SwsContext *c);
+void ff_sws_init_input_funcs_vsx(SwsContext *c);
 void ff_sws_init_output_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2plane1,
                               yuv2planarX_fn *yuv2planeX,
-- 
2.4.11

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel