Allow odd widths for conversion; it costs very little and simplifies setup slightly. The x86 asm falls back to the C code if the width is odd. Round to nearest rather than just rounding down. This reduces the Y error reported by tests/swscale from 3 to 1. The x86 asm doesn't mirror the C code, so exact correspondence isn't an issue there.
Signed-off-by: John Cox <j...@kynesim.co.uk> --- libswscale/rgb2rgb_template.c | 42 ++++++++++++++++++------------- libswscale/swscale_unscaled.c | 5 ++-- libswscale/x86/rgb2rgb_template.c | 5 ++++ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index e57bfa6545..5503e58a29 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -656,6 +656,8 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; int y; const int chromWidth = width >> 1; + const int32_t ky = ((16 << 1) + 1) << (RGB2YUV_SHIFT - 1); + const int32_t kc = ((128 << 1) + 1) << (RGB2YUV_SHIFT - 1); for (y = 0; y < height; y += 2) { int i; @@ -664,9 +666,9 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, unsigned int g = src[6 * i + 1]; unsigned int r = src[6 * i + 2]; - unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; - unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT; + unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT; + unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT; udst[i] = U; vdst[i] = V; @@ -676,30 +678,36 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, g = src[6 * i + 4]; r = src[6 * i + 5]; - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + Y = ((ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT); ydst[2 * i + 1] = Y; } - ydst += lumStride; - src += srcStride; - - if (y+1 == height) - break; - - for (i = 0; i < chromWidth; i++) { + if ((width & 1) != 0) { unsigned int b = src[6 * i + 0]; unsigned int g = src[6 * i + 1]; unsigned int r = src[6 * i + 2]; - unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + 
unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT; + unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT; + unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT; + udst[i] = U; + vdst[i] = V; ydst[2 * i] = Y; + } + ydst += lumStride; + src += srcStride; - b = src[6 * i + 3]; - g = src[6 * i + 4]; - r = src[6 * i + 5]; + if (y+1 == height) + break; - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; + for (i = 0; i < width; i++) { + unsigned int b = src[3 * i + 0]; + unsigned int g = src[3 * i + 1]; + unsigned int r = src[3 * i + 2]; + + unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT; + + ydst[i] = Y; } udst += chromStride; vdst += chromStride; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 751bdcb2e4..e10f967755 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1994,7 +1994,6 @@ void ff_get_unscaled_swscale(SwsContext *c) const enum AVPixelFormat dstFormat = c->dstFormat; const int flags = c->flags; const int dstH = c->dstH; - const int dstW = c->dstW; int needsDither; needsDither = isAnyRGB(dstFormat) && @@ -2052,12 +2051,12 @@ void ff_get_unscaled_swscale(SwsContext *c) /* bgr24toYV12 */ if (srcFormat == AV_PIX_FMT_BGR24 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1)) + !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT))) c->convert_unscaled = bgr24ToYv12Wrapper; /* rgb24toYV12 */ if (srcFormat == AV_PIX_FMT_RGB24 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1)) + !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT))) c->convert_unscaled = rgb24ToYv12Wrapper; /* RGB/BGR -> RGB/BGR (no dither needed forms) */ diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index dc2b4e205a..f90527aa08 100644 --- 
a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -1555,6 +1555,11 @@ static inline void RENAME(bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ int y; const x86_reg chromWidth= width>>1; + if ((width & 1) != 0) { + ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv); + return; + } + if (height > 2) { ff_bgr24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv); src += 2*srcStride; -- 2.39.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".