From: "Ronald S. Bultje" <[email protected]>
Fix an overflow in 16-bit vertical scaling (e.g. using -flags lanczos),
fix a crash in bilinear vertical scaling when using C code if output
format was 8-bpc RGB but without alpha (e.g. RGB24), fix an underflow
in 16-bit horizontal scaling when using e.g. lanczos scaling, and fix
a bug when converting from 16-bit to 8-bit and using MMX optimizations,
where we would wrongly use the second (unused) half of the U plane,
rather than the V plane, to write out the V pixels.
---
libswscale/swscale.c | 23 ++++++++++++-----------
libswscale/utils.c | 2 +-
libswscale/x86/swscale_template.c | 24 ++++++++++++------------
3 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index dacf40e..74c9225 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -206,7 +206,7 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const
int32_t **lumSrc,
int i;
uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
- int shift = 15 + 16 - output_bits;
+ int shift = 15 + 16 - output_bits - 1;
#define output_pixel(pos, val) \
if (big_endian) { \
@@ -223,24 +223,24 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const
int32_t **lumSrc,
} \
}
for (i = 0; i < dstW; i++) {
- int val = 1 << (30-output_bits);
+ int val = 1 << (30-output_bits - 1);
int j;
for (j = 0; j < lumFilterSize; j++)
- val += lumSrc[j][i] * lumFilter[j];
+ val += (lumSrc[j][i] * lumFilter[j]) >> 1;
output_pixel(&yDest[i], val);
}
if (uDest) {
for (i = 0; i < chrDstW; i++) {
- int u = 1 << (30-output_bits);
- int v = 1 << (30-output_bits);
+ int u = 1 << (30-output_bits - 1);
+ int v = 1 << (30-output_bits - 1);
int j;
for (j = 0; j < chrFilterSize; j++) {
- u += chrUSrc[j][i] * chrFilter[j];
- v += chrVSrc[j][i] * chrFilter[j];
+ u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
+ v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
}
output_pixel(&uDest[i], u);
@@ -250,11 +250,11 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const
int32_t **lumSrc,
if (CONFIG_SWSCALE_ALPHA && aDest) {
for (i = 0; i < dstW; i++) {
- int val = 1 << (30-output_bits);
+ int val = 1 << (30-output_bits - 1);
int j;
for (j = 0; j < lumFilterSize; j++)
- val += alpSrc[j][i] * lumFilter[j];
+ val += (alpSrc[j][i] * lumFilter[j]) >> 1;
output_pixel(&aDest[i], val);
}
@@ -1147,7 +1147,8 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
- *abuf0 = abuf[0], *abuf1 = abuf[1];
+ *abuf0 = hasAlpha ? abuf[0] : NULL,
+ *abuf1 = hasAlpha ? abuf[1] : NULL;
int yalpha1 = 4095 - yalpha;
int uvalpha1 = 4095 - uvalpha;
int i;
@@ -1865,7 +1866,7 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int
dstW, const uint8_t *_s
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
- unsigned int val = 0;
+ int val = 0;
for (j = 0; j < filterSize; j++) {
val += src[srcPos + j] * filter[filterSize * i + j];
diff --git a/libswscale/utils.c b/libswscale/utils.c
index eea32a1..99eab11 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1040,7 +1040,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
for (i=0; i<c->vChrBufSize; i++) {
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1,
fail);
c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
- c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i]
+ dst_stride_px;
+ c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i]
+ (dst_stride >> 1);
}
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
for (i=0; i<c->vLumBufSize; i++) {
diff --git a/libswscale/x86/swscale_template.c
b/libswscale/x86/swscale_template.c
index f58ac52..26cd274 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -81,7 +81,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t
*lumFilter,
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
if (uDest) {
- x86_reg uv_off = c->uv_off;
+ x86_reg uv_off = c->uv_offx2 >> 1;
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW +
uv_off, uv_off)
}
@@ -164,7 +164,7 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const
int16_t *lumFilter,
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
if (uDest) {
- x86_reg uv_off = c->uv_off;
+ x86_reg uv_off = c->uv_offx2 >> 1;
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off,
chrDstW + uv_off, uv_off)
}
@@ -473,7 +473,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
@@ -506,7 +506,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
@@ -563,7 +563,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -587,7 +587,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -640,7 +640,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -664,7 +664,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -797,7 +797,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -821,7 +821,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -862,7 +862,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -883,7 +883,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const
int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off << 1;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
--
1.7.5.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel