From: "Ronald S. Bultje" <[email protected]>

Fix an overflow in 16-bit vertical scaling (e.g. using -flags lanczos),
fix a crash in bilinear vertical scaling when using C code if output
format was 8-bpc RGB but without alpha (e.g. RGB24), fix an underflow
in 16-bit horizontal scaling when using e.g. lanczos scaling, and fix
a bug when converting from 16-bit to 8-bit and using MMX optimizations,
where we would wrongly use the second (unused) hald of the U plane,
rather than the V plane, to write out the V pixels.
---
 libswscale/swscale.c              |   23 ++++++++++++-----------
 libswscale/utils.c                |    2 +-
 libswscale/x86/swscale_template.c |   24 ++++++++++++------------
 3 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index dacf40e..74c9225 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -206,7 +206,7 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const 
int32_t **lumSrc,
     int i;
     uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
              *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
-    int shift = 15 + 16 - output_bits;
+    int shift = 15 + 16 - output_bits - 1;
 
 #define output_pixel(pos, val) \
     if (big_endian) { \
@@ -223,24 +223,24 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const 
int32_t **lumSrc,
         } \
     }
     for (i = 0; i < dstW; i++) {
-        int val = 1 << (30-output_bits);
+        int val = 1 << (30-output_bits - 1);
         int j;
 
         for (j = 0; j < lumFilterSize; j++)
-            val += lumSrc[j][i] * lumFilter[j];
+            val += (lumSrc[j][i] * lumFilter[j]) >> 1;
 
         output_pixel(&yDest[i], val);
     }
 
     if (uDest) {
         for (i = 0; i < chrDstW; i++) {
-            int u = 1 << (30-output_bits);
-            int v = 1 << (30-output_bits);
+            int u = 1 << (30-output_bits - 1);
+            int v = 1 << (30-output_bits - 1);
             int j;
 
             for (j = 0; j < chrFilterSize; j++) {
-                u += chrUSrc[j][i] * chrFilter[j];
-                v += chrVSrc[j][i] * chrFilter[j];
+                u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
+                v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
             }
 
             output_pixel(&uDest[i], u);
@@ -250,11 +250,11 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const 
int32_t **lumSrc,
 
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         for (i = 0; i < dstW; i++) {
-            int val = 1 << (30-output_bits);
+            int val = 1 << (30-output_bits - 1);
             int j;
 
             for (j = 0; j < lumFilterSize; j++)
-                val += alpSrc[j][i] * lumFilter[j];
+                val += (alpSrc[j][i] * lumFilter[j]) >> 1;
 
             output_pixel(&aDest[i], val);
         }
@@ -1147,7 +1147,8 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
-                  *abuf0 = abuf[0], *abuf1 = abuf[1];
+                  *abuf0 = hasAlpha ? abuf[0] : NULL,
+                  *abuf1 = hasAlpha ? abuf[1] : NULL;
     int  yalpha1 = 4095 - yalpha;
     int uvalpha1 = 4095 - uvalpha;
     int i;
@@ -1865,7 +1866,7 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int 
dstW, const uint8_t *_s
     for (i = 0; i < dstW; i++) {
         int j;
         int srcPos = filterPos[i];
-        unsigned int val = 0;
+        int val = 0;
 
         for (j = 0; j < filterSize; j++) {
             val += src[srcPos + j] * filter[filterSize * i + j];
diff --git a/libswscale/utils.c b/libswscale/utils.c
index eea32a1..99eab11 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1040,7 +1040,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, 
fail);
         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
-        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] 
+ dst_stride_px;
+        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] 
+ (dst_stride >> 1);
     }
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
         for (i=0; i<c->vLumBufSize; i++) {
diff --git a/libswscale/x86/swscale_template.c 
b/libswscale/x86/swscale_template.c
index f58ac52..26cd274 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -81,7 +81,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t 
*lumFilter,
             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
 
     if (uDest) {
-        x86_reg uv_off = c->uv_off;
+        x86_reg uv_off = c->uv_offx2 >> 1;
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + 
uv_off, uv_off)
     }
@@ -164,7 +164,7 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const 
int16_t *lumFilter,
             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
 
     if (uDest) {
-        x86_reg uv_off = c->uv_off;
+        x86_reg uv_off = c->uv_offx2 >> 1;
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, 
chrDstW + uv_off, uv_off)
     }
@@ -473,7 +473,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX_ACCURATE
@@ -506,7 +506,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX
@@ -563,7 +563,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -587,7 +587,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -640,7 +640,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -664,7 +664,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -797,7 +797,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -821,7 +821,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -862,7 +862,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -883,7 +883,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const 
int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off << 1;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
-- 
1.7.5.4

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to