[FFmpeg-soc] [PATCH] Some fast_bilinear patches.

Ramiro Polla Thu, 23 Jul 2009 18:56:42 -0700

Hi,

Attached are some patches for the fast bilinear scaler, sent while I wait
for Michael's answer to the previous message regarding fast_bilinear.


Hmm, should these patches go to -devel or -soc?

Ramiro Polla

From 05c4bf02481265155b7d25438e9b32ceefcd578e Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 21:56:57 -0300
Subject: [PATCH] Avoid duplication in initMMX2HScaler()

---
 swscale.c |   61 +++++++++++++++++++++++++++++--------------------------------
 1 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/swscale.c b/swscale.c
index bbeef61..b11786e 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1790,6 +1790,10 @@ error:
 #ifdef COMPILE_MMX2
 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
+    uint8_t *fragment;
+    x86_reg imm8OfPShufW1;
+    x86_reg imm8OfPShufW2;
+    x86_reg fragmentLength;
     uint8_t *fragmentA;
     x86_reg imm8OfPShufW1A;
     x86_reg imm8OfPShufW2A;
@@ -1897,6 +1901,9 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             int b=((xpos+xInc)>>16) - xx;
             int c=((xpos+xInc*2)>>16) - xx;
             int d=((xpos+xInc*3)>>16) - xx;
+            int maxShift;
+            int shift=0;
+            int inc;
 
             filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
             filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
@@ -1906,51 +1913,41 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
 
             if (d+1<4)
             {
-                int maxShift= 3-(d+1);
-                int shift=0;
-
-                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
-
-                funnyCode[fragmentPos + imm8OfPShufW1B]=
-                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
-                funnyCode[fragmentPos + imm8OfPShufW2B]=
-                    a | (b<<2) | (c<<4) | (d<<6);
-
-                if (i+3>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
-
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
-
-                fragmentPos+= fragmentLengthB;
+                fragment = fragmentB;
+                imm8OfPShufW1 = imm8OfPShufW1B;
+                imm8OfPShufW2 = imm8OfPShufW2B;
+                fragmentLength = fragmentLengthB;
+                inc = 1;
             }
             else
             {
-                int maxShift= 3-d;
-                int shift=0;
+                fragment = fragmentA;
+                imm8OfPShufW1 = imm8OfPShufW1A;
+                imm8OfPShufW2 = imm8OfPShufW2A;
+                fragmentLength = fragmentLengthA;
+                inc = 0;
+            }
 
-                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
+                maxShift= 3-(d+inc);
 
-                funnyCode[fragmentPos + imm8OfPShufW1A]=
-                funnyCode[fragmentPos + imm8OfPShufW2A]=
+                memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
+
+                funnyCode[fragmentPos + imm8OfPShufW1]=
+                    (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+                funnyCode[fragmentPos + imm8OfPShufW2]=
                     a | (b<<2) | (c<<4) | (d<<6);
 
-                if (i+4>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
+                if (i+3+inc>=dstW) shift=maxShift; //avoid overread
+                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
 
                 if (shift && i>=shift)
                 {
-                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
                     filterPos[i/2]-=shift;
                 }
 
-                fragmentPos+= fragmentLengthA;
-            }
+                fragmentPos+= fragmentLength;
 
             funnyCode[fragmentPos]= RET;
         }
-- 
1.6.0.4

From 4877bb6851ecb955ed6d985c5046f95f992725f4 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:02:06 -0300
Subject: [PATCH] Cosmetics

---
 swscale.c |   60 ++++++++++++++++++++++++------------------------------------
 1 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/swscale.c b/swscale.c
index b11786e..b24907b 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1790,18 +1790,10 @@ error:
 #ifdef COMPILE_MMX2
 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
-    uint8_t *fragment;
-    x86_reg imm8OfPShufW1;
-    x86_reg imm8OfPShufW2;
-    x86_reg fragmentLength;
-    uint8_t *fragmentA;
-    x86_reg imm8OfPShufW1A;
-    x86_reg imm8OfPShufW2A;
-    x86_reg fragmentLengthA;
-    uint8_t *fragmentB;
-    x86_reg imm8OfPShufW1B;
-    x86_reg imm8OfPShufW2B;
-    x86_reg fragmentLengthB;
+    uint8_t *fragment     , *fragmentA     , *fragmentB;
+    x86_reg imm8OfPShufW1 , imm8OfPShufW1A , imm8OfPShufW1B;
+    x86_reg imm8OfPShufW2 , imm8OfPShufW2A , imm8OfPShufW2B;
+    x86_reg fragmentLength, fragmentLengthA, fragmentLengthB;
     int fragmentPos;
 
     int xpos, i;
@@ -1891,12 +1883,10 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
     xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
     fragmentPos=0;
 
-    for (i=0; i<dstW/numSplits; i++)
-    {
+    for (i=0; i<dstW/numSplits; i++) {
         int xx=xpos>>16;
 
-        if ((i&3) == 0)
-        {
+        if ((i&3) == 0) {
             int a=0;
             int b=((xpos+xInc)>>16) - xx;
             int c=((xpos+xInc*2)>>16) - xx;
@@ -1911,16 +1901,13 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
             filterPos[i/2]= xx;
 
-            if (d+1<4)
-            {
+            if (d+1<4) {
                 fragment = fragmentB;
                 imm8OfPShufW1 = imm8OfPShufW1B;
                 imm8OfPShufW2 = imm8OfPShufW2B;
                 fragmentLength = fragmentLengthB;
                 inc = 1;
-            }
-            else
-            {
+            } else {
                 fragment = fragmentA;
                 imm8OfPShufW1 = imm8OfPShufW1A;
                 imm8OfPShufW2 = imm8OfPShufW2A;
@@ -1928,26 +1915,27 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
                 inc = 0;
             }
 
-                maxShift= 3-(d+inc);
+            maxShift= 3-(d+inc);
 
-                memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
+            memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
 
-                funnyCode[fragmentPos + imm8OfPShufW1]=
-                    (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
-                funnyCode[fragmentPos + imm8OfPShufW2]=
-                    a | (b<<2) | (c<<4) | (d<<6);
+            funnyCode[fragmentPos + imm8OfPShufW1]=
+                (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+            funnyCode[fragmentPos + imm8OfPShufW2]=
+                 a      | ( b     <<2) | ( c     <<4) | ( d     <<6);
 
-                if (i+3+inc>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
+            if (i+3+inc>=dstW)
+                shift=maxShift; //avoid overread
+            else if ((filterPos[i/2]&3) <= maxShift)
+                shift=filterPos[i/2]&3; //Align
 
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
+            if (shift && i>=shift) {
+                funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+                funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
+                filterPos[i/2]-=shift;
+            }
 
-                fragmentPos+= fragmentLength;
+            fragmentPos+= fragmentLength;
 
             funnyCode[fragmentPos]= RET;
         }
-- 
1.6.0.4

From 96c44c351bba02aa0f28f225965224ff2805d8a0 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:06:07 -0300
Subject: [PATCH] Fix asm comments for x86 fast_bilinear.

---
 swscale_template.c |   32 ++++++++++++++++----------------
 1 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/swscale_template.c b/swscale_template.c
index 279cab0..af69886 100644
--- a/swscale_template.c
+++ b/swscale_template.c
@@ -2330,32 +2330,32 @@ FUNNY_Y_CODE
         __asm__ volatile(
         "xor %%"REG_a", %%"REG_a"            \n\t" // i
         "xor %%"REG_d", %%"REG_d"            \n\t" // xx
-        "xorl    %%ecx, %%ecx                \n\t" // 2*xalpha
+        "xorl    %%ecx, %%ecx                \n\t" // xalpha
         ASMALIGN(4)
         "1:                                  \n\t"
         "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
         "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
         "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
+        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*xalpha
         "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+        "addl    %%edi, %%esi                \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
         "mov        %1, %%"REG_D"            \n\t"
         "shrl       $9, %%esi                \n\t"
         "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
-        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>8 + carry
+        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
 
         "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
         "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
         "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*2*xalpha
+        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*xalpha
         "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+        "addl    %%edi, %%esi                \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
         "mov        %1, %%"REG_D"            \n\t"
         "shrl       $9, %%esi                \n\t"
         "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
-        "addw       %4, %%cx                 \n\t" //2*xalpha += xInc&0xFF
-        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>8 + carry
+        "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
+        "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
 
 
         "add        $2, %%"REG_a"            \n\t"
@@ -2538,16 +2538,16 @@ FUNNY_UV_CODE
             __asm__ volatile(
             "xor %%"REG_a", %%"REG_a"               \n\t" // i
             "xor %%"REG_d", %%"REG_d"               \n\t" // xx
-            "xorl    %%ecx, %%ecx                   \n\t" // 2*xalpha
+            "xorl    %%ecx, %%ecx                   \n\t" // xalpha
             ASMALIGN(4)
             "1:                                     \n\t"
             "mov        %0, %%"REG_S"               \n\t"
             "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
             "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
             "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
+            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*xalpha
             "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
             "mov        %1, %%"REG_D"               \n\t"
             "shrl       $9, %%esi                   \n\t"
             "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
@@ -2555,15 +2555,15 @@ FUNNY_UV_CODE
             "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
             "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
             "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*2*xalpha
+            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*xalpha
             "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
             "mov        %1, %%"REG_D"               \n\t"
             "shrl       $9, %%esi                   \n\t"
             "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
 
-            "addw       %4, %%cx                    \n\t" //2*xalpha += xInc&0xFF
-            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>8 + carry
+            "addw       %4, %%cx                    \n\t" //xalpha += xInc&0xFFFF
+            "adc        %3, %%"REG_d"               \n\t" //xx+= xInc>>16 + carry
             "add        $1, %%"REG_a"               \n\t"
             "cmp        %2, %%"REG_a"               \n\t"
             " jb        1b                          \n\t"
-- 
1.6.0.4

From e781b5986f8dd88cf50fb0321730f970d54f045f Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:21:20 -0300
Subject: [PATCH] Factorize FAST_BILINEAR_X86.

---
 swscale_template.c |   36 ++++++++++++------------------------
 1 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/swscale_template.c b/swscale_template.c
index af69886..e14daa3 100644
--- a/swscale_template.c
+++ b/swscale_template.c
@@ -2199,6 +2199,14 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 #endif /* HAVE_MMX */
 }
 
+#define FAST_BILINEAR_X86 \
+    "subl    %%edi, %%esi    \n\t" /*  src[xx+1] - src[xx] */                   \
+    "imull   %%ecx, %%esi    \n\t" /* (src[xx+1] - src[xx])*xalpha */           \
+    "shll      $16, %%edi    \n\t"                                              \
+    "addl    %%edi, %%esi    \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */  \
+    "mov        %1, %%"REG_D"\n\t"                                              \
+    "shrl       $9, %%esi    \n\t"                                              \
+
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                         int dstWidth, const uint8_t *src, int srcW,
                                         int xInc)
@@ -2335,24 +2343,14 @@ FUNNY_Y_CODE
         "1:                                  \n\t"
         "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
         "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*xalpha
-        "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
-        "mov        %1, %%"REG_D"            \n\t"
-        "shrl       $9, %%esi                \n\t"
+        FAST_BILINEAR_X86
         "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
         "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
         "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
 
         "movzbl    (%0, %%"REG_d"), %%edi    \n\t" //src[xx]
         "movzbl   1(%0, %%"REG_d"), %%esi    \n\t" //src[xx+1]
-        "subl    %%edi, %%esi                \n\t" //src[xx+1] - src[xx]
-        "imull   %%ecx, %%esi                \n\t" //(src[xx+1] - src[xx])*xalpha
-        "shll      $16, %%edi                \n\t"
-        "addl    %%edi, %%esi                \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
-        "mov        %1, %%"REG_D"            \n\t"
-        "shrl       $9, %%esi                \n\t"
+        FAST_BILINEAR_X86
         "movw     %%si, 2(%%"REG_D", %%"REG_a", 2)  \n\t"
         "addw       %4, %%cx                 \n\t" //xalpha += xInc&0xFFFF
         "adc        %3, %%"REG_d"            \n\t" //xx+= xInc>>16 + carry
@@ -2544,22 +2542,12 @@ FUNNY_UV_CODE
             "mov        %0, %%"REG_S"               \n\t"
             "movzbl  (%%"REG_S", %%"REG_d"), %%edi  \n\t" //src[xx]
             "movzbl 1(%%"REG_S", %%"REG_d"), %%esi  \n\t" //src[xx+1]
-            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*xalpha
-            "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
-            "mov        %1, %%"REG_D"               \n\t"
-            "shrl       $9, %%esi                   \n\t"
+            FAST_BILINEAR_X86
             "movw     %%si, (%%"REG_D", %%"REG_a", 2)   \n\t"
 
             "movzbl    (%5, %%"REG_d"), %%edi       \n\t" //src[xx]
             "movzbl   1(%5, %%"REG_d"), %%esi       \n\t" //src[xx+1]
-            "subl    %%edi, %%esi                   \n\t" //src[xx+1] - src[xx]
-            "imull   %%ecx, %%esi                   \n\t" //(src[xx+1] - src[xx])*xalpha
-            "shll      $16, %%edi                   \n\t"
-            "addl    %%edi, %%esi                   \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
-            "mov        %1, %%"REG_D"               \n\t"
-            "shrl       $9, %%esi                   \n\t"
+            FAST_BILINEAR_X86
             "movw     %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2)   \n\t"
 
             "addw       %4, %%cx                    \n\t" //xalpha += xInc&0xFFFF
-- 
1.6.0.4

From 963493aae295377d3d44ad5e01f37f73a535a5c9 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:43:30 -0300
Subject: [PATCH] Determine "funnyCode" size at runtime.

---
 swscale.c          |   49 ++++++++++++++++++++++++++++++-------------------
 swscale_internal.h |    2 ++
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/swscale.c b/swscale.c
index b24907b..797b086 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1788,12 +1788,14 @@ error:
 }
 
 #ifdef COMPILE_MMX2
-static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
+static void initMMX2HScaler(int dstW, int xInc, uint8_t **funnyCodePtr, int *funnyCodeSizePtr, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragment     , *fragmentA     , *fragmentB;
     x86_reg imm8OfPShufW1 , imm8OfPShufW1A , imm8OfPShufW1B;
     x86_reg imm8OfPShufW2 , imm8OfPShufW2A , imm8OfPShufW2B;
     x86_reg fragmentLength, fragmentLengthA, fragmentLengthB;
+    int funnyCodeSize = 1;
+    uint8_t *funnyCode;
     int fragmentPos;
 
     int xpos, i;
@@ -1883,6 +1885,27 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
     xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
     fragmentPos=0;
 
+    /* Determine code size. */
+    for (i=0; i<dstW/numSplits; i+=4) {
+        if (((xpos+xInc*3)>>16) - (xpos>>16) < 3)
+            funnyCodeSize += fragmentLengthB;
+        else
+            funnyCodeSize += fragmentLengthA;
+        xpos+=xInc*4;
+    }
+    *funnyCodeSizePtr = funnyCodeSize;
+
+#ifdef MAP_ANONYMOUS
+    *funnyCodePtr = mmap(NULL, funnyCodeSize, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#elif HAVE_VIRTUALALLOC
+    *funnyCodePtr = VirtualAlloc(NULL, funnyCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+#else
+    *funnyCodePtr = av_malloc(funnyCodeSize);
+#endif
+    funnyCode = *funnyCodePtr;
+
+    xpos= 0;
+
     for (i=0; i<dstW/numSplits; i++) {
         int xx=xpos>>16;
 
@@ -2848,29 +2871,17 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
                    (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                    srcFilter->chrH, dstFilter->chrH, c->param);
 
-#define MAX_FUNNY_CODE_SIZE 10000
 #if defined(COMPILE_MMX2)
 // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
         {
-#ifdef MAP_ANONYMOUS
-            c->funnyYCode  = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-            c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-#elif HAVE_VIRTUALALLOC
-            c->funnyYCode  = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-            c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-#else
-            c->funnyYCode  = av_malloc(MAX_FUNNY_CODE_SIZE);
-            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
-#endif
-
             c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
             c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
             c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
             c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
 
-            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
-            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+            initMMX2HScaler(      dstW, c->lumXInc, &c->funnyYCode , &c->funnyYCodeSize , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
+            initMMX2HScaler(c->chrDstW, c->chrXInc, &c->funnyUVCode, &c->funnyUVCodeSize, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
         }
 #endif /* defined(COMPILE_MMX2) */
     } // initialize horizontal stuff
@@ -3539,11 +3550,11 @@ void sws_freeContext(SwsContext *c){
 
 #if ARCH_X86 && CONFIG_GPL
 #ifdef MAP_ANONYMOUS
-    if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
-    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+    if (c->funnyYCode ) munmap(c->funnyYCode , c->funnyYCodeSize);
+    if (c->funnyUVCode) munmap(c->funnyUVCode, c->funnyUVCodeSize);
 #elif HAVE_VIRTUALALLOC
-    if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
-    if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
+    if (c->funnyYCode ) VirtualFree(c->funnyYCode , c->funnyYCodeSize , MEM_RELEASE);
+    if (c->funnyUVCode) VirtualFree(c->funnyUVCode, c->funnyUVCodeSize, MEM_RELEASE);
 #else
     av_free(c->funnyYCode );
     av_free(c->funnyUVCode);
diff --git a/swscale_internal.h b/swscale_internal.h
index 50cf304..bdedf49 100644
--- a/swscale_internal.h
+++ b/swscale_internal.h
@@ -113,6 +113,8 @@ typedef struct SwsContext{
 
     uint8_t *funnyYCode;
     uint8_t *funnyUVCode;
+    int funnyYCodeSize;
+    int funnyUVCodeSize;
     int32_t *lumMmx2FilterPos;
     int32_t *chrMmx2FilterPos;
     int16_t *lumMmx2Filter;
-- 
1.6.0.4

_______________________________________________
FFmpeg-soc mailing list
ffmpeg-soc@mplayerhq.hu
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

[FFmpeg-soc] [PATCH] Some fast_bilinear patches.

Reply via email to