Hi,
Attached are some patches for the fast bilinear scaler, while I wait
for Michael's answer to the previous message regarding fast_bilinear.
Hmm, should these patches go to -devel or -soc?
Ramiro Polla
From 05c4bf02481265155b7d25438e9b32ceefcd578e Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 21:56:57 -0300
Subject: [PATCH] Avoid duplication in initMMX2HScaler()
---
swscale.c | 61 +++++++++++++++++++++++++++++--------------------------------
1 files changed, 29 insertions(+), 32 deletions(-)
diff --git a/swscale.c b/swscale.c
index bbeef61..b11786e 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1790,6 +1790,10 @@ error:
#ifdef COMPILE_MMX2
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
+ uint8_t *fragment;
+ x86_reg imm8OfPShufW1;
+ x86_reg imm8OfPShufW2;
+ x86_reg fragmentLength;
uint8_t *fragmentA;
x86_reg imm8OfPShufW1A;
x86_reg imm8OfPShufW2A;
@@ -1897,6 +1901,9 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
int b=((xpos+xInc)>>16) - xx;
int c=((xpos+xInc*2)>>16) - xx;
int d=((xpos+xInc*3)>>16) - xx;
+ int maxShift;
+ int shift=0;
+ int inc;
filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
@@ -1906,51 +1913,41 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
if (d+1<4)
{
- int maxShift= 3-(d+1);
- int shift=0;
-
- memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
-
- funnyCode[fragmentPos + imm8OfPShufW1B]=
- (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
- funnyCode[fragmentPos + imm8OfPShufW2B]=
- a | (b<<2) | (c<<4) | (d<<6);
-
- if (i+3>=dstW) shift=maxShift; //avoid overread
- else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
-
- if (shift && i>=shift)
- {
- funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
- funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
- filterPos[i/2]-=shift;
- }
-
- fragmentPos+= fragmentLengthB;
+ fragment = fragmentB;
+ imm8OfPShufW1 = imm8OfPShufW1B;
+ imm8OfPShufW2 = imm8OfPShufW2B;
+ fragmentLength = fragmentLengthB;
+ inc = 1;
}
else
{
- int maxShift= 3-d;
- int shift=0;
+ fragment = fragmentA;
+ imm8OfPShufW1 = imm8OfPShufW1A;
+ imm8OfPShufW2 = imm8OfPShufW2A;
+ fragmentLength = fragmentLengthA;
+ inc = 0;
+ }
- memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
+ maxShift= 3-(d+inc);
- funnyCode[fragmentPos + imm8OfPShufW1A]=
- funnyCode[fragmentPos + imm8OfPShufW2A]=
+ memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
+
+ funnyCode[fragmentPos + imm8OfPShufW1]=
+ (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+ funnyCode[fragmentPos + imm8OfPShufW2]=
a | (b<<2) | (c<<4) | (d<<6);
- if (i+4>=dstW) shift=maxShift; //avoid overread
- else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
+ if (i+3+inc>=dstW) shift=maxShift; //avoid overread
+ else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
if (shift && i>=shift)
{
- funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
- funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
+ funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+ funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
filterPos[i/2]-=shift;
}
- fragmentPos+= fragmentLengthA;
- }
+ fragmentPos+= fragmentLength;
funnyCode[fragmentPos]= RET;
}
--
1.6.0.4
From 4877bb6851ecb955ed6d985c5046f95f992725f4 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:02:06 -0300
Subject: [PATCH] Cosmetics
---
swscale.c | 60 ++++++++++++++++++++++++------------------------------------
1 files changed, 24 insertions(+), 36 deletions(-)
diff --git a/swscale.c b/swscale.c
index b11786e..b24907b 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1790,18 +1790,10 @@ error:
#ifdef COMPILE_MMX2
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
- uint8_t *fragment;
- x86_reg imm8OfPShufW1;
- x86_reg imm8OfPShufW2;
- x86_reg fragmentLength;
- uint8_t *fragmentA;
- x86_reg imm8OfPShufW1A;
- x86_reg imm8OfPShufW2A;
- x86_reg fragmentLengthA;
- uint8_t *fragmentB;
- x86_reg imm8OfPShufW1B;
- x86_reg imm8OfPShufW2B;
- x86_reg fragmentLengthB;
+ uint8_t *fragment , *fragmentA , *fragmentB;
+ x86_reg imm8OfPShufW1 , imm8OfPShufW1A , imm8OfPShufW1B;
+ x86_reg imm8OfPShufW2 , imm8OfPShufW2A , imm8OfPShufW2B;
+ x86_reg fragmentLength, fragmentLengthA, fragmentLengthB;
int fragmentPos;
int xpos, i;
@@ -1891,12 +1883,10 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0;
- for (i=0; i<dstW/numSplits; i++)
- {
+ for (i=0; i<dstW/numSplits; i++) {
int xx=xpos>>16;
- if ((i&3) == 0)
- {
+ if ((i&3) == 0) {
int a=0;
int b=((xpos+xInc)>>16) - xx;
int c=((xpos+xInc*2)>>16) - xx;
@@ -1911,16 +1901,13 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
filterPos[i/2]= xx;
- if (d+1<4)
- {
+ if (d+1<4) {
fragment = fragmentB;
imm8OfPShufW1 = imm8OfPShufW1B;
imm8OfPShufW2 = imm8OfPShufW2B;
fragmentLength = fragmentLengthB;
inc = 1;
- }
- else
- {
+ } else {
fragment = fragmentA;
imm8OfPShufW1 = imm8OfPShufW1A;
imm8OfPShufW2 = imm8OfPShufW2A;
@@ -1928,26 +1915,27 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
inc = 0;
}
- maxShift= 3-(d+inc);
+ maxShift= 3-(d+inc);
- memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
+ memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
- funnyCode[fragmentPos + imm8OfPShufW1]=
- (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
- funnyCode[fragmentPos + imm8OfPShufW2]=
- a | (b<<2) | (c<<4) | (d<<6);
+ funnyCode[fragmentPos + imm8OfPShufW1]=
+ (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+ funnyCode[fragmentPos + imm8OfPShufW2]=
+ a | ( b <<2) | ( c <<4) | ( d <<6);
- if (i+3+inc>=dstW) shift=maxShift; //avoid overread
- else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
+ if (i+3+inc>=dstW)
+ shift=maxShift; //avoid overread
+ else if ((filterPos[i/2]&3) <= maxShift)
+ shift=filterPos[i/2]&3; //Align
- if (shift && i>=shift)
- {
- funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
- funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
- filterPos[i/2]-=shift;
- }
+ if (shift && i>=shift) {
+ funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+ funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
+ filterPos[i/2]-=shift;
+ }
- fragmentPos+= fragmentLength;
+ fragmentPos+= fragmentLength;
funnyCode[fragmentPos]= RET;
}
--
1.6.0.4
From 96c44c351bba02aa0f28f225965224ff2805d8a0 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:06:07 -0300
Subject: [PATCH] Fix asm comments for x86 fast_bilinear.
---
swscale_template.c | 32 ++++++++++++++++----------------
1 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/swscale_template.c b/swscale_template.c
index 279cab0..af69886 100644
--- a/swscale_template.c
+++ b/swscale_template.c
@@ -2330,32 +2330,32 @@ FUNNY_Y_CODE
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" // i
"xor %%"REG_d", %%"REG_d" \n\t" // xx
- "xorl %%ecx, %%ecx \n\t" // 2*xalpha
+ "xorl %%ecx, %%ecx \n\t" // xalpha
ASMALIGN(4)
"1: \n\t"
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
+ "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
"shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+ "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
"mov %1, %%"REG_D" \n\t"
"shrl $9, %%esi \n\t"
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
- "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
+ "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
+ "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
+ "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
"shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+ "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
"mov %1, %%"REG_D" \n\t"
"shrl $9, %%esi \n\t"
"movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
- "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
+ "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
+ "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"add $2, %%"REG_a" \n\t"
@@ -2538,16 +2538,16 @@ FUNNY_UV_CODE
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" // i
"xor %%"REG_d", %%"REG_d" \n\t" // xx
- "xorl %%ecx, %%ecx \n\t" // 2*xalpha
+ "xorl %%ecx, %%ecx \n\t" // xalpha
ASMALIGN(4)
"1: \n\t"
"mov %0, %%"REG_S" \n\t"
"movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
+ "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
"shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+ "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
"mov %1, %%"REG_D" \n\t"
"shrl $9, %%esi \n\t"
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
@@ -2555,15 +2555,15 @@ FUNNY_UV_CODE
"movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
+ "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
"shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
+ "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
"mov %1, %%"REG_D" \n\t"
"shrl $9, %%esi \n\t"
"movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
- "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
+ "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
+ "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"add $1, %%"REG_a" \n\t"
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
--
1.6.0.4
From e781b5986f8dd88cf50fb0321730f970d54f045f Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:21:20 -0300
Subject: [PATCH] Factorize FAST_BILINEAR_X86.
---
swscale_template.c | 36 ++++++++++++------------------------
1 files changed, 12 insertions(+), 24 deletions(-)
diff --git a/swscale_template.c b/swscale_template.c
index af69886..e14daa3 100644
--- a/swscale_template.c
+++ b/swscale_template.c
@@ -2199,6 +2199,14 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
#endif /* HAVE_MMX */
}
+#define FAST_BILINEAR_X86 \
+ "subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \
+ "imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \
+ "shll $16, %%edi \n\t" \
+ "addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \
+ "mov %1, %%"REG_D"\n\t" \
+ "shrl $9, %%esi \n\t" \
+
static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
int dstWidth, const uint8_t *src, int srcW,
int xInc)
@@ -2335,24 +2343,14 @@ FUNNY_Y_CODE
"1: \n\t"
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
- "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
- "shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
- "mov %1, %%"REG_D" \n\t"
- "shrl $9, %%esi \n\t"
+ FAST_BILINEAR_X86
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
"adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
- "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
- "shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
- "mov %1, %%"REG_D" \n\t"
- "shrl $9, %%esi \n\t"
+ FAST_BILINEAR_X86
"movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
"adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
@@ -2544,22 +2542,12 @@ FUNNY_UV_CODE
"mov %0, %%"REG_S" \n\t"
"movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
- "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
- "shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
- "mov %1, %%"REG_D" \n\t"
- "shrl $9, %%esi \n\t"
+ FAST_BILINEAR_X86
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
"movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
- "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
- "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*xalpha
- "shll $16, %%edi \n\t"
- "addl %%edi, %%esi \n\t" //src[xx+1]*xalpha + src[xx]*(1-xalpha)
- "mov %1, %%"REG_D" \n\t"
- "shrl $9, %%esi \n\t"
+ FAST_BILINEAR_X86
"movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
--
1.6.0.4
From 963493aae295377d3d44ad5e01f37f73a535a5c9 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ram...@macbuntu.(none)>
Date: Thu, 23 Jul 2009 22:43:30 -0300
Subject: [PATCH] Determine "funnyCode" size at runtime.
---
swscale.c | 49 ++++++++++++++++++++++++++++++-------------------
swscale_internal.h | 2 ++
2 files changed, 32 insertions(+), 19 deletions(-)
diff --git a/swscale.c b/swscale.c
index b24907b..797b086 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1788,12 +1788,14 @@ error:
}
#ifdef COMPILE_MMX2
-static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
+static void initMMX2HScaler(int dstW, int xInc, uint8_t **funnyCodePtr, int *funnyCodeSizePtr, int16_t *filter, int32_t *filterPos, int numSplits)
{
uint8_t *fragment , *fragmentA , *fragmentB;
x86_reg imm8OfPShufW1 , imm8OfPShufW1A , imm8OfPShufW1B;
x86_reg imm8OfPShufW2 , imm8OfPShufW2A , imm8OfPShufW2B;
x86_reg fragmentLength, fragmentLengthA, fragmentLengthB;
+ int funnyCodeSize = 1;
+ uint8_t *funnyCode;
int fragmentPos;
int xpos, i;
@@ -1883,6 +1885,27 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0;
+ /* Determine code size. */
+ for (i=0; i<dstW/numSplits; i+=4) {
+ if (((xpos+xInc*3)>>16) - (xpos>>16) < 3)
+ funnyCodeSize += fragmentLengthB;
+ else
+ funnyCodeSize += fragmentLengthA;
+ xpos+=xInc*4;
+ }
+ *funnyCodeSizePtr = funnyCodeSize;
+
+#ifdef MAP_ANONYMOUS
+ *funnyCodePtr = mmap(NULL, funnyCodeSize, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#elif HAVE_VIRTUALALLOC
+ *funnyCodePtr = VirtualAlloc(NULL, funnyCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+#else
+ *funnyCodePtr = av_malloc(funnyCodeSize);
+#endif
+ funnyCode = *funnyCodePtr;
+
+ xpos= 0;
+
for (i=0; i<dstW/numSplits; i++) {
int xx=xpos>>16;
@@ -2848,29 +2871,17 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
srcFilter->chrH, dstFilter->chrH, c->param);
-#define MAX_FUNNY_CODE_SIZE 10000
#if defined(COMPILE_MMX2)
// can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
{
-#ifdef MAP_ANONYMOUS
- c->funnyYCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
- c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-#elif HAVE_VIRTUALALLOC
- c->funnyYCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
- c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-#else
- c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
- c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
-#endif
-
c->lumMmx2Filter = av_malloc((dstW /8+8)*sizeof(int16_t));
c->chrMmx2Filter = av_malloc((c->chrDstW /4+8)*sizeof(int16_t));
c->lumMmx2FilterPos= av_malloc((dstW /2/8+8)*sizeof(int32_t));
c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
- initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
- initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+ initMMX2HScaler( dstW, c->lumXInc, &c->funnyYCode , &c->funnyYCodeSize , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
+ initMMX2HScaler(c->chrDstW, c->chrXInc, &c->funnyUVCode, &c->funnyUVCodeSize, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
}
#endif /* defined(COMPILE_MMX2) */
} // initialize horizontal stuff
@@ -3539,11 +3550,11 @@ void sws_freeContext(SwsContext *c){
#if ARCH_X86 && CONFIG_GPL
#ifdef MAP_ANONYMOUS
- if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
- if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+ if (c->funnyYCode ) munmap(c->funnyYCode , c->funnyYCodeSize);
+ if (c->funnyUVCode) munmap(c->funnyUVCode, c->funnyUVCodeSize);
#elif HAVE_VIRTUALALLOC
- if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
- if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
+ if (c->funnyYCode ) VirtualFree(c->funnyYCode , c->funnyYCodeSize , MEM_RELEASE);
+ if (c->funnyUVCode) VirtualFree(c->funnyUVCode, c->funnyUVCodeSize, MEM_RELEASE);
#else
av_free(c->funnyYCode );
av_free(c->funnyUVCode);
diff --git a/swscale_internal.h b/swscale_internal.h
index 50cf304..bdedf49 100644
--- a/swscale_internal.h
+++ b/swscale_internal.h
@@ -113,6 +113,8 @@ typedef struct SwsContext{
uint8_t *funnyYCode;
uint8_t *funnyUVCode;
+ int funnyYCodeSize;
+ int funnyUVCodeSize;
int32_t *lumMmx2FilterPos;
int32_t *chrMmx2FilterPos;
int16_t *lumMmx2Filter;
--
1.6.0.4
_______________________________________________
FFmpeg-soc mailing list
FFmpeg-soc@mplayerhq.hu
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc