On Mon, 20 Jun 2011, Måns Rullgård wrote:
"Ronald S. Bultje" <[email protected]> writes:
On Mon, Jun 20, 2011 at 5:55 AM, Mans Rullgard <[email protected]> wrote:
Signed-off-by: Mans Rullgard <[email protected]>
---
libavcodec/x86/h264_i386.h | 65 +++++++++++++++++++++++---------------------
1 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index b303347..d38b18e 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,25 +45,26 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
int bit;
x86_reg coeff_count;
int low;
+ int range;
__asm__ volatile(
- "movl %a10(%5), %%esi \n\t"
- "movl %a11(%5), %3 \n\t"
+ "movl %a11(%6), %5 \n\t"
+ "movl %a12(%6), %3 \n\t"
[..]
+ "movl %5, %a11(%6) \n\t"
+ "movl %3, %a12(%6) \n\t"
+ :"=&r"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
+ "=&r"(range)
Patch OK.
Having said that, the above code very much smells like something that gcc
might just get right, so why not change the operands to read/write registers
instead of write-only ones, and do the assignments outside the inline
assembly? The code may actually become readable then.
It's worth a try.
gcc 4.3 through 4.5 get it... almost right. There are no extra instructions,
but they do use larger-than-necessary encodings: the second load is addressed
as [rbx+0x3cbc0] instead of reusing rbp.
before:
lea rbp, [rbx+0x3cbc0]
...
mov edx, [rbp+0x4]
mov eax, [rbp]
after:
lea rbp, [rbx+0x3cbc0]
...
mov edx, [rbp+0x4]
mov eax, [rbx+0x3cbc0]
--Loren Merritt
From 66caceca6ac2ada82c91bb0e796e557fdc06f1b0 Mon Sep 17 00:00:00 2001
From: Loren Merritt <[email protected]>
Date: Sat, 25 Jun 2011 11:49:44 +0000
Subject: [PATCH] x86: h264: remove hardcoded load/stores in decode_significance[_8x8]_x86
---
libavcodec/x86/h264_i386.h | 32 +++++++-------------------------
1 files changed, 7 insertions(+), 25 deletions(-)
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 9c86210..58bafc9 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,23 +45,18 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
int minusindex= 4-(intptr_t)index;
int bit;
x86_reg coeff_count;
- int low;
- int range;
__asm__ volatile(
- "movl %a11(%6), %5 \n\t"
- "movl %a12(%6), %3 \n\t"
-
"2: \n\t"
BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
- "%w3", "%5", "%k0", "%b0", "%a13")
+ "%w3", "%5", "%k0", "%b0", "%a11")
"test $1, %4 \n\t"
" jz 3f \n\t"
"add %10, %1 \n\t"
BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
- "%w3", "%5", "%k0", "%b0", "%a13")
+ "%w3", "%5", "%k0", "%b0", "%a11")
"sub %10, %1 \n\t"
"mov %2, %0 \n\t"
@@ -86,13 +81,9 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"4: \n\t"
"add %9, %k0 \n\t"
"shr $2, %k0 \n\t"
-
- "movl %5, %a11(%6) \n\t"
- "movl %3, %a12(%6) \n\t"
:"=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
- "=&r"(low), "=&r"(bit), "=&r"(range)
+ "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
:"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
- "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
"i"(offsetof(CABACContext, bytestream))
: "%"REG_c, "memory"
);
@@ -105,14 +96,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
int minusindex= 4-(intptr_t)index;
int bit;
x86_reg coeff_count;
- int low;
- int range;
x86_reg last=0;
x86_reg state;
__asm__ volatile(
- "movl %a12(%7), %5 \n\t"
- "movl %a13(%7), %3 \n\t"
-
"mov %1, %6 \n\t"
"2: \n\t"
@@ -121,7 +107,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"add %9, %6 \n\t"
BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
- "%w3", "%5", "%k0", "%b0", "%a14")
+ "%w3", "%5", "%k0", "%b0", "%a12")
"mov %1, %k6 \n\t"
"test $1, %4 \n\t"
@@ -132,7 +118,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"add %11, %6 \n\t"
BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
- "%w3", "%5", "%k0", "%b0", "%a14")
+ "%w3", "%5", "%k0", "%b0", "%a12")
"mov %2, %0 \n\t"
"mov %1, %k6 \n\t"
@@ -154,13 +140,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
"4: \n\t"
"addl %8, %k0 \n\t"
"shr $2, %k0 \n\t"
-
- "movl %5, %a12(%7) \n\t"
- "movl %3, %a13(%7) \n\t"
- :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
- "=&r"(range), "=&r"(state)
+ :"=&q"(coeff_count),"+m"(last), "+m"(index), "+&r"(c->low), "=&r"(bit),
+ "+&r"(c->range), "=&r"(state)
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
"m"(sig_off), "m"(last_off),
- "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
"i"(offsetof(CABACContext, bytestream))
: "%"REG_c, "memory"
);
--
1.7.4.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel