On Mon, 20 Jun 2011, Måns Rullgård wrote:
"Ronald S. Bultje" <[email protected]> writes:
On Mon, Jun 20, 2011 at 5:55 AM, Mans Rullgard <[email protected]> wrote:

Signed-off-by: Mans Rullgard <[email protected]>
---
 libavcodec/x86/h264_i386.h |   65 +++++++++++++++++++++++---------------------
 1 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index b303347..d38b18e 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,25 +45,26 @@ static int decode_significance_x86(CABACContext *c, int 
max_coeff,
    int bit;
    x86_reg coeff_count;
    int low;
+    int range;
    __asm__ volatile(
-        "movl %a10(%5), %%esi                   \n\t"
-        "movl %a11(%5), %3                      \n\t"
+        "movl %a11(%6), %5                      \n\t"
+        "movl %a12(%6), %3                      \n\t"
[..]
+        "movl %5, %a11(%6)                      \n\t"
+        "movl %3, %a12(%6)                      \n\t"
+        :"=&r"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
+         "=&r"(range)

Patch OK.

Having said that, the above code very much smells like stuff that gcc
might just get right, so why not change it to an r/w register instead
of w-only, and do the assignments outside the inline assembly? The
code may actually become readable then.

It's worth a try.

gcc 4.3 through 4.5 get it... almost right. No extra instructions, but they do use larger-than-necessary instruction encodings (compare the second load in each listing below).

before:
lea rbp, [rbx+0x3cbc0]
...
mov edx, [rbp+0x4]
mov eax, [rbp]

after:
lea rbp, [rbx+0x3cbc0]
...
mov edx, [rbp+0x4]
mov eax, [rbx+0x3cbc0]

--Loren Merritt
From 66caceca6ac2ada82c91bb0e796e557fdc06f1b0 Mon Sep 17 00:00:00 2001
From: Loren Merritt <[email protected]>
Date: Sat, 25 Jun 2011 11:49:44 +0000
Subject: [PATCH] x86: h264: remove hardcoded load/stores in decode_significance[_8x8]_x86

---
 libavcodec/x86/h264_i386.h |   32 +++++++-------------------------
 1 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 9c86210..58bafc9 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,23 +45,18 @@ static int decode_significance_x86(CABACContext *c, int 
max_coeff,
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
-    int low;
-    int range;
     __asm__ volatile(
-        "movl %a11(%6), %5                      \n\t"
-        "movl %a12(%6), %3                      \n\t"
-
         "2:                                     \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a13")
+                             "%w3", "%5", "%k0", "%b0", "%a11")
 
         "test $1, %4                            \n\t"
         " jz 3f                                 \n\t"
         "add  %10, %1                           \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a13")
+                             "%w3", "%5", "%k0", "%b0", "%a11")
 
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
@@ -86,13 +81,9 @@ static int decode_significance_x86(CABACContext *c, int 
max_coeff,
         "4:                                     \n\t"
         "add  %9, %k0                           \n\t"
         "shr $2, %k0                            \n\t"
-
-        "movl %5, %a11(%6)                      \n\t"
-        "movl %3, %a12(%6)                      \n\t"
         :"=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
-         "=&r"(low), "=&r"(bit), "=&r"(range)
+         "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
         :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
-         "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream))
         : "%"REG_c, "memory"
     );
@@ -105,14 +96,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
-    int low;
-    int range;
     x86_reg last=0;
     x86_reg state;
     __asm__ volatile(
-        "movl %a12(%7), %5                      \n\t"
-        "movl %a13(%7), %3                      \n\t"
-
         "mov %1, %6                             \n\t"
         "2:                                     \n\t"
 
@@ -121,7 +107,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "add %9, %6                             \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a14")
+                             "%w3", "%5", "%k0", "%b0", "%a12")
 
         "mov %1, %k6                            \n\t"
         "test $1, %4                            \n\t"
@@ -132,7 +118,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a14")
+                             "%w3", "%5", "%k0", "%b0", "%a12")
 
         "mov %2, %0                             \n\t"
         "mov %1, %k6                            \n\t"
@@ -154,13 +140,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "4:                                     \n\t"
         "addl %8, %k0                           \n\t"
         "shr $2, %k0                            \n\t"
-
-        "movl %5, %a12(%7)                      \n\t"
-        "movl %3, %a13(%7)                      \n\t"
-        :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
-         "=&r"(range), "=&r"(state)
+        :"=&q"(coeff_count),"+m"(last), "+m"(index), "+&r"(c->low), "=&r"(bit),
+         "+&r"(c->range), "=&r"(state)
         :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), 
"m"(sig_off), "m"(last_off),
-         "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream))
         : "%"REG_c, "memory"
     );
-- 
1.7.4.1

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to