On Wed, 04 Oct 2000, Brian Gerst wrote:

>Mikulas Patocka wrote:
>> 
>> Hi.
>> 
>> arch/i386/kernel/entry.S
>>         xchgl %eax, ORIG_EAX(%esp)      # orig_eax (get the error code. )
>>         movl %esp,%edx
>>         xchgl %ecx, ES(%esp)            # get the address and save es.
>>         pushl %eax                      # push the error code
>> 
>> xchg with memory operand has implicit lock prefix and is slooooooow. It is
>
>Here is a patch that removes the xchg instructions.  As an interesting
>note, I tested the difference in cycles between one xchgl and two movl
>instructions on 3 different processors (a Celeron, a K6-2, and an
>Athlon, all UP).  The Celeron and K6-2 showed noticeable improvements.
>The real surprise was the Athlon, where the times were identical.
>This means that either the Athlon has a really fast lock cycle or it is
>ignoring it.
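
For reference, the replacement Brian describes looks like this, taking
the ORIG_EAX slot as the example (%esi as the scratch register, as in
the patch below):

        movl ORIG_EAX(%esp), %esi       # fetch the old value
        movl %eax, ORIG_EAX(%esp)       # store the new value

This does the same job as the locked xchgl, because nothing else can
touch this part of the current process's kernel stack while we're in
the exception path, so the atomicity of xchgl buys us nothing here.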

I also did some testing on the performance of xchgl reg1,(reg2) vs.
the equivalent (but non-locked) sequence using 3 moves and a scratch reg:

     Pentium II/III     xchgl takes 19 cycles
                        3 moves takes just under 3 cycles
                        -> xchgl is more than 6 times slower
     K6-III             xchgl takes 77 (!!) cycles
                        3 moves takes just under 2 cycles
                        -> xchgl is more than 38 times slower
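
For anyone who wants to repeat this kind of measurement, a loop along
these lines will do. It's only a sketch: it ignores rdtsc serialization
and loop overhead, rdtsc clobbers %edx, and %ebp is assumed to point at
a writable scratch word.

        movl $1000000, %edi             # iteration count
        rdtsc                           # TSC -> %edx:%eax
        movl %eax, %ebx                 # save low half of start time
1:      xchgl %ecx, (%ebp)              # instruction(s) under test
        decl %edi
        jnz 1b
        rdtsc
        subl %ebx, %eax                 # total cycles; divide by the
                                        # iteration count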

We could also do the following:

1. Move the error_code block from divide_error to page_fault;
   this removes one jump from the page_fault path.
2. Apply Brian's patch to replace xchgl with ordinary moves;
   this is an improvement for all faults.
3. Move the page_fault/error_code block to immediately precede
   ret_from_exception; this removes one jump from the return path
   and is an improvement for all faults.
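
Schematically, the layout changes from

        divide_error:   push handler address
        error_code:     save registers, call the handler
                        jmp ret_from_exception
        ...
        page_fault:     push handler address
                        jmp error_code

to

        page_fault:     push handler address
        error_code:     save registers, call the handler
        ret_from_exception:             # reached by falling through
        ...
        divide_error:   push handler address
                        jmp error_code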

The patch below against 2.4.0-test9 implements this. I've been running
with this in place for a while now, with no observable negative effects.
I also disassembled arch/i386/kernel/entry.o before and after the patch
and verified that it didn't cause any short-offset jumps in frequently
executed paths to become long-offset jumps.
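
(The check is nothing fancy: something like

        objdump -d arch/i386/kernel/entry.o

on the object built before and after the patch, then comparing the
jump encodings by hand.)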

/Mikael

--- linux-2.4.0-test9/arch/i386/kernel/entry.S.~1~      Wed Oct  4 16:03:19 2000
+++ linux-2.4.0-test9/arch/i386/kernel/entry.S  Mon Oct  9 16:30:16 2000
@@ -256,7 +256,35 @@
        movl $-ENOSYS,EAX(%esp)
        jmp ret_from_sys_call
 
-       ALIGN
+ENTRY(page_fault)
+       pushl $ SYMBOL_NAME(do_page_fault)
+       ALIGN   /* for the benefit of the other exception paths */
+error_code:
+       pushl %ds                       # start building the pt_regs frame
+       pushl %eax
+       xorl %eax,%eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl %eax                       # eax = -1
+       pushl %ecx
+       pushl %ebx
+       cld
+       movl %es,%ecx
+       movl ORIG_EAX(%esp), %esi       # get the error code
+       movl ES(%esp), %edi             # get the function address
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       movl %esp,%edx
+       pushl %esi                      # push the error code
+       pushl %edx                      # push the pt_regs pointer
+       movl $(__KERNEL_DS),%edx
+       movl %edx,%ds
+       movl %edx,%es
+       GET_CURRENT(%ebx)
+       call *%edi                      # call the C handler
+       addl $8,%esp                    # pop the two arguments
 ret_from_exception:
 #ifdef CONFIG_SMP
        GET_CURRENT(%ebx)
@@ -291,32 +319,7 @@
 ENTRY(divide_error)
        pushl $0                # no error code
        pushl $ SYMBOL_NAME(do_divide_error)
-       ALIGN
-error_code:
-       pushl %ds
-       pushl %eax
-       xorl %eax,%eax
-       pushl %ebp
-       pushl %edi
-       pushl %esi
-       pushl %edx
-       decl %eax                       # eax = -1
-       pushl %ecx
-       pushl %ebx
-       cld
-       movl %es,%ecx
-       xchgl %eax, ORIG_EAX(%esp)      # orig_eax (get the error code. )
-       movl %esp,%edx
-       xchgl %ecx, ES(%esp)            # get the address and save es.
-       pushl %eax                      # push the error code
-       pushl %edx
-       movl $(__KERNEL_DS),%edx
-       movl %edx,%ds
-       movl %edx,%es
-       GET_CURRENT(%ebx)
-       call *%ecx
-       addl $8,%esp
-       jmp ret_from_exception
+       jmp error_code
 
 ENTRY(coprocessor_error)
        pushl $0
@@ -408,10 +411,6 @@
 
 ENTRY(alignment_check)
        pushl $ SYMBOL_NAME(do_alignment_check)
-       jmp error_code
-
-ENTRY(page_fault)
-       pushl $ SYMBOL_NAME(do_page_fault)
        jmp error_code
 
 ENTRY(machine_check)