On Thu, Jan 20, 2005 at 08:32:31PM +0100, Rafael J. Wysocki wrote: > Hi, > > The following patch speeds up the restoring of swsusp images on x86-64 > and makes the assembly code more readable (tested and works on AMD64). It's > against 2.6.11-rc1-mm1, but applies to 2.6.11-rc1-mm2. Please consifer for > applying. > > Signed-off-by: Rafael J. Wysocki <[EMAIL PROTECTED]>
Thanks. I applied it with some small changes to not hardcode any C fields. BTW Pavel, while reading the code I noticed some dubious things in the code: - The TLB flush doesn't flush global pages (turn of PGE and turn it on again). Since that handles kernel pages which are marked global this is surely wrong. - Also is it really needed to flush the TLB after each page and wouldn't INVLPG be better here? Or do you want to flush other pages than the just copied one there too? INVLPG would also take care of the global pages at least on x86-64 (iirc there are some bugs in this regard on some older i386 cpus) - There is a comment that says the code shouldn't use stack, but it definitely uses the stack for some things. Either the comment or the code is wrong. Which is? -Andi The following patch speeds up the restoring of swsusp images on x86-64 and makes the assembly code more readable (tested and works on AMD64). It's against 2.6.11-rc1-mm1, but applies to 2.6.11-rc1-mm2. Please consifer for applying. Signed-off-by: Rafael J. Wysocki <[EMAIL PROTECTED]> Changed by AK to not hardcode any C values and get them from offset.h instead. Signed-off-by: Andi Kleen <[EMAIL PROTECTED]> Index: linux/arch/x86_64/kernel/suspend_asm.S =================================================================== --- linux.orig/arch/x86_64/kernel/suspend_asm.S 2004-10-19 01:55:08.%N +0200 +++ linux/arch/x86_64/kernel/suspend_asm.S 2005-01-22 03:20:28.%N +0100 @@ -11,6 +12,7 @@ #include <linux/linkage.h> #include <asm/segment.h> #include <asm/page.h> +#include <asm/offset.h> ENTRY(swsusp_arch_suspend) @@ -49,43 +51,31 @@ movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on + movq pagedir_nosave(%rip), %rdx + /* compute the limit */ movl nr_copy_pages(%rip), %eax - xorl %ecx, %ecx - movq $0, %r10 testl %eax, %eax jz done -.L105: - xorl %esi, %esi - movq $0, %r11 - jmp .L104 - .p2align 4,,7 -copy_one_page: - movq %r10, %rcx -.L104: - movq pagedir_nosave(%rip), %rdx - movq %rcx, %rax - salq $5, %rax - movq 8(%rdx,%rax), %rcx - movq (%rdx,%rax), %rax - movzbl (%rsi,%rax), %eax - movb %al, (%rsi,%rcx) - - movq %cr3, %rax; # flush TLB - movq %rax, %cr3; - - movq %r11, %rax - incq %rax - cmpq $4095, %rax - movq %rax, %rsi - movq %rax, %r11 - jbe copy_one_page - movq %r10, %rax - incq %rax - movq %rax, %rcx - movq %rax, %r10 - mov nr_copy_pages(%rip), %eax - cmpq %rax, %rcx - jb .L105 + movq %rdx,%r8 + movl $SIZEOF_PBE,%r9d + mul %r9 # with rax, clobbers rdx + movq %r8, %rdx + addq %r8, %rax +loop: + /* get addresses from the pbe and copy the page */ + movq pbe_address(%rdx), %rsi + movq pbe_orig_address(%rdx), %rdi + movq $512, %rcx + rep + movsq + + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + + /* progress to the next pbe */ + addq $SIZEOF_PBE, %rdx + cmpq %rax, %rdx + jb loop done: movl $24, %eax movl %eax, %ds Index: linux/arch/x86_64/kernel/asm-offsets.c =================================================================== --- linux.orig/arch/x86_64/kernel/asm-offsets.c 2004-10-19 01:55:08.%N +0200 +++ linux/arch/x86_64/kernel/asm-offsets.c 2005-01-22 03:09:50.%N +0100 @@ -8,6 +8,7 @@ #include <linux/stddef.h> #include <linux/errno.h> #include <linux/hardirq.h> +#include <linux/suspend.h> #include <asm/pda.h> #include <asm/processor.h> #include <asm/segment.h> @@ -61,6 +62,8 @@ offsetof (struct rt_sigframe32, uc.uc_mcontext)); BLANK(); #endif - + DEFINE(SIZEOF_PBE, sizeof(struct pbe)); + DEFINE(pbe_address, offsetof(struct pbe, address)); + DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); return 0; } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/